You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2007/01/08 19:11:09 UTC

svn commit: r494136 - in /lucene/java/trunk: ./ src/java/org/apache/lucene/index/ src/java/org/apache/lucene/util/ src/test/org/apache/lucene/index/

Author: mikemccand
Date: Mon Jan  8 10:11:08 2007
New Revision: 494136

URL: http://svn.apache.org/viewvc?view=rev&rev=494136
Log:
LUCENE-140: Add bounds checking to BitVector's get, set, and clear methods
to prevent index corruption when IndexReader.deleteDocument(int docNum)
is called with a "slightly" out-of-bounds docNum.  Other changes:

  * In IndexReader.deleteDocument, set hasChanges to true before
    calling doDelete in case an Exception is hit in doDelete.

  * Changed the "docs out of order" check to be tighter (<= instead of
    <) to catch a boundary case that was missed.

  * Fixed small unrelated javadoc typo.

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/index/IndexReader.java
    lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java
    lucene/java/trunk/src/java/org/apache/lucene/util/BitVector.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexReader.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?view=diff&rev=494136&r1=494135&r2=494136
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Mon Jan  8 10:11:08 2007
@@ -281,6 +281,15 @@
     fixing the original FieldCache performance problem.
     (Chris Hostetter, Yonik Seeley)
 
+29. LUCENE-140: Fix IndexReader.deleteDocument(int docNum) to
+    correctly raise ArrayIndexOutOfBoundsException when docNum is too
+    large.  Previously, if docNum was only slightly too large (within
+    the same multiple of 8, i.e., up to 7 ints beyond maxDoc), no
+    exception would be raised and instead the index would become
+    silently corrupted.  The corruption then only appears much later,
+    in mergeSegments, when the corrupted segment is merged with
+    segment(s) after it. (Mike McCandless)
+
 Optimizations
 
   1. LUCENE-586: TermDocs.skipTo() is now more efficient for

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/IndexReader.java?view=diff&rev=494136&r1=494135&r2=494136
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/IndexReader.java Mon Jan  8 10:11:08 2007
@@ -541,8 +541,8 @@
   public final synchronized void deleteDocument(int docNum) throws IOException {
     if(directoryOwner)
       aquireWriteLock();
-    doDelete(docNum);
     hasChanges = true;
+    doDelete(docNum);
   }
 
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java?view=diff&rev=494136&r1=494135&r2=494136
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java Mon Jan  8 10:11:08 2007
@@ -348,9 +348,9 @@
           doc = docMap[doc];                      // map around deletions
         doc += base;                              // convert to merged space
 
-        if (doc < lastDoc)
+        if (lastDoc != 0 && doc <= lastDoc)
           throw new IllegalStateException("docs out of order (" + doc +
-              " < " + lastDoc + " )");
+              " <= " + lastDoc + " )");
 
         df++;
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/util/BitVector.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/util/BitVector.java?view=diff&rev=494136&r1=494135&r2=494136
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/util/BitVector.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/util/BitVector.java Mon Jan  8 10:11:08 2007
@@ -49,12 +49,18 @@
 
   /** Sets the value of <code>bit</code> to one. */
   public final void set(int bit) {
+    if (bit >= size) {
+      throw new ArrayIndexOutOfBoundsException(bit);
+    }
     bits[bit >> 3] |= 1 << (bit & 7);
     count = -1;
   }
 
   /** Sets the value of <code>bit</code> to zero. */
   public final void clear(int bit) {
+    if (bit >= size) {
+      throw new ArrayIndexOutOfBoundsException(bit);
+    }
     bits[bit >> 3] &= ~(1 << (bit & 7));
     count = -1;
   }
@@ -62,6 +68,9 @@
   /** Returns <code>true</code> if <code>bit</code> is one and
     <code>false</code> if it is zero. */
   public final boolean get(int bit) {
+    if (bit >= size) {
+      throw new ArrayIndexOutOfBoundsException(bit);
+    }
     return (bits[bit >> 3] & (1 << (bit & 7))) != 0;
   }
 
@@ -147,7 +156,7 @@
     }
   }
 
-  /** Indicates if the bit vector is sparse and should be saved as a d-gaps list, or desnse, and should be saved as a bit set. */
+  /** Indicates if the bit vector is sparse and should be saved as a d-gaps list, or dense, and should be saved as a bit set. */
   private boolean isSparse() {
     // note: order of comparisons below set to favor smaller values (no binary range search.)
     // note: adding 4 because we start with ((int) -1) to indicate d-gaps format.

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexReader.java?view=diff&rev=494136&r1=494135&r2=494136
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexReader.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexReader.java Mon Jan  8 10:11:08 2007
@@ -749,6 +749,45 @@
         diskFree += 10;
       }
     }
+
+    public void testDocsOutOfOrderJIRA140() throws IOException {
+      Directory dir = new RAMDirectory();      
+      IndexWriter writer  = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
+      for(int i=0;i<11;i++) {
+        addDoc(writer, "aaa");
+      }
+      writer.close();
+      IndexReader reader = IndexReader.open(dir);
+
+      // Try to delete an invalid docId, yet, within range
+      // of the final bits of the BitVector:
+
+      boolean gotException = false;
+      try {
+        reader.deleteDocument(11);
+      } catch (ArrayIndexOutOfBoundsException e) {
+        gotException = true;
+      }
+      reader.close();
+
+      writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
+
+      // We must add more docs to get a new segment written
+      for(int i=0;i<11;i++) {
+        addDoc(writer, "aaa");
+      }
+
+      try {
+        writer.optimize();
+      } catch (IllegalStateException e) {
+        e.printStackTrace();
+        fail("hit unexpected illegal state exception during optimize");
+      }
+
+      if (!gotException) {
+        fail("delete of out-of-bounds doc number failed to hit exception");
+      }
+    }
     
     private String arrayToString(String[] l) {
       String s = "";