You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/10/25 02:10:29 UTC

svn commit: r1188452 - in /lucene/dev/branches/branch_3x/lucene: ./ src/java/org/apache/lucene/document/ src/java/org/apache/lucene/index/ src/test/org/apache/lucene/index/

Author: rmuir
Date: Tue Oct 25 00:10:29 2011
New Revision: 1188452

URL: http://svn.apache.org/viewvc?rev=1188452&view=rev
Log:
LUCENE-3529: creating empty field + empty term leads to invalid index

Modified:
    lucene/dev/branches/branch_3x/lucene/CHANGES.txt
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/document/Field.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentTermEnum.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/TermInfosReader.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/TermInfosWriter.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java

Modified: lucene/dev/branches/branch_3x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/CHANGES.txt?rev=1188452&r1=1188451&r2=1188452&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/CHANGES.txt Tue Oct 25 00:10:29 2011
@@ -71,6 +71,11 @@ Bug fixes
   for a document depending if you used nextDoc() versus advance().
   (Mike McCandless, Robert Muir)
 
+* LUCENE-3529: Properly support indexing an empty field with empty term text.
+  Previously, if you had assertions enabled, you would receive an error during
+  flush; if you didn't, you would get an invalid index.
+  (Mike McCandless, Robert Muir)
+
 New Features
 
 * LUCENE-3448: Added FixedBitSet.and(other/DISI), andNot(other/DISI).

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/document/Field.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/document/Field.java?rev=1188452&r1=1188451&r2=1188452&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/document/Field.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/document/Field.java Tue Oct 25 00:10:29 2011
@@ -396,8 +396,6 @@ public final class Field extends Abstrac
       throw new NullPointerException("name cannot be null");
     if (value == null)
       throw new NullPointerException("value cannot be null");
-    if (name.length() == 0 && value.length() == 0)
-      throw new IllegalArgumentException("name and value cannot both be empty");
     if (index == Index.NO && store == Store.NO)
       throw new IllegalArgumentException("it doesn't make sense to have a field that "
          + "is neither indexed nor stored");

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentTermEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentTermEnum.java?rev=1188452&r1=1188451&r2=1188452&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentTermEnum.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentTermEnum.java Tue Oct 25 00:10:29 2011
@@ -25,6 +25,7 @@ final class SegmentTermEnum extends Term
   FieldInfos fieldInfos;
   long size;
   long position = -1;
+  private boolean first = true;
 
   private TermBuffer termBuffer = new TermBuffer();
   private TermBuffer prevBuffer = new TermBuffer();
@@ -116,6 +117,7 @@ final class SegmentTermEnum extends Term
     termBuffer.set(t);
     prevBuffer.reset();
     termInfo.set(ti);
+    first = p == -1;
   }
 
   /** Increments the enumeration to the next element.  True if one exists.*/
@@ -163,6 +165,12 @@ final class SegmentTermEnum extends Term
   final int scanTo(Term term) throws IOException {
     scanBuffer.set(term);
     int count = 0;
+    if (first) {
+      // Always force initial next() in case term is Term("", "")
+      next();
+      first = false;
+      count++;
+    }
     while (scanBuffer.compareTo(termBuffer) > 0 && next()) {
       count++;
     }

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/TermInfosReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/TermInfosReader.java?rev=1188452&r1=1188451&r2=1188452&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/TermInfosReader.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/TermInfosReader.java Tue Oct 25 00:10:29 2011
@@ -276,13 +276,7 @@ final class TermInfosReader implements C
     if (enumerator.term() != null && term.compareTo(enumerator.term()) == 0) {
       ti = enumerator.termInfo();
       if (tiOrd == null) {
-        // LUCENE-3183: it's possible, if term is Term("",
-        // ""), for the STE to be incorrectly un-positioned
-        // after scan-to; work around this by not caching in
-        // this case:
-        if (enumerator.position >= 0) {
-          termsCache.put(cacheKey, new TermInfoAndOrd(ti, enumerator.position));
-        }
+        termsCache.put(cacheKey, new TermInfoAndOrd(ti, enumerator.position));
       } else {
         assert sameTermInfo(ti, tiOrd, enumerator);
         assert enumerator.position == tiOrd.termOrd;

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/TermInfosWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/TermInfosWriter.java?rev=1188452&r1=1188451&r2=1188452&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/TermInfosWriter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/TermInfosWriter.java Tue Oct 25 00:10:29 2011
@@ -167,6 +167,11 @@ final class TermInfosWriter implements C
       if (ch1 != ch2)
         return ch1-ch2;
     }
+    if (utf16Result1.length == 0 && lastFieldNumber == -1) {
+      // If there is a field named "" (empty string) with a term text of "" (empty string) then we
+      // will get 0 on this comparison, even though that case is valid.
+      return -1;
+    }
     return utf16Result1.length - utf16Result2.length;
   }
 

Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1188452&r1=1188451&r2=1188452&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java Tue Oct 25 00:10:29 2011
@@ -1096,6 +1096,33 @@ public class TestIndexWriter extends Luc
     reader.close();
     dir.close();
   }
+  
+  public void testEmptyFieldNameEmptyTerm() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+    Document doc = new Document();
+    // LUCENE-3529: the Field ctor no longer throws IAE when name and value are both empty.
+    doc.add(newField("", "", Field.Store.NO, Field.Index.NOT_ANALYZED));
+    doc.add(newField("", "a", Field.Store.NO, Field.Index.NOT_ANALYZED));
+    doc.add(newField("", "b", Field.Store.NO, Field.Index.NOT_ANALYZED));
+    doc.add(newField("", "c", Field.Store.NO, Field.Index.NOT_ANALYZED));
+    writer.addDocument(doc);  
+    writer.close();
+    IndexReader reader = IndexReader.open(dir, true);
+    IndexReader subreader = SegmentReader.getOnlySegmentReader(reader);
+    TermEnum te = subreader.terms();
+    assertTrue(te.next());
+    assertEquals(new Term("", ""), te.term());
+    assertTrue(te.next());
+    assertEquals(new Term("", "a"), te.term());
+    assertTrue(te.next());
+    assertEquals(new Term("", "b"), te.term());
+    assertTrue(te.next());
+    assertEquals(new Term("", "c"), te.term());
+    assertFalse(te.next());
+    reader.close();
+    dir.close();
+  }
 
   private static final class MockIndexWriter extends IndexWriter {