You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/10/25 02:10:29 UTC
svn commit: r1188452 - in /lucene/dev/branches/branch_3x/lucene: ./
src/java/org/apache/lucene/document/ src/java/org/apache/lucene/index/
src/test/org/apache/lucene/index/
Author: rmuir
Date: Tue Oct 25 00:10:29 2011
New Revision: 1188452
URL: http://svn.apache.org/viewvc?rev=1188452&view=rev
Log:
LUCENE-3529: creating empty field + empty term leads to invalid index
Modified:
lucene/dev/branches/branch_3x/lucene/CHANGES.txt
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/document/Field.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentTermEnum.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/TermInfosReader.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/TermInfosWriter.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
Modified: lucene/dev/branches/branch_3x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/CHANGES.txt?rev=1188452&r1=1188451&r2=1188452&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/CHANGES.txt Tue Oct 25 00:10:29 2011
@@ -71,6 +71,11 @@ Bug fixes
for a document depending if you used nextDoc() versus advance().
(Mike McCandless, Robert Muir)
+* LUCENE-3529: Properly support indexing an empty field with empty term text.
+ Previously, if you had assertions enabled you would receive an error during
+ flush; if you didn't, you would get an invalid index.
+ (Mike McCandless, Robert Muir)
+
New Features
* LUCENE-3448: Added FixedBitSet.and(other/DISI), andNot(other/DISI).
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/document/Field.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/document/Field.java?rev=1188452&r1=1188451&r2=1188452&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/document/Field.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/document/Field.java Tue Oct 25 00:10:29 2011
@@ -396,8 +396,6 @@ public final class Field extends Abstrac
throw new NullPointerException("name cannot be null");
if (value == null)
throw new NullPointerException("value cannot be null");
- if (name.length() == 0 && value.length() == 0)
- throw new IllegalArgumentException("name and value cannot both be empty");
if (index == Index.NO && store == Store.NO)
throw new IllegalArgumentException("it doesn't make sense to have a field that "
+ "is neither indexed nor stored");
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentTermEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentTermEnum.java?rev=1188452&r1=1188451&r2=1188452&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentTermEnum.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentTermEnum.java Tue Oct 25 00:10:29 2011
@@ -25,6 +25,7 @@ final class SegmentTermEnum extends Term
FieldInfos fieldInfos;
long size;
long position = -1;
+ private boolean first = true;
private TermBuffer termBuffer = new TermBuffer();
private TermBuffer prevBuffer = new TermBuffer();
@@ -116,6 +117,7 @@ final class SegmentTermEnum extends Term
termBuffer.set(t);
prevBuffer.reset();
termInfo.set(ti);
+ first = p == -1;
}
/** Increments the enumeration to the next element. True if one exists.*/
@@ -163,6 +165,12 @@ final class SegmentTermEnum extends Term
final int scanTo(Term term) throws IOException {
scanBuffer.set(term);
int count = 0;
+ if (first) {
+ // Always force initial next() in case term is Term("", "")
+ next();
+ first = false;
+ count++;
+ }
while (scanBuffer.compareTo(termBuffer) > 0 && next()) {
count++;
}
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/TermInfosReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/TermInfosReader.java?rev=1188452&r1=1188451&r2=1188452&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/TermInfosReader.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/TermInfosReader.java Tue Oct 25 00:10:29 2011
@@ -276,13 +276,7 @@ final class TermInfosReader implements C
if (enumerator.term() != null && term.compareTo(enumerator.term()) == 0) {
ti = enumerator.termInfo();
if (tiOrd == null) {
- // LUCENE-3183: it's possible, if term is Term("",
- // ""), for the STE to be incorrectly un-positioned
- // after scan-to; work around this by not caching in
- // this case:
- if (enumerator.position >= 0) {
- termsCache.put(cacheKey, new TermInfoAndOrd(ti, enumerator.position));
- }
+ termsCache.put(cacheKey, new TermInfoAndOrd(ti, enumerator.position));
} else {
assert sameTermInfo(ti, tiOrd, enumerator);
assert enumerator.position == tiOrd.termOrd;
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/TermInfosWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/TermInfosWriter.java?rev=1188452&r1=1188451&r2=1188452&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/TermInfosWriter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/TermInfosWriter.java Tue Oct 25 00:10:29 2011
@@ -167,6 +167,11 @@ final class TermInfosWriter implements C
if (ch1 != ch2)
return ch1-ch2;
}
+ if (utf16Result1.length == 0 && lastFieldNumber == -1) {
+ // If there is a field named "" (empty string) with a term text of "" (empty string) then we
+ // will get 0 on this comparison, yet, it's "OK".
+ return -1;
+ }
return utf16Result1.length - utf16Result2.length;
}
Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1188452&r1=1188451&r2=1188452&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java Tue Oct 25 00:10:29 2011
@@ -1096,6 +1096,33 @@ public class TestIndexWriter extends Luc
reader.close();
dir.close();
}
+
+ public void testEmptyFieldNameEmptyTerm() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+ Document doc = new Document();
+ // LUCENE-3529: empty field name + empty term text must index cleanly; Field no longer throws IAE when both are empty.
+ doc.add(newField("", "", Field.Store.NO, Field.Index.NOT_ANALYZED));
+ doc.add(newField("", "a", Field.Store.NO, Field.Index.NOT_ANALYZED));
+ doc.add(newField("", "b", Field.Store.NO, Field.Index.NOT_ANALYZED));
+ doc.add(newField("", "c", Field.Store.NO, Field.Index.NOT_ANALYZED));
+ writer.addDocument(doc);
+ writer.close();
+ IndexReader reader = IndexReader.open(dir, true);
+ IndexReader subreader = SegmentReader.getOnlySegmentReader(reader);
+ TermEnum te = subreader.terms(); // expect exactly the terms "", "a", "b", "c" of field "", in that order
+ assertTrue(te.next());
+ assertEquals(new Term("", ""), te.term());
+ assertTrue(te.next());
+ assertEquals(new Term("", "a"), te.term());
+ assertTrue(te.next());
+ assertEquals(new Term("", "b"), te.term());
+ assertTrue(te.next());
+ assertEquals(new Term("", "c"), te.term());
+ assertFalse(te.next());
+ reader.close();
+ dir.close();
+ }
private static final class MockIndexWriter extends IndexWriter {