You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/08/11 16:20:14 UTC
svn commit: r1371960 - in /lucene/dev/branches/branch_4x/lucene: ./
core/src/java/org/apache/lucene/codecs/lucene3x/
core/src/java/org/apache/lucene/index/ core/src/test/org/apache/lucene/index/
Author: rmuir
Date: Sat Aug 11 14:20:14 2012
New Revision: 1371960
URL: http://svn.apache.org/viewvc?rev=1371960&view=rev
Log:
Don't allow negatives in the positions file
Added:
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/bogus24.upgraded.to.36.zip (with props)
Modified:
lucene/dev/branches/branch_4x/lucene/CHANGES.txt
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermPositions.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1371960&r1=1371959&r2=1371960&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Sat Aug 11 14:20:14 2012
@@ -31,6 +31,11 @@ Bug Fixes
had a custom Similarity where coord(1,1) != 1F, then the rewritten
query would be scored differently. (Robert Muir)
+* Don't allow negatives in the positions file. If you have an index
+ from 2.4.0 or earlier with such negative positions, and you already
+ upgraded it to 3.x and then to Lucene 4.0-ALPHA or -BETA, you should run
+ CheckIndex. If it fails, you need to upgrade the index again to 4.0. (Robert Muir)
+
======================= Lucene 4.0.0-BETA =======================
New features
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java?rev=1371960&r1=1371959&r2=1371960&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java Sat Aug 11 14:20:14 2012
@@ -417,7 +417,11 @@ class Lucene3xTermVectorsReader extends
int positions[] = new int[freq];
int pos = 0;
for(int posUpto=0;posUpto<freq;posUpto++) {
- pos += tvf.readVInt();
+ int delta = tvf.readVInt();
+ if (delta == -1) {
+ delta = 0; // LUCENE-1542 correction
+ }
+ pos += delta;
positions[posUpto] = pos;
}
t.positions = positions;
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermPositions.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermPositions.java?rev=1371960&r1=1371959&r2=1371960&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermPositions.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermPositions.java Sat Aug 11 14:20:14 2012
@@ -99,6 +99,8 @@ extends SegmentTermDocs {
}
delta >>>= 1;
needToLoadPayload = true;
+ } else if (delta == -1) {
+ delta = 0; // LUCENE-1542 correction
}
return delta;
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1371960&r1=1371959&r2=1371960&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Sat Aug 11 14:20:14 2012
@@ -824,11 +824,8 @@ public class CheckIndex {
if (hasPositions) {
for(int j=0;j<freq;j++) {
final int pos = postings.nextPosition();
- // NOTE: pos=-1 is allowed because of ancient bug
- // (LUCENE-1542) whereby IndexWriter could
- // write pos=-1 when first token's posInc is 0
- if (pos < -1) {
+ if (pos < 0) {
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
}
if (pos < lastPos) {
@@ -921,14 +918,8 @@ public class CheckIndex {
int lastOffset = 0;
for(int posUpto=0;posUpto<freq;posUpto++) {
final int pos = postings.nextPosition();
- // NOTE: pos=-1 is allowed because of ancient bug
- // (LUCENE-1542) whereby IndexWriter could
- // write pos=-1 when first token's posInc is 0
- // (separately: analyzers should not give
- // posInc=0 to first token); also, term
- // vectors are allowed to return pos=-1 if
- // they indexed offset but not positions:
- if (pos < -1) {
+
+ if (pos < 0) {
throw new RuntimeException("position " + pos + " is out of bounds");
}
if (pos < lastPosition) {
@@ -1500,7 +1491,7 @@ public class CheckIndex {
int pos = postings.nextPosition();
if (postingsPostings != null) {
int postingsPos = postingsPostings.nextPosition();
- if (pos != -1 && postingsPos != -1 && pos != postingsPos) {
+ if (terms.hasPositions() && pos != postingsPos) {
throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": pos=" + pos + " differs from postings pos=" + postingsPos);
}
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java?rev=1371960&r1=1371959&r2=1371960&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java Sat Aug 11 14:20:14 2012
@@ -53,8 +53,10 @@ import org.apache.lucene.search.DocIdSet
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
+import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.BaseDirectoryWrapper;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
@@ -876,5 +878,73 @@ public class TestBackwardsCompatibility
_TestUtil.checkIndex(dir);
dir.close();
}
-
+
+ /*
+ * Index with negative positions (LUCENE-1542)
+ * Created with this code, using a 2.4.0 jar, then upgraded with 3.6 upgrader:
+ *
+ * public class CreateBogusIndexes {
+ * public static void main(String args[]) throws Exception {
+ * Directory d = FSDirectory.getDirectory("/tmp/bogus24");
+ * IndexWriter iw = new IndexWriter(d, new StandardAnalyzer());
+ * Document doc = new Document();
+ * Token brokenToken = new Token("broken", 0, 3);
+ * brokenToken.setPositionIncrement(0);
+ * Token okToken = new Token("ok", 0, 2);
+ * doc.add(new Field("field1", new CannedTokenStream(brokenToken), Field.TermVector.NO));
+ * doc.add(new Field("field2", new CannedTokenStream(brokenToken), Field.TermVector.WITH_POSITIONS));
+ * doc.add(new Field("field3", new CannedTokenStream(brokenToken, okToken), Field.TermVector.NO));
+ * doc.add(new Field("field4", new CannedTokenStream(brokenToken, okToken), Field.TermVector.WITH_POSITIONS));
+ * iw.addDocument(doc);
+ * doc = new Document();
+ * doc.add(new Field("field1", "just more text, not broken", Field.Store.NO, Field.Index.ANALYZED));
+ * doc.add(new Field("field2", "just more text, not broken", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
+ * doc.add(new Field("field3", "just more text, not broken", Field.Store.NO, Field.Index.ANALYZED));
+ * doc.add(new Field("field4", "just more text, not broken", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
+ * iw.addDocument(doc);
+ * iw.close();
+ * d.close();
+ * }
+ *
+ * static class CannedTokenStream extends TokenStream {
+ * private final Token[] tokens;
+ * private int upto = 0;
+ *
+ * CannedTokenStream(Token... tokens) {
+ * this.tokens = tokens;
+ * }
+ *
+ * @Override
+ * public Token next() {
+ * if (upto < tokens.length) {
+ * return tokens[upto++];
+ * } else {
+ * return null;
+ * }
+ * }
+ * }
+ * }
+ */
+ public static final String bogus24IndexName = "bogus24.upgraded.to.36.zip";
+
+ public void testNegativePositions() throws Exception {
+ File oldIndexDir = _TestUtil.getTempDir("negatives");
+ _TestUtil.unzip(getDataFile(bogus24IndexName), oldIndexDir);
+ Directory dir = newFSDirectory(oldIndexDir);
+ DirectoryReader ir = DirectoryReader.open(dir);
+ IndexSearcher is = new IndexSearcher(ir);
+ PhraseQuery pq = new PhraseQuery();
+ pq.add(new Term("field3", "more"));
+ pq.add(new Term("field3", "text"));
+ TopDocs td = is.search(pq, 10);
+ assertEquals(1, td.totalHits);
+ SlowCompositeReaderWrapper wrapper = new SlowCompositeReaderWrapper(ir);
+ DocsAndPositionsEnum de = wrapper.termPositionsEnum(null, "field3", new BytesRef("broken"));
+ assert de != null;
+ assertEquals(0, de.nextDoc());
+ assertEquals(0, de.nextPosition());
+ ir.close();
+ _TestUtil.checkIndex(dir);
+ dir.close();
+ }
}
Added: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/bogus24.upgraded.to.36.zip
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/bogus24.upgraded.to.36.zip?rev=1371960&view=auto
==============================================================================
Binary file - no diff available.