You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/08/11 16:20:14 UTC

svn commit: r1371960 - in /lucene/dev/branches/branch_4x/lucene: ./ core/src/java/org/apache/lucene/codecs/lucene3x/ core/src/java/org/apache/lucene/index/ core/src/test/org/apache/lucene/index/

Author: rmuir
Date: Sat Aug 11 14:20:14 2012
New Revision: 1371960

URL: http://svn.apache.org/viewvc?rev=1371960&view=rev
Log:
Don't allow negatives in the positions file

Added:
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/bogus24.upgraded.to.36.zip   (with props)
Modified:
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermPositions.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1371960&r1=1371959&r2=1371960&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Sat Aug 11 14:20:14 2012
@@ -31,6 +31,11 @@ Bug Fixes
   had a custom Similarity where coord(1,1) != 1F, then the rewritten
   query would be scored differently.  (Robert Muir)
 
+* Don't allow negatives in the positions file. If you have an index
+  from 2.4.0 or earlier with such negative positions, and you already
+  upgraded to 3.x, then to Lucene 4.0-ALPHA or -BETA, you should run
+  CheckIndex. If it fails, then you need to upgrade again to 4.0.  (Robert Muir)
+
 ======================= Lucene 4.0.0-BETA =======================
 
 New features

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java?rev=1371960&r1=1371959&r2=1371960&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java Sat Aug 11 14:20:14 2012
@@ -417,7 +417,11 @@ class Lucene3xTermVectorsReader extends 
           int positions[] = new int[freq];
           int pos = 0;
           for(int posUpto=0;posUpto<freq;posUpto++) {
-            pos += tvf.readVInt();
+            int delta = tvf.readVInt();
+            if (delta == -1) {
+              delta = 0; // LUCENE-1542 correction
+            }
+            pos += delta;
             positions[posUpto] = pos;
           }
           t.positions = positions;

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermPositions.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermPositions.java?rev=1371960&r1=1371959&r2=1371960&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermPositions.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermPositions.java Sat Aug 11 14:20:14 2012
@@ -99,6 +99,8 @@ extends SegmentTermDocs  {
       } 
       delta >>>= 1;
       needToLoadPayload = true;
+    } else if (delta == -1) {
+      delta = 0; // LUCENE-1542 correction
     }
     return delta;
   }

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1371960&r1=1371959&r2=1371960&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Sat Aug 11 14:20:14 2012
@@ -824,11 +824,8 @@ public class CheckIndex {
           if (hasPositions) {
             for(int j=0;j<freq;j++) {
               final int pos = postings.nextPosition();
-              // NOTE: pos=-1 is allowed because of ancient bug
-              // (LUCENE-1542) whereby IndexWriter could
-              // write pos=-1 when first token's posInc is 0
 
-              if (pos < -1) {
+              if (pos < 0) {
                 throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
               }
               if (pos < lastPos) {
@@ -921,14 +918,8 @@ public class CheckIndex {
               int lastOffset = 0;
               for(int posUpto=0;posUpto<freq;posUpto++) {
                 final int pos = postings.nextPosition();
-                // NOTE: pos=-1 is allowed because of ancient bug
-                // (LUCENE-1542) whereby IndexWriter could
-                // write pos=-1 when first token's posInc is 0
-                // (separately: analyzers should not give
-                // posInc=0 to first token); also, term
-                // vectors are allowed to return pos=-1 if
-                // they indexed offset but not positions:
-                if (pos < -1) {
+
+                if (pos < 0) {
                   throw new RuntimeException("position " + pos + " is out of bounds");
                 }
                 if (pos < lastPosition) {
@@ -1500,7 +1491,7 @@ public class CheckIndex {
                       int pos = postings.nextPosition();
                       if (postingsPostings != null) {
                         int postingsPos = postingsPostings.nextPosition();
-                        if (pos != -1 && postingsPos != -1 && pos != postingsPos) {
+                        if (terms.hasPositions() && pos != postingsPos) {
                           throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": pos=" + pos + " differs from postings pos=" + postingsPos);
                         }
                       }

Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java?rev=1371960&r1=1371959&r2=1371960&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java Sat Aug 11 14:20:14 2012
@@ -53,8 +53,10 @@ import org.apache.lucene.search.DocIdSet
 import org.apache.lucene.search.FieldCache;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.NumericRangeQuery;
+import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.BaseDirectoryWrapper;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.MockDirectoryWrapper;
@@ -876,5 +878,73 @@ public class TestBackwardsCompatibility 
     _TestUtil.checkIndex(dir);
     dir.close();
   }
-
+  
+  /* 
+   * Index with negative positions (LUCENE-1542)
+   * Created with this code, using a 2.4.0 jar, then upgraded with 3.6 upgrader:
+   *
+   * public class CreateBogusIndexes {
+   *   public static void main(String args[]) throws Exception {
+   *     Directory d = FSDirectory.getDirectory("/tmp/bogus24");
+   *     IndexWriter iw = new IndexWriter(d, new StandardAnalyzer());
+   *     Document doc = new Document();
+   *     Token brokenToken = new Token("broken", 0, 3);
+   *     brokenToken.setPositionIncrement(0);
+   *     Token okToken = new Token("ok", 0, 2);
+   *     doc.add(new Field("field1", new CannedTokenStream(brokenToken), Field.TermVector.NO));
+   *     doc.add(new Field("field2", new CannedTokenStream(brokenToken), Field.TermVector.WITH_POSITIONS));
+   *     doc.add(new Field("field3", new CannedTokenStream(brokenToken, okToken), Field.TermVector.NO));
+   *     doc.add(new Field("field4", new CannedTokenStream(brokenToken, okToken), Field.TermVector.WITH_POSITIONS));
+   *     iw.addDocument(doc);
+   *     doc = new Document();
+   *     doc.add(new Field("field1", "just more text, not broken", Field.Store.NO, Field.Index.ANALYZED));
+   *     doc.add(new Field("field2", "just more text, not broken", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
+   *     doc.add(new Field("field3", "just more text, not broken", Field.Store.NO, Field.Index.ANALYZED));
+   *     doc.add(new Field("field4", "just more text, not broken", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
+   *     iw.addDocument(doc);
+   *     iw.close();
+   *     d.close();
+   *   }
+   * 
+   *   static class CannedTokenStream extends TokenStream {
+   *     private final Token[] tokens;
+   *     private int upto = 0;
+   *  
+   *     CannedTokenStream(Token... tokens) {
+   *       this.tokens = tokens;
+   *     }
+   *  
+   *     @Override
+   *     public Token next() {
+   *       if (upto < tokens.length) {
+   *         return tokens[upto++];
+   *       } else {
+   *         return null;
+   *       }
+   *     }
+   *   }
+   * }
+   */
+  public static final String bogus24IndexName = "bogus24.upgraded.to.36.zip";
+
+  public void testNegativePositions() throws Exception {
+    File oldIndexDir = _TestUtil.getTempDir("negatives");
+    _TestUtil.unzip(getDataFile(bogus24IndexName), oldIndexDir);
+    Directory dir = newFSDirectory(oldIndexDir);
+    DirectoryReader ir = DirectoryReader.open(dir);
+    IndexSearcher is = new IndexSearcher(ir);
+    PhraseQuery pq = new PhraseQuery();
+    pq.add(new Term("field3", "more"));
+    pq.add(new Term("field3", "text"));
+    TopDocs td = is.search(pq, 10);
+    assertEquals(1, td.totalHits);
+    SlowCompositeReaderWrapper wrapper = new SlowCompositeReaderWrapper(ir);
+    DocsAndPositionsEnum de = wrapper.termPositionsEnum(null, "field3", new BytesRef("broken"));
+    assert de != null;
+    assertEquals(0, de.nextDoc());
+    assertEquals(0, de.nextPosition());
+    ir.close();
+    _TestUtil.checkIndex(dir);
+    dir.close();
+  }
 }

Added: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/bogus24.upgraded.to.36.zip
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/bogus24.upgraded.to.36.zip?rev=1371960&view=auto
==============================================================================
Binary file - no diff available.