You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2013/08/02 20:54:25 UTC

svn commit: r1509805 - in /lucene/dev/trunk/lucene: CHANGES.txt codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java core/src/java/org/apache/lucene/util/fst/FST.java

Author: simonw
Date: Fri Aug  2 18:54:25 2013
New Revision: 1509805

URL: http://svn.apache.org/r1509805
Log:
LUCENE-5152: Fix MemoryPostingsFormat to not modify borrowed BytesRef from FSTEnum

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/FST.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1509805&r1=1509804&r2=1509805&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri Aug  2 18:54:25 2013
@@ -110,6 +110,10 @@ Bug Fixes
 * LUCENE-5151: Associations FacetsAggregators could enter an infinite loop when
   some result documents were missing category associations. (Shai Erera)
 
+* LUCENE-5152: Fix MemoryPostingsFormat to not modify borrowed BytesRef from FSTEnum
+  seek/lookup which can cause sideeffects if done on a cached FST root arc.
+  (Simon Willnauer)
+
 API Changes
 
 * LUCENE-5094: Add ramBytesUsed() to MultiDocValues.OrdinalMap.

Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java?rev=1509805&r1=1509804&r2=1509805&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java (original)
+++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java Fri Aug  2 18:54:25 2013
@@ -335,11 +335,11 @@ public final class MemoryPostingsFormat 
     
     public FSTDocsEnum reset(BytesRef bufferIn, Bits liveDocs, int numDocs) {
       assert numDocs > 0;
-      if (buffer.length < bufferIn.length - bufferIn.offset) {
-        buffer = ArrayUtil.grow(buffer, bufferIn.length - bufferIn.offset);
+      if (buffer.length < bufferIn.length) {
+        buffer = ArrayUtil.grow(buffer, bufferIn.length);
       }
-      in.reset(buffer, 0, bufferIn.length - bufferIn.offset);
-      System.arraycopy(bufferIn.bytes, bufferIn.offset, buffer, 0, bufferIn.length - bufferIn.offset);
+      in.reset(buffer, 0, bufferIn.length);
+      System.arraycopy(bufferIn.bytes, bufferIn.offset, buffer, 0, bufferIn.length);
       this.liveDocs = liveDocs;
       docID = -1;
       accum = 0;
@@ -472,11 +472,11 @@ public final class MemoryPostingsFormat 
       //   System.out.println("  " + Integer.toHexString(bufferIn.bytes[i]&0xFF));
       // }
 
-      if (buffer.length < bufferIn.length - bufferIn.offset) {
-        buffer = ArrayUtil.grow(buffer, bufferIn.length - bufferIn.offset);
+      if (buffer.length < bufferIn.length) {
+        buffer = ArrayUtil.grow(buffer, bufferIn.length);
       }
       in.reset(buffer, 0, bufferIn.length - bufferIn.offset);
-      System.arraycopy(bufferIn.bytes, bufferIn.offset, buffer, 0, bufferIn.length - bufferIn.offset);
+      System.arraycopy(bufferIn.bytes, bufferIn.offset, buffer, 0, bufferIn.length);
       this.liveDocs = liveDocs;
       docID = -1;
       accum = 0;
@@ -632,6 +632,7 @@ public final class MemoryPostingsFormat 
     private int docFreq;
     private long totalTermFreq;
     private BytesRefFSTEnum.InputOutput<BytesRef> current;
+    private BytesRef postingsSpare = new BytesRef();
 
     public FSTTermsEnum(FieldInfo field, FST<BytesRef> fst) {
       this.field = field;
@@ -640,14 +641,16 @@ public final class MemoryPostingsFormat 
 
     private void decodeMetaData() {
       if (!didDecode) {
-        buffer.reset(current.output.bytes, 0, current.output.length);
+        buffer.reset(current.output.bytes, current.output.offset, current.output.length);
         docFreq = buffer.readVInt();
         if (field.getIndexOptions() != IndexOptions.DOCS_ONLY) {
           totalTermFreq = docFreq + buffer.readVLong();
         } else {
           totalTermFreq = -1;
         }
-        current.output.offset = buffer.getPosition();
+        postingsSpare.bytes = current.output.bytes;
+        postingsSpare.offset = buffer.getPosition();
+        postingsSpare.length = current.output.length - (buffer.getPosition() - current.output.offset);
         //System.out.println("  df=" + docFreq + " totTF=" + totalTermFreq + " offset=" + buffer.getPosition() + " len=" + current.output.length);
         didDecode = true;
       }
@@ -699,7 +702,7 @@ public final class MemoryPostingsFormat 
           docsEnum = new FSTDocsEnum(field.getIndexOptions(), field.hasPayloads());
         }
       }
-      return docsEnum.reset(current.output, liveDocs, docFreq);
+      return docsEnum.reset(this.postingsSpare, liveDocs, docFreq);
     }
 
     @Override
@@ -720,7 +723,7 @@ public final class MemoryPostingsFormat 
         }
       }
       //System.out.println("D&P reset this=" + this);
-      return docsAndPositionsEnum.reset(current.output, liveDocs, docFreq);
+      return docsAndPositionsEnum.reset(postingsSpare, liveDocs, docFreq);
     }
 
     @Override

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java?rev=1509805&r1=1509804&r2=1509805&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java Fri Aug  2 18:54:25 2013
@@ -1707,7 +1707,6 @@ public class BlockTreeTermsReader extend
             if (arc.output != NO_OUTPUT) {
               output = fstOutputs.add(output, arc.output);
             }
-
             // if (DEBUG) {
             //   System.out.println("    index: follow label=" + toHex(target.bytes[target.offset + targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
             // }

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/FST.java?rev=1509805&r1=1509804&r2=1509805&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/FST.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/FST.java Fri Aug  2 18:54:25 2013
@@ -171,6 +171,8 @@ public final class FST<T> {
   private final boolean allowArrayArcs;
 
   private Arc<T> cachedRootArcs[];
+  private Arc<T> assertingCachedRootArcs[]; // only set wit assert
+
 
   /** Represents a single arc. */
   public final static class Arc<T> {
@@ -213,7 +215,7 @@ public final class FST<T> {
       }
       return this;
     }
-
+    
     boolean flag(int flag) {
       return FST.flag(flags, flag);
     }
@@ -420,11 +422,18 @@ public final class FST<T> {
       return node;
     }
   }
-
+  
   // Caches first 128 labels
   @SuppressWarnings({"rawtypes","unchecked"})
   private void cacheRootArcs() throws IOException {
     cachedRootArcs = (Arc<T>[]) new Arc[0x80];
+    readRootArcs(cachedRootArcs);
+    
+    assert setAssertingRootArcs(cachedRootArcs);
+    assert assertRootArcs();
+  }
+  
+  public void readRootArcs(Arc<T>[] arcs) throws IOException {
     final Arc<T> arc = new Arc<T>();
     getFirstArc(arc);
     final BytesReader in = getBytesReader();
@@ -433,7 +442,7 @@ public final class FST<T> {
       while(true) {
         assert arc.label != END_LABEL;
         if (arc.label < cachedRootArcs.length) {
-          cachedRootArcs[arc.label] = new Arc<T>().copyFrom(arc);
+          arcs[arc.label] = new Arc<T>().copyFrom(arc);
         } else {
           break;
         }
@@ -444,6 +453,38 @@ public final class FST<T> {
       }
     }
   }
+  
+  @SuppressWarnings({"rawtypes","unchecked"})
+  private boolean setAssertingRootArcs(Arc<T>[] arcs) throws IOException {
+    assertingCachedRootArcs = (Arc<T>[]) new Arc[arcs.length];
+    readRootArcs(assertingCachedRootArcs);
+    return true;
+  }
+  
+  private boolean assertRootArcs() {
+    assert cachedRootArcs != null;
+    assert assertingCachedRootArcs != null;
+    for (int i = 0; i < cachedRootArcs.length; i++) {
+      final Arc<T> root = cachedRootArcs[i];
+      final Arc<T> asserting = assertingCachedRootArcs[i];
+      if (root != null) { 
+        assert root.arcIdx == asserting.arcIdx;
+        assert root.bytesPerArc == asserting.bytesPerArc;
+        assert root.flags == asserting.flags;
+        assert root.label == asserting.label;
+        assert root.nextArc == asserting.nextArc;
+        assert root.nextFinalOutput.equals(asserting.nextFinalOutput);
+        assert root.node == asserting.node;
+        assert root.numArcs == asserting.numArcs;
+        assert root.output.equals(asserting.output);
+        assert root.posArcsStart == asserting.posArcsStart;
+        assert root.target == asserting.target;
+      } else {
+        assert root == null && asserting == null;
+      } 
+    }
+    return true;
+  }
 
   public T getEmptyOutput() {
     return emptyOutput;
@@ -1099,7 +1140,7 @@ public final class FST<T> {
   /** Finds an arc leaving the incoming arc, replacing the arc in place.
    *  This returns null if the arc was not found, else the incoming arc. */
   public Arc<T> findTargetArc(int labelToMatch, Arc<T> follow, Arc<T> arc, BytesReader in) throws IOException {
-    assert cachedRootArcs != null;
+    assert assertRootArcs();
 
     if (labelToMatch == END_LABEL) {
       if (follow.isFinal()) {
@@ -1123,7 +1164,7 @@ public final class FST<T> {
     if (follow.target == startNode && labelToMatch < cachedRootArcs.length) {
       final Arc<T> result = cachedRootArcs[labelToMatch];
       if (result == null) {
-        return result;
+        return null;
       } else {
         arc.copyFrom(result);
         return arc;