You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2013/08/02 20:54:25 UTC
svn commit: r1509805 - in /lucene/dev/trunk/lucene: CHANGES.txt
codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
core/src/java/org/apache/lucene/util/fst/FST.java
Author: simonw
Date: Fri Aug 2 18:54:25 2013
New Revision: 1509805
URL: http://svn.apache.org/r1509805
Log:
LUCENE-5152: Fix MemoryPostingsFormat to not modify borrowed BytesRef from FSTEnum
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1509805&r1=1509804&r2=1509805&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri Aug 2 18:54:25 2013
@@ -110,6 +110,10 @@ Bug Fixes
* LUCENE-5151: Associations FacetsAggregators could enter an infinite loop when
some result documents were missing category associations. (Shai Erera)
+* LUCENE-5152: Fix MemoryPostingsFormat to not modify borrowed BytesRef from FSTEnum
+ seek/lookup which can cause sideeffects if done on a cached FST root arc.
+ (Simon Willnauer)
+
API Changes
* LUCENE-5094: Add ramBytesUsed() to MultiDocValues.OrdinalMap.
Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java?rev=1509805&r1=1509804&r2=1509805&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java (original)
+++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java Fri Aug 2 18:54:25 2013
@@ -335,11 +335,11 @@ public final class MemoryPostingsFormat
public FSTDocsEnum reset(BytesRef bufferIn, Bits liveDocs, int numDocs) {
assert numDocs > 0;
- if (buffer.length < bufferIn.length - bufferIn.offset) {
- buffer = ArrayUtil.grow(buffer, bufferIn.length - bufferIn.offset);
+ if (buffer.length < bufferIn.length) {
+ buffer = ArrayUtil.grow(buffer, bufferIn.length);
}
- in.reset(buffer, 0, bufferIn.length - bufferIn.offset);
- System.arraycopy(bufferIn.bytes, bufferIn.offset, buffer, 0, bufferIn.length - bufferIn.offset);
+ in.reset(buffer, 0, bufferIn.length);
+ System.arraycopy(bufferIn.bytes, bufferIn.offset, buffer, 0, bufferIn.length);
this.liveDocs = liveDocs;
docID = -1;
accum = 0;
@@ -472,11 +472,11 @@ public final class MemoryPostingsFormat
// System.out.println(" " + Integer.toHexString(bufferIn.bytes[i]&0xFF));
// }
- if (buffer.length < bufferIn.length - bufferIn.offset) {
- buffer = ArrayUtil.grow(buffer, bufferIn.length - bufferIn.offset);
+ if (buffer.length < bufferIn.length) {
+ buffer = ArrayUtil.grow(buffer, bufferIn.length);
}
in.reset(buffer, 0, bufferIn.length - bufferIn.offset);
- System.arraycopy(bufferIn.bytes, bufferIn.offset, buffer, 0, bufferIn.length - bufferIn.offset);
+ System.arraycopy(bufferIn.bytes, bufferIn.offset, buffer, 0, bufferIn.length);
this.liveDocs = liveDocs;
docID = -1;
accum = 0;
@@ -632,6 +632,7 @@ public final class MemoryPostingsFormat
private int docFreq;
private long totalTermFreq;
private BytesRefFSTEnum.InputOutput<BytesRef> current;
+ private BytesRef postingsSpare = new BytesRef();
public FSTTermsEnum(FieldInfo field, FST<BytesRef> fst) {
this.field = field;
@@ -640,14 +641,16 @@ public final class MemoryPostingsFormat
private void decodeMetaData() {
if (!didDecode) {
- buffer.reset(current.output.bytes, 0, current.output.length);
+ buffer.reset(current.output.bytes, current.output.offset, current.output.length);
docFreq = buffer.readVInt();
if (field.getIndexOptions() != IndexOptions.DOCS_ONLY) {
totalTermFreq = docFreq + buffer.readVLong();
} else {
totalTermFreq = -1;
}
- current.output.offset = buffer.getPosition();
+ postingsSpare.bytes = current.output.bytes;
+ postingsSpare.offset = buffer.getPosition();
+ postingsSpare.length = current.output.length - (buffer.getPosition() - current.output.offset);
//System.out.println(" df=" + docFreq + " totTF=" + totalTermFreq + " offset=" + buffer.getPosition() + " len=" + current.output.length);
didDecode = true;
}
@@ -699,7 +702,7 @@ public final class MemoryPostingsFormat
docsEnum = new FSTDocsEnum(field.getIndexOptions(), field.hasPayloads());
}
}
- return docsEnum.reset(current.output, liveDocs, docFreq);
+ return docsEnum.reset(this.postingsSpare, liveDocs, docFreq);
}
@Override
@@ -720,7 +723,7 @@ public final class MemoryPostingsFormat
}
}
//System.out.println("D&P reset this=" + this);
- return docsAndPositionsEnum.reset(current.output, liveDocs, docFreq);
+ return docsAndPositionsEnum.reset(postingsSpare, liveDocs, docFreq);
}
@Override
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java?rev=1509805&r1=1509804&r2=1509805&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java Fri Aug 2 18:54:25 2013
@@ -1707,7 +1707,6 @@ public class BlockTreeTermsReader extend
if (arc.output != NO_OUTPUT) {
output = fstOutputs.add(output, arc.output);
}
-
// if (DEBUG) {
// System.out.println(" index: follow label=" + toHex(target.bytes[target.offset + targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
// }
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/FST.java?rev=1509805&r1=1509804&r2=1509805&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/FST.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/FST.java Fri Aug 2 18:54:25 2013
@@ -171,6 +171,8 @@ public final class FST<T> {
private final boolean allowArrayArcs;
private Arc<T> cachedRootArcs[];
+ private Arc<T> assertingCachedRootArcs[]; // only set wit assert
+
/** Represents a single arc. */
public final static class Arc<T> {
@@ -213,7 +215,7 @@ public final class FST<T> {
}
return this;
}
-
+
boolean flag(int flag) {
return FST.flag(flags, flag);
}
@@ -420,11 +422,18 @@ public final class FST<T> {
return node;
}
}
-
+
// Caches first 128 labels
@SuppressWarnings({"rawtypes","unchecked"})
private void cacheRootArcs() throws IOException {
cachedRootArcs = (Arc<T>[]) new Arc[0x80];
+ readRootArcs(cachedRootArcs);
+
+ assert setAssertingRootArcs(cachedRootArcs);
+ assert assertRootArcs();
+ }
+
+ public void readRootArcs(Arc<T>[] arcs) throws IOException {
final Arc<T> arc = new Arc<T>();
getFirstArc(arc);
final BytesReader in = getBytesReader();
@@ -433,7 +442,7 @@ public final class FST<T> {
while(true) {
assert arc.label != END_LABEL;
if (arc.label < cachedRootArcs.length) {
- cachedRootArcs[arc.label] = new Arc<T>().copyFrom(arc);
+ arcs[arc.label] = new Arc<T>().copyFrom(arc);
} else {
break;
}
@@ -444,6 +453,38 @@ public final class FST<T> {
}
}
}
+
+ @SuppressWarnings({"rawtypes","unchecked"})
+ private boolean setAssertingRootArcs(Arc<T>[] arcs) throws IOException {
+ assertingCachedRootArcs = (Arc<T>[]) new Arc[arcs.length];
+ readRootArcs(assertingCachedRootArcs);
+ return true;
+ }
+
+ private boolean assertRootArcs() {
+ assert cachedRootArcs != null;
+ assert assertingCachedRootArcs != null;
+ for (int i = 0; i < cachedRootArcs.length; i++) {
+ final Arc<T> root = cachedRootArcs[i];
+ final Arc<T> asserting = assertingCachedRootArcs[i];
+ if (root != null) {
+ assert root.arcIdx == asserting.arcIdx;
+ assert root.bytesPerArc == asserting.bytesPerArc;
+ assert root.flags == asserting.flags;
+ assert root.label == asserting.label;
+ assert root.nextArc == asserting.nextArc;
+ assert root.nextFinalOutput.equals(asserting.nextFinalOutput);
+ assert root.node == asserting.node;
+ assert root.numArcs == asserting.numArcs;
+ assert root.output.equals(asserting.output);
+ assert root.posArcsStart == asserting.posArcsStart;
+ assert root.target == asserting.target;
+ } else {
+ assert root == null && asserting == null;
+ }
+ }
+ return true;
+ }
public T getEmptyOutput() {
return emptyOutput;
@@ -1099,7 +1140,7 @@ public final class FST<T> {
/** Finds an arc leaving the incoming arc, replacing the arc in place.
* This returns null if the arc was not found, else the incoming arc. */
public Arc<T> findTargetArc(int labelToMatch, Arc<T> follow, Arc<T> arc, BytesReader in) throws IOException {
- assert cachedRootArcs != null;
+ assert assertRootArcs();
if (labelToMatch == END_LABEL) {
if (follow.isFinal()) {
@@ -1123,7 +1164,7 @@ public final class FST<T> {
if (follow.target == startNode && labelToMatch < cachedRootArcs.length) {
final Arc<T> result = cachedRootArcs[labelToMatch];
if (result == null) {
- return result;
+ return null;
} else {
arc.copyFrom(result);
return arc;