You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2013/01/08 21:15:27 UTC
svn commit: r1430480 - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/analysis/
lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/
lucene/codecs/ lucene/codecs/src/java/org/apache/lucene/codecs/memory/
lucene/core/ lucene...
Author: mikemccand
Date: Tue Jan 8 20:15:26 2013
New Revision: 1430480
URL: http://svn.apache.org/viewvc?rev=1430480&view=rev
Log:
LUCENE-4593: move allowArrayArcs to ctor
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/ (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java
lucene/dev/branches/branch_4x/lucene/codecs/ (props changed)
lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
lucene/dev/branches/branch_4x/lucene/core/ (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
lucene/dev/branches/branch_4x/lucene/suggest/ (props changed)
lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java
lucene/dev/branches/branch_4x/lucene/test-framework/ (props changed)
lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java
Modified: lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java?rev=1430480&r1=1430479&r2=1430480&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java Tue Jan 8 20:15:26 2013
@@ -132,7 +132,7 @@ public class TokenInfoDictionaryBuilder
System.out.println(" encode...");
PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton(true);
- Builder<Long> fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, fstOutput, null, true);
+ Builder<Long> fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, fstOutput, null, true, true);
IntsRef scratch = new IntsRef();
long ord = -1; // first ord will be 0
String lastValue = null;
Modified: lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java?rev=1430480&r1=1430479&r2=1430480&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java Tue Jan 8 20:15:26 2013
@@ -113,7 +113,7 @@ public final class MemoryPostingsFormat
this.field = field;
this.doPackFST = doPackFST;
this.acceptableOverheadRatio = acceptableOverheadRatio;
- builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doPackFST, acceptableOverheadRatio);
+ builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doPackFST, acceptableOverheadRatio, true);
}
private class PostingsWriter extends PostingsConsumer {
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java?rev=1430480&r1=1430479&r2=1430480&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java Tue Jan 8 20:15:26 2013
@@ -419,7 +419,7 @@ public class BlockTreeTermsWriter extend
final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
final Builder<BytesRef> indexBuilder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1,
0, 0, true, false, Integer.MAX_VALUE,
- outputs, null, false);
+ outputs, null, false, true);
//if (DEBUG) {
// System.out.println(" compile index for prefix=" + prefix);
//}
@@ -962,7 +962,7 @@ public class BlockTreeTermsWriter extend
0, 0, true,
true, Integer.MAX_VALUE,
noOutputs,
- new FindBlocks(), false);
+ new FindBlocks(), false, true);
postingsWriter.setField(fieldInfo);
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java?rev=1430480&r1=1430479&r2=1430480&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java Tue Jan 8 20:15:26 2013
@@ -84,11 +84,11 @@ public class Builder<T> {
/**
* Instantiates an FST/FSA builder without any pruning. A shortcut
* to {@link #Builder(FST.INPUT_TYPE, int, int, boolean,
- * boolean, int, Outputs, FreezeTail, boolean)} with
+ * boolean, int, Outputs, FreezeTail, boolean, boolean)} with
* pruning options turned off.
*/
public Builder(FST.INPUT_TYPE inputType, Outputs<T> outputs) {
- this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, false, PackedInts.COMPACT);
+ this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, false, PackedInts.COMPACT, true);
}
/**
@@ -97,9 +97,9 @@ public class Builder<T> {
*/
public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doShareSuffix,
boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs,
- FreezeTail<T> freezeTail, boolean willPackFST) {
+ FreezeTail<T> freezeTail, boolean willPackFST, boolean allowArrayArcs) {
this(inputType, minSuffixCount1, minSuffixCount2, doShareSuffix, doShareNonSingletonNodes,
- shareMaxTailLength, outputs, freezeTail, willPackFST, PackedInts.DEFAULT);
+ shareMaxTailLength, outputs, freezeTail, willPackFST, PackedInts.DEFAULT, allowArrayArcs);
}
/**
@@ -143,10 +143,14 @@ public class Builder<T> {
*
* @param acceptableOverheadRatio How to trade speed for space when building the FST. This option
* is only relevant when doPackFST is true. @see PackedInts#getMutable(int, int, float)
+ *
+ * @param allowArrayArcs Pass false to disable the array arc optimization
+ * while building the FST; this will make the resulting
+ * FST smaller but slower to traverse.
*/
public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doShareSuffix,
boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs,
- FreezeTail<T> freezeTail, boolean doPackFST, float acceptableOverheadRatio) {
+ FreezeTail<T> freezeTail, boolean doPackFST, float acceptableOverheadRatio, boolean allowArrayArcs) {
this.minSuffixCount1 = minSuffixCount1;
this.minSuffixCount2 = minSuffixCount2;
this.freezeTail = freezeTail;
@@ -154,7 +158,7 @@ public class Builder<T> {
this.shareMaxTailLength = shareMaxTailLength;
this.doPackFST = doPackFST;
this.acceptableOverheadRatio = acceptableOverheadRatio;
- fst = new FST<T>(inputType, outputs, doPackFST, acceptableOverheadRatio);
+ fst = new FST<T>(inputType, outputs, doPackFST, acceptableOverheadRatio, allowArrayArcs);
if (doShareSuffix) {
dedupHash = new NodeHash<T>(fst);
} else {
@@ -182,13 +186,6 @@ public class Builder<T> {
return dedupHash == null ? 0 : fst.nodeCount;
}
- /** Pass false to disable the array arc optimization
- * while building the FST; this will make the resulting
- * FST smaller but slower to traverse. */
- public void setAllowArrayArcs(boolean b) {
- fst.setAllowArrayArcs(b);
- }
-
private CompiledNode compileNode(UnCompiledNode<T> nodeIn, int tailLength) throws IOException {
final int node;
if (dedupHash != null && (doShareNonSingletonNodes || nodeIn.numArcs <= 1) && tailLength <= shareMaxTailLength) {
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/FST.java?rev=1430480&r1=1430479&r2=1430480&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/FST.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/FST.java Tue Jan 8 20:15:26 2013
@@ -146,6 +146,10 @@ public final class FST<T> {
public final Outputs<T> outputs;
+ // Used for the BIT_TARGET_NEXT optimization (whereby
+ // instead of storing the address of the target node for
+ // a given arc, we mark a single bit noting that the next
+ // node in the byte[] is the target node):
private int lastFrozenNode;
private final T NO_OUTPUT;
@@ -160,7 +164,7 @@ public final class FST<T> {
/** If arc has this label then that arc is final/accepted */
public static final int END_LABEL = -1;
- private boolean allowArrayArcs = true;
+ private final boolean allowArrayArcs;
private Arc<T> cachedRootArcs[];
@@ -261,9 +265,10 @@ public final class FST<T> {
// make a new empty FST, for building; Builder invokes
// this ctor
- FST(INPUT_TYPE inputType, Outputs<T> outputs, boolean willPackFST, float acceptableOverheadRatio) {
+ FST(INPUT_TYPE inputType, Outputs<T> outputs, boolean willPackFST, float acceptableOverheadRatio, boolean allowArrayArcs) {
this.inputType = inputType;
this.outputs = outputs;
+ this.allowArrayArcs = allowArrayArcs;
bytes = new byte[128];
NO_OUTPUT = outputs.getNoOutput();
if (willPackFST) {
@@ -335,6 +340,11 @@ public final class FST<T> {
NO_OUTPUT = outputs.getNoOutput();
cacheRootArcs();
+
+ // NOTE: bogus because this is only used during
+ // building; we need to break out mutable FST from
+ // immutable
+ allowArrayArcs = false;
}
public INPUT_TYPE getInputType() {
@@ -1160,10 +1170,6 @@ public final class FST<T> {
return arcWithOutputCount;
}
- public void setAllowArrayArcs(boolean v) {
- allowArrayArcs = v;
- }
-
/**
* Nodes will be expanded if their depth (distance from the root node) is
* <= this value and their number of arcs is >=
@@ -1453,6 +1459,11 @@ public final class FST<T> {
this.outputs = outputs;
NO_OUTPUT = outputs.getNoOutput();
writer = new DefaultBytesWriter();
+
+ // NOTE: bogus because this is only used during
+ // building; we need to break out mutable FST from
+ // immutable
+ allowArrayArcs = false;
}
/** Expert: creates an FST by packing this one. This
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java?rev=1430480&r1=1430479&r2=1430480&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java Tue Jan 8 20:15:26 2013
@@ -310,7 +310,7 @@ public class TestFSTs extends LuceneTest
final boolean doRewrite = random().nextBoolean();
- Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doRewrite);
+ Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doRewrite, true);
boolean storeOrd = random().nextBoolean();
if (VERBOSE) {
@@ -453,8 +453,7 @@ public class TestFSTs extends LuceneTest
this.outputs = outputs;
this.doPack = doPack;
- builder = new Builder<T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, 0, prune, prune == 0, true, Integer.MAX_VALUE, outputs, null, doPack);
- builder.setAllowArrayArcs(!noArcArrays);
+ builder = new Builder<T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, 0, prune, prune == 0, true, Integer.MAX_VALUE, outputs, null, doPack, !noArcArrays);
}
protected abstract T getOutput(IntsRef input, int ord) throws IOException;
@@ -1063,7 +1062,7 @@ public class TestFSTs extends LuceneTest
public void testFinalOutputOnEndState() throws Exception {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
- final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE4, 2, 0, true, true, Integer.MAX_VALUE, outputs, null, random().nextBoolean());
+ final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE4, 2, 0, true, true, Integer.MAX_VALUE, outputs, null, random().nextBoolean(), true);
builder.add(Util.toUTF32("stat", new IntsRef()), 17L);
builder.add(Util.toUTF32("station", new IntsRef()), 10L);
final FST<Long> fst = builder.finish();
@@ -1078,7 +1077,7 @@ public class TestFSTs extends LuceneTest
public void testInternalFinalState() throws Exception {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
final boolean willRewrite = random().nextBoolean();
- final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, willRewrite);
+ final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, willRewrite, true);
builder.add(Util.toIntsRef(new BytesRef("stat"), new IntsRef()), outputs.getNoOutput());
builder.add(Util.toIntsRef(new BytesRef("station"), new IntsRef()), outputs.getNoOutput());
final FST<Long> fst = builder.finish();
@@ -1101,7 +1100,7 @@ public class TestFSTs extends LuceneTest
final Long nothing = outputs.getNoOutput();
final Builder<Long> b = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
- final FST<Long> fst = new FST<Long>(FST.INPUT_TYPE.BYTE1, outputs, false, PackedInts.COMPACT);
+ final FST<Long> fst = new FST<Long>(FST.INPUT_TYPE.BYTE1, outputs, false, PackedInts.COMPACT, true);
final Builder.UnCompiledNode<Long> rootNode = new Builder.UnCompiledNode<Long>(b, 0);
Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java?rev=1430480&r1=1430479&r2=1430480&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java Tue Jan 8 20:15:26 2013
@@ -237,7 +237,7 @@ public class FSTCompletionBuilder {
final Object empty = outputs.getNoOutput();
final Builder<Object> builder = new Builder<Object>(
FST.INPUT_TYPE.BYTE1, 0, 0, true, true,
- shareMaxTailLength, outputs, null, false);
+ shareMaxTailLength, outputs, null, false, true);
BytesRef scratch = new BytesRef();
BytesRef entry;
Modified: lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java?rev=1430480&r1=1430479&r2=1430480&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java (original)
+++ lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java Tue Jan 8 20:15:26 2013
@@ -287,7 +287,8 @@ public class FSTTester<T> {
allowRandomSuffixSharing ? _TestUtil.nextInt(random, 1, 10) : Integer.MAX_VALUE,
outputs,
null,
- willRewrite);
+ willRewrite,
+ true);
for(InputOutput<T> pair : pairs) {
if (pair.output instanceof List) {