You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/09/23 03:38:23 UTC

svn commit: r1388942 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/util/fst/ lucene/core/src/test/org/apache/lucene/util/fst/ lucene/misc/ lucene/misc/src/java/org/apache/lucene/util/ lucene/misc/sr...

Author: mikemccand
Date: Sun Sep 23 01:38:22 2012
New Revision: 1388942

URL: http://svn.apache.org/viewvc?rev=1388942&view=rev
Log:
LUCENE-4404: add ListOfOutputs for FST to hold more than one output per input

Added:
    lucene/dev/branches/branch_4x/lucene/misc/src/java/org/apache/lucene/util/
      - copied from r1388935, lucene/dev/trunk/lucene/misc/src/java/org/apache/lucene/util/
    lucene/dev/branches/branch_4x/lucene/misc/src/test/org/apache/lucene/util/
      - copied from r1388935, lucene/dev/trunk/lucene/misc/src/test/org/apache/lucene/util/
    lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/fst/
      - copied from r1388935, lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/fst/
Removed:
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/UpToTwoPositiveIntOutputs.java
Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/lucene/core/   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Outputs.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
    lucene/dev/branches/branch_4x/lucene/misc/   (props changed)
    lucene/dev/branches/branch_4x/lucene/test-framework/   (props changed)

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1388942&r1=1388941&r2=1388942&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Sun Sep 23 01:38:22 2012
@@ -7,8 +7,12 @@ For "contrib" changes prior to 4.0, plea
 http://svn.apache.org/repos/asf/lucene/dev/tags/lucene_solr_3_6_0/lucene/contrib/CHANGES.txt
 
 ======================= Lucene 4.1.0 =======================
+New Features
 
-(No Changes)
+* LUCENE-4404: New ListOfOutputs (in lucene/misc) for FSTs wraps
+  another Outputs implementation, allowing you to store more than one
+  output for a single input.  UpToTwoPositiveIntsOutputs was moved
+  from lucene/core to lucene/misc.  (Mike McCandless)
 
 ======================= Lucene 4.0.0 =======================
 

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java?rev=1388942&r1=1388941&r2=1388942&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java Sun Sep 23 01:38:22 2012
@@ -399,8 +399,10 @@ public class Builder<T> {
     }
 
     final UnCompiledNode<T> lastNode = frontier[input.length];
-    lastNode.isFinal = true;
-    lastNode.output = NO_OUTPUT;
+    if (lastInput.length != input.length || prefixLenPlus1 != input.length + 1) {
+      lastNode.isFinal = true;
+      lastNode.output = NO_OUTPUT;
+    }
 
     // push conflicting outputs forward, only as far as
     // needed

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/FST.java?rev=1388942&r1=1388941&r2=1388942&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/FST.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/FST.java Sun Sep 23 01:38:22 2012
@@ -296,11 +296,13 @@ public final class FST<T> {
       // messy
       bytes = new byte[numBytes];
       in.readBytes(bytes, 0, numBytes);
+      BytesReader reader;
       if (packed) {
-        emptyOutput = outputs.read(getBytesReader(0));
+        reader = getBytesReader(0);
       } else {
-        emptyOutput = outputs.read(getBytesReader(numBytes-1));
+        reader = getBytesReader(numBytes-1);
       }
+      emptyOutput = outputs.readFinalOutput(reader);
     } else {
       emptyOutput = null;
     }
@@ -414,7 +416,7 @@ public final class FST<T> {
     // TODO: this is messy -- replace with sillyBytesWriter; maybe make
     // bytes private
     final int posSave = writer.posWrite;
-    outputs.write(emptyOutput, writer);
+    outputs.writeFinalOutput(emptyOutput, writer);
     emptyOutputBytes = new byte[writer.posWrite-posSave];
 
     if (!packed) {
@@ -638,7 +640,7 @@ public final class FST<T> {
 
       if (arc.nextFinalOutput != NO_OUTPUT) {
         //System.out.println("    write final output");
-        outputs.write(arc.nextFinalOutput, writer);
+        outputs.writeFinalOutput(arc.nextFinalOutput, writer);
       }
 
       if (targetHasArcs && (flags & BIT_TARGET_NEXT) == 0) {
@@ -788,7 +790,7 @@ public final class FST<T> {
             outputs.read(in);
           }
           if (arc.flag(BIT_ARC_HAS_FINAL_OUTPUT)) {
-            outputs.read(in);
+            outputs.readFinalOutput(in);
           }
           if (arc.flag(BIT_STOP_NODE)) {
           } else if (arc.flag(BIT_TARGET_NEXT)) {
@@ -963,7 +965,7 @@ public final class FST<T> {
     }
 
     if (arc.flag(BIT_ARC_HAS_FINAL_OUTPUT)) {
-      arc.nextFinalOutput = outputs.read(in);
+      arc.nextFinalOutput = outputs.readFinalOutput(in);
     } else {
       arc.nextFinalOutput = outputs.getNoOutput();
     }
@@ -1127,7 +1129,7 @@ public final class FST<T> {
       }
 
       if (flag(flags, BIT_ARC_HAS_FINAL_OUTPUT)) {
-        outputs.read(in);
+        outputs.readFinalOutput(in);
       }
 
       if (!flag(flags, BIT_STOP_NODE) && !flag(flags, BIT_TARGET_NEXT)) {
@@ -1221,6 +1223,14 @@ public final class FST<T> {
     }
   }
 
+  /** Returns a {@link BytesReader} for this FST, positioned at
+   *  position 0. */
+  public BytesReader getBytesReader() {
+    return getBytesReader(0);
+  }
+
+  /** Returns a {@link BytesReader} for this FST, positioned at
+   *  the provided position. */
   public BytesReader getBytesReader(int pos) {
     // TODO: maybe re-use via ThreadLocal?
     if (packed) {
@@ -1654,7 +1664,7 @@ public final class FST<T> {
               }
             }
             if (arc.nextFinalOutput != NO_OUTPUT) {
-              outputs.write(arc.nextFinalOutput, writer);
+              outputs.writeFinalOutput(arc.nextFinalOutput, writer);
             }
 
             if (doWriteTarget) {

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Outputs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Outputs.java?rev=1388942&r1=1388941&r2=1388942&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Outputs.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Outputs.java Sun Sep 23 01:38:22 2012
@@ -49,10 +49,27 @@ public abstract class Outputs<T> {
   /** Eg add("foo", "bar") -> "foobar" */
   public abstract T add(T prefix, T output);
 
+  /** Encode an output value into a {@link DataOutput}. */
   public abstract void write(T output, DataOutput out) throws IOException;
 
+  /** Encode an final node output value into a {@link
+   *  DataOutput}.  By default this just calls {@link #write(Object,
+   *  DataOutput)}. */
+  public void writeFinalOutput(T output, DataOutput out) throws IOException {
+    write(output, out);
+  }
+
+  /** Decode an output value previously written with {@link
+   *  #write(Object, DataOutput)}. */
   public abstract T read(DataInput in) throws IOException;
 
+  /** Decode an output value previously written with {@link
+   *  #writeFinalOutput(Object, DataOutput)}.  By default this
+   *  just calls {@link #read(DataInput)}. */
+  public T readFinalOutput(DataInput in) throws IOException {
+    return read(in);
+  }
+
   /** NOTE: this output is compared with == so you must
    *  ensure that all methods return the single object if
    *  it's really no output */

Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java?rev=1388942&r1=1388941&r2=1388942&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java Sun Sep 23 01:38:22 2012
@@ -29,8 +29,6 @@ import java.io.Writer;
 import java.util.*;
 
 import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.DirectoryReader;
@@ -56,7 +54,6 @@ import org.apache.lucene.util.LineFileDo
 import org.apache.lucene.util.LuceneTestCase.Slow;
 import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util._TestUtil;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.CompiledAutomaton;
@@ -67,6 +64,10 @@ import org.apache.lucene.util.fst.FST.By
 import org.apache.lucene.util.fst.PairOutputs.Pair;
 import org.apache.lucene.util.packed.PackedInts;
 
+import static org.apache.lucene.util.fst.FSTTester.getRandomString;
+import static org.apache.lucene.util.fst.FSTTester.simpleRandomString;
+import static org.apache.lucene.util.fst.FSTTester.toIntsRef;
+
 @SuppressCodecs({ "SimpleText", "Memory", "Direct" })
 @Slow
 public class TestFSTs extends LuceneTestCase {
@@ -87,59 +88,6 @@ public class TestFSTs extends LuceneTest
     super.tearDown();
   }
 
-  private static BytesRef toBytesRef(IntsRef ir) {
-    BytesRef br = new BytesRef(ir.length);
-    for(int i=0;i<ir.length;i++) {
-      int x = ir.ints[ir.offset+i];
-      assert x >= 0 && x <= 255;
-      br.bytes[i] = (byte) x;
-    }
-    br.length = ir.length;
-    return br;
-  }
-
-  static IntsRef toIntsRef(String s, int inputMode) {
-    return toIntsRef(s, inputMode, new IntsRef(10));
-  }
-
-  static IntsRef toIntsRef(String s, int inputMode, IntsRef ir) {
-    if (inputMode == 0) {
-      // utf8
-      return toIntsRef(new BytesRef(s), ir);
-    } else {
-      // utf32
-      return toIntsRefUTF32(s, ir);
-    }
-  }
-
-  static IntsRef toIntsRefUTF32(String s, IntsRef ir) {
-    final int charLength = s.length();
-    int charIdx = 0;
-    int intIdx = 0;
-    while(charIdx < charLength) {
-      if (intIdx == ir.ints.length) {
-        ir.grow(intIdx+1);
-      }
-      final int utf32 = s.codePointAt(charIdx);
-      ir.ints[intIdx] = utf32;
-      charIdx += Character.charCount(utf32);
-      intIdx++;
-    }
-    ir.length = intIdx;
-    return ir;
-  }
-
-  static IntsRef toIntsRef(BytesRef br, IntsRef ir) {
-    if (br.length > ir.ints.length) {
-      ir.grow(br.length);
-    }
-    for(int i=0;i<br.length;i++) {
-      ir.ints[i] = br.bytes[br.offset+i]&0xFF;
-    }
-    ir.length = br.length;
-    return ir;
-  }
-
   public void testBasicFSA() throws IOException {
     String[] strings = new String[] {"station", "commotion", "elation", "elastic", "plastic", "stop", "ftop", "ftation", "stat"};
     String[] strings2 = new String[] {"station", "commotion", "elation", "elastic", "plastic", "stop", "ftop", "ftation"};
@@ -206,19 +154,6 @@ public class TestFSTs extends LuceneTest
     }
   }
 
-  private static String simpleRandomString(Random r) {
-    final int end = r.nextInt(10);
-    if (end == 0) {
-      // allow 0 length
-      return "";
-    }
-    final char[] buffer = new char[end];
-    for (int i = 0; i < end; i++) {
-      buffer[i] = (char) _TestUtil.nextInt(r, 97, 102);
-    }
-    return new String(buffer, 0, end);
-  }
-
   // given set of terms, test the different outputs for them
   private void doTest(int inputMode, IntsRef[] terms) throws IOException {
     Arrays.sort(terms);
@@ -231,7 +166,7 @@ public class TestFSTs extends LuceneTest
       for(IntsRef term : terms) {
         pairs.add(new FSTTester.InputOutput<Object>(term, NO_OUTPUT));
       }
-      new FSTTester<Object>(random(), dir, inputMode, pairs, outputs, false).doTest();
+      new FSTTester<Object>(random(), dir, inputMode, pairs, outputs, false).doTest(true);
     }
 
     // PositiveIntOutput (ord)
@@ -241,7 +176,7 @@ public class TestFSTs extends LuceneTest
       for(int idx=0;idx<terms.length;idx++) {
         pairs.add(new FSTTester.InputOutput<Long>(terms[idx], (long) idx));
       }
-      new FSTTester<Long>(random(), dir, inputMode, pairs, outputs, true).doTest();
+      new FSTTester<Long>(random(), dir, inputMode, pairs, outputs, true).doTest(true);
     }
 
     // PositiveIntOutput (random monotonically increasing positive number)
@@ -255,7 +190,7 @@ public class TestFSTs extends LuceneTest
         lastOutput = value;
         pairs.add(new FSTTester.InputOutput<Long>(terms[idx], value));
       }
-      new FSTTester<Long>(random(), dir, inputMode, pairs, outputs, doShare).doTest();
+      new FSTTester<Long>(random(), dir, inputMode, pairs, outputs, doShare).doTest(true);
     }
 
     // PositiveIntOutput (random positive number)
@@ -265,7 +200,7 @@ public class TestFSTs extends LuceneTest
       for(int idx=0;idx<terms.length;idx++) {
         pairs.add(new FSTTester.InputOutput<Long>(terms[idx], _TestUtil.nextLong(random(), 0, Long.MAX_VALUE)));
       }
-      new FSTTester<Long>(random(), dir, inputMode, pairs, outputs, false).doTest();
+      new FSTTester<Long>(random(), dir, inputMode, pairs, outputs, false).doTest(true);
     }
 
     // Pair<ord, (random monotonically increasing positive number>
@@ -281,7 +216,7 @@ public class TestFSTs extends LuceneTest
         pairs.add(new FSTTester.InputOutput<PairOutputs.Pair<Long,Long>>(terms[idx],
                                                                          outputs.newPair((long) idx, value)));
       }
-      new FSTTester<PairOutputs.Pair<Long,Long>>(random(), dir, inputMode, pairs, outputs, false).doTest();
+      new FSTTester<PairOutputs.Pair<Long,Long>>(random(), dir, inputMode, pairs, outputs, false).doTest(true);
     }
 
     // Sequence-of-bytes
@@ -293,7 +228,7 @@ public class TestFSTs extends LuceneTest
         final BytesRef output = random().nextInt(30) == 17 ? NO_OUTPUT : new BytesRef(Integer.toString(idx));
         pairs.add(new FSTTester.InputOutput<BytesRef>(terms[idx], output));
       }
-      new FSTTester<BytesRef>(random(), dir, inputMode, pairs, outputs, false).doTest();
+      new FSTTester<BytesRef>(random(), dir, inputMode, pairs, outputs, false).doTest(true);
     }
 
     // Sequence-of-ints
@@ -309,722 +244,11 @@ public class TestFSTs extends LuceneTest
         }
         pairs.add(new FSTTester.InputOutput<IntsRef>(terms[idx], output));
       }
-      new FSTTester<IntsRef>(random(), dir, inputMode, pairs, outputs, false).doTest();
+      new FSTTester<IntsRef>(random(), dir, inputMode, pairs, outputs, false).doTest(true);
     }
 
-    // Up to two positive ints, shared, generally but not
-    // monotonically increasing
-    {
-      if (VERBOSE) {
-        System.out.println("TEST: now test UpToTwoPositiveIntOutputs");
-      }
-      final UpToTwoPositiveIntOutputs outputs = UpToTwoPositiveIntOutputs.getSingleton(true);
-      final List<FSTTester.InputOutput<Object>> pairs = new ArrayList<FSTTester.InputOutput<Object>>(terms.length);
-      long lastOutput = 0;
-      for(int idx=0;idx<terms.length;idx++) {
-        // Sometimes go backwards
-        long value = lastOutput + _TestUtil.nextInt(random(), -100, 1000);
-        while(value < 0) {
-          value = lastOutput + _TestUtil.nextInt(random(), -100, 1000);
-        }
-        final Object output;
-        if (random().nextInt(5) == 3) {
-          long value2 = lastOutput + _TestUtil.nextInt(random(), -100, 1000);
-          while(value2 < 0) {
-            value2 = lastOutput + _TestUtil.nextInt(random(), -100, 1000);
-          }
-          output = outputs.get(value, value2);
-        } else {
-          output = outputs.get(value);
-        }
-        pairs.add(new FSTTester.InputOutput<Object>(terms[idx], output));
-      }
-      new FSTTester<Object>(random(), dir, inputMode, pairs, outputs, false).doTest();
-    }
   }
 
-  private static class FSTTester<T> {
-
-    final Random random;
-    final List<InputOutput<T>> pairs;
-    final int inputMode;
-    final Outputs<T> outputs;
-    final Directory dir;
-    final boolean doReverseLookup;
-
-    public FSTTester(Random random, Directory dir, int inputMode, List<InputOutput<T>> pairs, Outputs<T> outputs, boolean doReverseLookup) {
-      this.random = random;
-      this.dir = dir;
-      this.inputMode = inputMode;
-      this.pairs = pairs;
-      this.outputs = outputs;
-      this.doReverseLookup = doReverseLookup;
-    }
-
-    private static class InputOutput<T> implements Comparable<InputOutput<T>> {
-      public final IntsRef input;
-      public final T output;
-
-      public InputOutput(IntsRef input, T output) {
-        this.input = input;
-        this.output = output;
-      }
-
-      public int compareTo(InputOutput<T> other) {
-        if (other instanceof InputOutput) {
-          return input.compareTo((other).input);
-        } else {
-          throw new IllegalArgumentException();
-        }
-      }
-    }
-
-    public void doTest() throws IOException {
-      // no pruning
-      doTest(0, 0, true);
-
-      if (!(outputs instanceof UpToTwoPositiveIntOutputs)) {
-        // simple pruning
-        doTest(_TestUtil.nextInt(random, 1, 1+pairs.size()), 0, true);
-        
-        // leafy pruning
-        doTest(0, _TestUtil.nextInt(random, 1, 1+pairs.size()), true);
-      }
-    }
-
-    // runs the term, returning the output, or null if term
-    // isn't accepted.  if prefixLength is non-null it must be
-    // length 1 int array; prefixLength[0] is set to the length
-    // of the term prefix that matches
-    private T run(FST<T> fst, IntsRef term, int[] prefixLength) throws IOException {
-      assert prefixLength == null || prefixLength.length == 1;
-      final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
-      final T NO_OUTPUT = fst.outputs.getNoOutput();
-      T output = NO_OUTPUT;
-      final FST.BytesReader fstReader = fst.getBytesReader(0);
-
-      for(int i=0;i<=term.length;i++) {
-        final int label;
-        if (i == term.length) {
-          label = FST.END_LABEL;
-        } else {
-          label = term.ints[term.offset+i];
-        }
-        // System.out.println("   loop i=" + i + " label=" + label + " output=" + fst.outputs.outputToString(output) + " curArc: target=" + arc.target + " isFinal?=" + arc.isFinal());
-        if (fst.findTargetArc(label, arc, arc, fstReader) == null) {
-          // System.out.println("    not found");
-          if (prefixLength != null) {
-            prefixLength[0] = i;
-            return output;
-          } else {
-            return null;
-          }
-        }
-        output = fst.outputs.add(output, arc.output);
-      }
-
-      if (prefixLength != null) {
-        prefixLength[0] = term.length;
-      }
-
-      return output;
-    }
-
-    private T randomAcceptedWord(FST<T> fst, IntsRef in) throws IOException {
-      FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
-
-      final List<FST.Arc<T>> arcs = new ArrayList<FST.Arc<T>>();
-      in.length = 0;
-      in.offset = 0;
-      final T NO_OUTPUT = fst.outputs.getNoOutput();
-      T output = NO_OUTPUT;
-      final FST.BytesReader fstReader = fst.getBytesReader(0);
-
-      while(true) {
-        // read all arcs:
-        fst.readFirstTargetArc(arc, arc, fstReader);
-        arcs.add(new FST.Arc<T>().copyFrom(arc));
-        while(!arc.isLast()) {
-          fst.readNextArc(arc, fstReader);
-          arcs.add(new FST.Arc<T>().copyFrom(arc));
-        }
-      
-        // pick one
-        arc = arcs.get(random.nextInt(arcs.size()));
-        arcs.clear();
-
-        // accumulate output
-        output = fst.outputs.add(output, arc.output);
-
-        // append label
-        if (arc.label == FST.END_LABEL) {
-          break;
-        }
-
-        if (in.ints.length == in.length) {
-          in.grow(1+in.length);
-        }
-        in.ints[in.length++] = arc.label;
-      }
-
-      return output;
-    }
-
-
-    FST<T> doTest(int prune1, int prune2, boolean allowRandomSuffixSharing) throws IOException {
-      if (VERBOSE) {
-        System.out.println("\nTEST: prune1=" + prune1 + " prune2=" + prune2);
-      }
-
-      final boolean willRewrite = random.nextBoolean();
-
-      final Builder<T> builder = new Builder<T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4,
-                                                prune1, prune2,
-                                                prune1==0 && prune2==0,
-                                                allowRandomSuffixSharing ? random.nextBoolean() : true,
-                                                allowRandomSuffixSharing ? _TestUtil.nextInt(random, 1, 10) : Integer.MAX_VALUE,
-                                                outputs,
-                                                null,
-                                                willRewrite);
-
-      for(InputOutput<T> pair : pairs) {
-        if (pair.output instanceof UpToTwoPositiveIntOutputs.TwoLongs) {
-          final UpToTwoPositiveIntOutputs _outputs = (UpToTwoPositiveIntOutputs) outputs;
-          final UpToTwoPositiveIntOutputs.TwoLongs twoLongs = (UpToTwoPositiveIntOutputs.TwoLongs) pair.output;
-          @SuppressWarnings("unchecked") final Builder<Object> builderObject = (Builder<Object>) builder;
-          builderObject.add(pair.input, _outputs.get(twoLongs.first));
-          builderObject.add(pair.input, _outputs.get(twoLongs.second));
-        } else {
-          builder.add(pair.input, pair.output);
-        }
-      }
-      FST<T> fst = builder.finish();
-
-      if (random.nextBoolean() && fst != null && !willRewrite) {
-        IOContext context = LuceneTestCase.newIOContext(random);
-        IndexOutput out = dir.createOutput("fst.bin", context);
-        fst.save(out);
-        out.close();
-        IndexInput in = dir.openInput("fst.bin", context);
-        try {
-          fst = new FST<T>(in, outputs);
-        } finally {
-          in.close();
-          dir.deleteFile("fst.bin");
-        }
-      }
-
-      if (VERBOSE && pairs.size() <= 20 && fst != null) {
-        Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8");
-        Util.toDot(fst, w, false, false);
-        w.close();
-        System.out.println("SAVED out.dot");
-      }
-
-      if (VERBOSE) {
-        if (fst == null) {
-          System.out.println("  fst has 0 nodes (fully pruned)");
-        } else {
-          System.out.println("  fst has " + fst.getNodeCount() + " nodes and " + fst.getArcCount() + " arcs");
-        }
-      }
-
-      if (prune1 == 0 && prune2 == 0) {
-        verifyUnPruned(inputMode, fst);
-      } else {
-        verifyPruned(inputMode, fst, prune1, prune2);
-      }
-
-      if (willRewrite && fst != null) {
-        if (VERBOSE) {
-          System.out.println("TEST: now rewrite");
-        }
-        final FST<T> packed = fst.pack(_TestUtil.nextInt(random, 1, 10), _TestUtil.nextInt(random, 0, 10000000), random.nextFloat());
-        if (VERBOSE) {
-          System.out.println("TEST: now verify packed FST");
-        }
-        if (prune1 == 0 && prune2 == 0) {
-          verifyUnPruned(inputMode, packed);
-        } else {
-          verifyPruned(inputMode, packed, prune1, prune2);
-        }
-      }
-
-      return fst;
-    }
-
-    // FST is complete
-    private void verifyUnPruned(int inputMode, FST<T> fst) throws IOException {
-
-      final FST<Long> fstLong;
-      final Set<Long> validOutputs;
-      long minLong = Long.MAX_VALUE;
-      long maxLong = Long.MIN_VALUE;
-
-      if (doReverseLookup) {
-        @SuppressWarnings("unchecked") FST<Long> fstLong0 = (FST<Long>) fst;
-        fstLong = fstLong0;
-        validOutputs = new HashSet<Long>();
-        for(InputOutput<T> pair: pairs) {
-          Long output = (Long) pair.output;
-          maxLong = Math.max(maxLong, output);
-          minLong = Math.min(minLong, output);
-          validOutputs.add(output);
-        }
-      } else {
-        fstLong = null;
-        validOutputs = null;
-      }
-
-      if (pairs.size() == 0) {
-        assertNull(fst);
-        return;
-      }
-
-      if (VERBOSE) {
-        System.out.println("TEST: now verify " + pairs.size() + " terms");
-        for(InputOutput<T> pair : pairs) {
-          assertNotNull(pair);
-          assertNotNull(pair.input);
-          assertNotNull(pair.output);
-          System.out.println("  " + inputToString(inputMode, pair.input) + ": " + outputs.outputToString(pair.output));
-        }
-      }
-
-      assertNotNull(fst);
-
-      // visit valid pairs in order -- make sure all words
-      // are accepted, and FSTEnum's next() steps through
-      // them correctly
-      if (VERBOSE) {
-        System.out.println("TEST: check valid terms/next()");
-      }
-      {
-        IntsRefFSTEnum<T> fstEnum = new IntsRefFSTEnum<T>(fst);
-        for(InputOutput<T> pair : pairs) {
-          IntsRef term = pair.input;
-          if (VERBOSE) {
-            System.out.println("TEST: check term=" + inputToString(inputMode, term) + " output=" + fst.outputs.outputToString(pair.output));
-          }
-          Object output = run(fst, term, null);
-          assertNotNull("term " + inputToString(inputMode, term) + " is not accepted", output);
-          assertEquals(pair.output, output);
-
-          // verify enum's next
-          IntsRefFSTEnum.InputOutput<T> t = fstEnum.next();
-          assertNotNull(t);
-          assertEquals("expected input=" + inputToString(inputMode, term) + " but fstEnum returned " + inputToString(inputMode, t.input), term, t.input);
-          assertEquals(pair.output, t.output);
-        }
-        assertNull(fstEnum.next());
-      }
-
-      final Map<IntsRef,T> termsMap = new HashMap<IntsRef,T>();
-      for(InputOutput<T> pair : pairs) {
-        termsMap.put(pair.input, pair.output);
-      }
-
-      if (doReverseLookup && maxLong > minLong) {
-        // Do random lookups so we test null (output doesn't
-        // exist) case:
-        assertNull(Util.getByOutput(fstLong, minLong-7));
-        assertNull(Util.getByOutput(fstLong, maxLong+7));
-
-        final int num = atLeast(100);
-        for(int iter=0;iter<num;iter++) {
-          Long v = _TestUtil.nextLong(random, minLong, maxLong);
-          IntsRef input = Util.getByOutput(fstLong, v);
-          assertTrue(validOutputs.contains(v) || input == null);
-        }
-      }
-
-      // find random matching word and make sure it's valid
-      if (VERBOSE) {
-        System.out.println("TEST: verify random accepted terms");
-      }
-      final IntsRef scratch = new IntsRef(10);
-      int num = atLeast(500);
-      for(int iter=0;iter<num;iter++) {
-        T output = randomAcceptedWord(fst, scratch);
-        assertTrue("accepted word " + inputToString(inputMode, scratch) + " is not valid", termsMap.containsKey(scratch));
-        assertEquals(termsMap.get(scratch), output);
-
-        if (doReverseLookup) {
-          //System.out.println("lookup output=" + output + " outs=" + fst.outputs);
-          IntsRef input = Util.getByOutput(fstLong, (Long) output);
-          assertNotNull(input);
-          //System.out.println("  got " + Util.toBytesRef(input, new BytesRef()).utf8ToString());
-          assertEquals(scratch, input);
-        }
-      }
-    
-      // test IntsRefFSTEnum.seek:
-      if (VERBOSE) {
-        System.out.println("TEST: verify seek");
-      }
-      IntsRefFSTEnum<T> fstEnum = new IntsRefFSTEnum<T>(fst);
-      num = atLeast(100);
-      for(int iter=0;iter<num;iter++) {
-        if (VERBOSE) {
-          System.out.println("  iter=" + iter);
-        }
-        if (random.nextBoolean()) {
-          // seek to term that doesn't exist:
-          while(true) {
-            final IntsRef term = toIntsRef(getRandomString(random), inputMode);
-            int pos = Collections.binarySearch(pairs, new InputOutput<T>(term, null));
-            if (pos < 0) {
-              pos = -(pos+1);
-              // ok doesn't exist
-              //System.out.println("  seek " + inputToString(inputMode, term));
-              final IntsRefFSTEnum.InputOutput<T> seekResult;
-              if (random.nextInt(3) == 0) {
-                if (VERBOSE) {
-                  System.out.println("  do non-exist seekExact term=" + inputToString(inputMode, term));
-                }
-                seekResult = fstEnum.seekExact(term);
-                pos = -1;
-              } else if (random.nextBoolean()) {
-                if (VERBOSE) {
-                  System.out.println("  do non-exist seekFloor term=" + inputToString(inputMode, term));
-                }
-                seekResult = fstEnum.seekFloor(term);
-                pos--;
-              } else {
-                if (VERBOSE) {
-                  System.out.println("  do non-exist seekCeil term=" + inputToString(inputMode, term));
-                }
-                seekResult = fstEnum.seekCeil(term);
-              }
-
-              if (pos != -1 && pos < pairs.size()) {
-                //System.out.println("    got " + inputToString(inputMode,seekResult.input) + " output=" + fst.outputs.outputToString(seekResult.output));
-                assertNotNull("got null but expected term=" + inputToString(inputMode, pairs.get(pos).input), seekResult);
-                if (VERBOSE) {
-                  System.out.println("    got " + inputToString(inputMode, seekResult.input));
-                }
-                assertEquals("expected " + inputToString(inputMode, pairs.get(pos).input) + " but got " + inputToString(inputMode, seekResult.input), pairs.get(pos).input, seekResult.input);
-                assertEquals(pairs.get(pos).output, seekResult.output);
-              } else {
-                // seeked before start or beyond end
-                //System.out.println("seek=" + seekTerm);
-                assertNull("expected null but got " + (seekResult==null ? "null" : inputToString(inputMode, seekResult.input)), seekResult);
-                if (VERBOSE) {
-                  System.out.println("    got null");
-                }
-              }
-
-              break;
-            }
-          }
-        } else {
-          // seek to term that does exist:
-          InputOutput<T> pair = pairs.get(random.nextInt(pairs.size()));
-          final IntsRefFSTEnum.InputOutput<T> seekResult;
-          if (random.nextInt(3) == 2) {
-            if (VERBOSE) {
-              System.out.println("  do exists seekExact term=" + inputToString(inputMode, pair.input));
-            }
-            seekResult = fstEnum.seekExact(pair.input);
-          } else if (random.nextBoolean()) {
-            if (VERBOSE) {
-              System.out.println("  do exists seekFloor " + inputToString(inputMode, pair.input));
-            }
-            seekResult = fstEnum.seekFloor(pair.input);
-          } else {
-            if (VERBOSE) {
-              System.out.println("  do exists seekCeil " + inputToString(inputMode, pair.input));
-            }
-            seekResult = fstEnum.seekCeil(pair.input);
-          }
-          assertNotNull(seekResult);
-          assertEquals("got " + inputToString(inputMode, seekResult.input) + " but expected " + inputToString(inputMode, pair.input), pair.input, seekResult.input);
-          assertEquals(pair.output, seekResult.output);
-        }
-      }
-
-      if (VERBOSE) {
-        System.out.println("TEST: mixed next/seek");
-      }
-
-      // test mixed next/seek
-      num = atLeast(100);
-      for(int iter=0;iter<num;iter++) {
-        if (VERBOSE) {
-          System.out.println("TEST: iter " + iter);
-        }
-        // reset:
-        fstEnum = new IntsRefFSTEnum<T>(fst);
-        int upto = -1;
-        while(true) {
-          boolean isDone = false;
-          if (upto == pairs.size()-1 || random.nextBoolean()) {
-            // next
-            upto++;
-            if (VERBOSE) {
-              System.out.println("  do next");
-            }
-            isDone = fstEnum.next() == null;
-          } else if (upto != -1 && upto < 0.75 * pairs.size() && random.nextBoolean()) {
-            int attempt = 0;
-            for(;attempt<10;attempt++) {
-              IntsRef term = toIntsRef(getRandomString(random), inputMode);
-              if (!termsMap.containsKey(term) && term.compareTo(pairs.get(upto).input) > 0) {
-                int pos = Collections.binarySearch(pairs, new InputOutput<T>(term, null));
-                assert pos < 0;
-                upto = -(pos+1);
-
-                if (random.nextBoolean()) {
-                  upto--;
-                  assertTrue(upto != -1);
-                  if (VERBOSE) {
-                    System.out.println("  do non-exist seekFloor(" + inputToString(inputMode, term) + ")");
-                  }
-                  isDone = fstEnum.seekFloor(term) == null;
-                } else {
-                  if (VERBOSE) {
-                    System.out.println("  do non-exist seekCeil(" + inputToString(inputMode, term) + ")");
-                  }
-                  isDone = fstEnum.seekCeil(term) == null;
-                }
-
-                break;
-              }
-            }
-            if (attempt == 10) {
-              continue;
-            }
-            
-          } else {
-            final int inc = random.nextInt(pairs.size() - upto - 1);
-            upto += inc;
-            if (upto == -1) {
-              upto = 0;
-            }
-
-            if (random.nextBoolean()) {
-              if (VERBOSE) {
-                System.out.println("  do seekCeil(" + inputToString(inputMode, pairs.get(upto).input) + ")");
-              }
-              isDone = fstEnum.seekCeil(pairs.get(upto).input) == null;
-            } else {
-              if (VERBOSE) {
-                System.out.println("  do seekFloor(" + inputToString(inputMode, pairs.get(upto).input) + ")");
-              }
-              isDone = fstEnum.seekFloor(pairs.get(upto).input) == null;
-            }
-          }
-          if (VERBOSE) {
-            if (!isDone) {
-              System.out.println("    got " + inputToString(inputMode, fstEnum.current().input));
-            } else {
-              System.out.println("    got null");
-            }
-          }
-
-          if (upto == pairs.size()) {
-            assertTrue(isDone);
-            break;
-          } else {
-            assertFalse(isDone);
-            assertEquals(pairs.get(upto).input, fstEnum.current().input);
-            assertEquals(pairs.get(upto).output, fstEnum.current().output);
-
-            /*
-            if (upto < pairs.size()-1) {
-              int tryCount = 0;
-              while(tryCount < 10) {
-                final IntsRef t = toIntsRef(getRandomString(), inputMode);
-                if (pairs.get(upto).input.compareTo(t) < 0) {
-                  final boolean expected = t.compareTo(pairs.get(upto+1).input) < 0;
-                  if (VERBOSE) {
-                    System.out.println("TEST: call beforeNext(" + inputToString(inputMode, t) + "); current=" + inputToString(inputMode, pairs.get(upto).input) + " next=" + inputToString(inputMode, pairs.get(upto+1).input) + " expected=" + expected);
-                  }
-                  assertEquals(expected, fstEnum.beforeNext(t));
-                  break;
-                }
-                tryCount++;
-              }
-            }
-            */
-          }
-        }
-      }
-    }
-
-    private static class CountMinOutput<T> {
-      int count;
-      T output;
-      T finalOutput;
-      boolean isLeaf = true;
-      boolean isFinal;
-    }
-
-    // FST is pruned
-    private void verifyPruned(int inputMode, FST<T> fst, int prune1, int prune2) throws IOException {
-
-      if (VERBOSE) {
-        System.out.println("TEST: now verify pruned " + pairs.size() + " terms; outputs=" + outputs);
-        for(InputOutput<T> pair : pairs) {
-          System.out.println("  " + inputToString(inputMode, pair.input) + ": " + outputs.outputToString(pair.output));
-        }
-      }
-
-      // To validate the FST, we brute-force compute all prefixes
-      // in the terms, matched to their "common" outputs, prune that
-      // set according to the prune thresholds, then assert the FST
-      // matches that same set.
-
-      // NOTE: Crazy RAM intensive!!
-
-      //System.out.println("TEST: tally prefixes");
-
-      // build all prefixes
-      final Map<IntsRef,CountMinOutput<T>> prefixes = new HashMap<IntsRef,CountMinOutput<T>>();
-      final IntsRef scratch = new IntsRef(10);
-      for(InputOutput<T> pair: pairs) {
-        scratch.copyInts(pair.input);
-        for(int idx=0;idx<=pair.input.length;idx++) {
-          scratch.length = idx;
-          CountMinOutput<T> cmo = prefixes.get(scratch);
-          if (cmo == null) {
-            cmo = new CountMinOutput<T>();
-            cmo.count = 1;
-            cmo.output = pair.output;
-            prefixes.put(IntsRef.deepCopyOf(scratch), cmo);
-          } else {
-            cmo.count++;
-            T output1 = cmo.output;
-            if (output1.equals(outputs.getNoOutput())) {
-              output1 = outputs.getNoOutput();
-            }
-            T output2 = pair.output;
-            if (output2.equals(outputs.getNoOutput())) {
-              output2 = outputs.getNoOutput();
-            }
-            cmo.output = outputs.common(output1, output2);
-          }
-          if (idx == pair.input.length) {
-            cmo.isFinal = true;
-            cmo.finalOutput = cmo.output;
-          }
-        }
-      }
-
-      if (VERBOSE) {
-        System.out.println("TEST: now prune");
-      }
-
-      // prune 'em
-      final Iterator<Map.Entry<IntsRef,CountMinOutput<T>>> it = prefixes.entrySet().iterator();
-      while(it.hasNext()) {
-        Map.Entry<IntsRef,CountMinOutput<T>> ent = it.next();
-        final IntsRef prefix = ent.getKey();
-        final CountMinOutput<T> cmo = ent.getValue();
-        if (VERBOSE) {
-          System.out.println("  term prefix=" + inputToString(inputMode, prefix, false) + " count=" + cmo.count + " isLeaf=" + cmo.isLeaf + " output=" + outputs.outputToString(cmo.output) + " isFinal=" + cmo.isFinal);
-        }
-        final boolean keep;
-        if (prune1 > 0) {
-          keep = cmo.count >= prune1;
-        } else {
-          assert prune2 > 0;
-          if (prune2 > 1 && cmo.count >= prune2) {
-            keep = true;
-          } else if (prefix.length > 0) {
-            // consult our parent
-            scratch.length = prefix.length-1;
-            System.arraycopy(prefix.ints, prefix.offset, scratch.ints, 0, scratch.length);
-            final CountMinOutput<T> cmo2 = prefixes.get(scratch);
-            //System.out.println("    parent count = " + (cmo2 == null ? -1 : cmo2.count));
-            keep = cmo2 != null && ((prune2 > 1 && cmo2.count >= prune2) || (prune2 == 1 && (cmo2.count >= 2 || prefix.length <= 1)));
-          } else if (cmo.count >= prune2) {
-            keep = true;
-          } else {
-            keep = false;
-          }
-        }
-
-        if (!keep) {
-          it.remove();
-          //System.out.println("    remove");
-        } else {
-          // clear isLeaf for all ancestors
-          //System.out.println("    keep");
-          scratch.copyInts(prefix);
-          scratch.length--;
-          while(scratch.length >= 0) {
-            final CountMinOutput<T> cmo2 = prefixes.get(scratch);
-            if (cmo2 != null) {
-              //System.out.println("    clear isLeaf " + inputToString(inputMode, scratch));
-              cmo2.isLeaf = false;
-            }
-            scratch.length--;
-          }
-        }
-      }
-
-      if (VERBOSE) {
-        System.out.println("TEST: after prune");
-        for(Map.Entry<IntsRef,CountMinOutput<T>> ent : prefixes.entrySet()) {
-          System.out.println("  " + inputToString(inputMode, ent.getKey(), false) + ": isLeaf=" + ent.getValue().isLeaf + " isFinal=" + ent.getValue().isFinal);
-          if (ent.getValue().isFinal) {
-            System.out.println("    finalOutput=" + outputs.outputToString(ent.getValue().finalOutput));
-          }
-        }
-      }
-
-      if (prefixes.size() <= 1) {
-        assertNull(fst);
-        return;
-      }
-
-      assertNotNull(fst);
-
-      // make sure FST only enums valid prefixes
-      if (VERBOSE) {
-        System.out.println("TEST: check pruned enum");
-      }
-      IntsRefFSTEnum<T> fstEnum = new IntsRefFSTEnum<T>(fst);
-      IntsRefFSTEnum.InputOutput<T> current;
-      while((current = fstEnum.next()) != null) {
-        if (VERBOSE) {
-          System.out.println("  fstEnum.next prefix=" + inputToString(inputMode, current.input, false) + " output=" + outputs.outputToString(current.output));
-        }
-        final CountMinOutput<T> cmo = prefixes.get(current.input);
-        assertNotNull(cmo);
-        assertTrue(cmo.isLeaf || cmo.isFinal);
-        //if (cmo.isFinal && !cmo.isLeaf) {
-        if (cmo.isFinal) {
-          assertEquals(cmo.finalOutput, current.output);
-        } else {
-          assertEquals(cmo.output, current.output);
-        }
-      }
-
-      // make sure all non-pruned prefixes are present in the FST
-      if (VERBOSE) {
-        System.out.println("TEST: verify all prefixes");
-      }
-      final int[] stopNode = new int[1];
-      for(Map.Entry<IntsRef,CountMinOutput<T>> ent : prefixes.entrySet()) {
-        if (ent.getKey().length > 0) {
-          final CountMinOutput<T> cmo = ent.getValue();
-          final T output = run(fst, ent.getKey(), stopNode);
-          if (VERBOSE) {
-            System.out.println("TEST: verify prefix=" + inputToString(inputMode, ent.getKey(), false) + " output=" + outputs.outputToString(cmo.output));
-          }
-          // if (cmo.isFinal && !cmo.isLeaf) {
-          if (cmo.isFinal) {
-            assertEquals(cmo.finalOutput, output);
-          } else {
-            assertEquals(cmo.output, output);
-          }
-          assertEquals(ent.getKey().length, stopNode[0]);
-        }
-      }
-    }
-  }
 
   public void testRandomWords() throws IOException {
     testRandomWords(1000, atLeast(2));
@@ -1058,40 +282,11 @@ public class TestFSTs extends LuceneTest
     }
   }
 
-  static String getRandomString(Random random) {
-    final String term;
-    if (random.nextBoolean()) {
-      term = _TestUtil.randomRealisticUnicodeString(random);
-    } else {
-      // we want to mix in limited-alphabet symbols so
-      // we get more sharing of the nodes given how few
-      // terms we are testing...
-      term = simpleRandomString(random);
-    }
-    return term;
-  }
-
   @Nightly
   public void testBigSet() throws IOException {
     testRandomWords(_TestUtil.nextInt(random(), 50000, 60000), 1);
   }
   
-  static String inputToString(int inputMode, IntsRef term) {
-    return inputToString(inputMode, term, true);
-  }
-
-  private static String inputToString(int inputMode, IntsRef term, boolean isValidUnicode) {
-    if (!isValidUnicode) {
-      return term.toString();
-    } else if (inputMode == 0) {
-      // utf8
-      return toBytesRef(term).utf8ToString() + " " + term;
-    } else {
-      // utf32
-      return UnicodeUtil.newString(term.ints, term.offset, term.length) + " " + term;
-    }
-  }
-
   // Build FST for all unique terms in the test line docs
   // file, up until a time limit
   public void testRealTerms() throws Exception {