You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/07/24 20:23:07 UTC

svn commit: r1613235 [1/3] - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/analysis/ lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/ lucene/codecs/ lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/ lucene...

Author: mikemccand
Date: Thu Jul 24 18:23:06 2014
New Revision: 1613235

URL: http://svn.apache.org/r1613235
Log:
LUCENE-5841: simplify how block tree terms dict assigns terms to blocks

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java
    lucene/dev/branches/branch_4x/lucene/codecs/   (props changed)
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/test/org/apache/lucene/codecs/blocktreeords/TestOrdsBlockTree.java
    lucene/dev/branches/branch_4x/lucene/core/   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnumFrame.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/StringHelper.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/fst/Test2BFST.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
    lucene/dev/branches/branch_4x/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsWriter.java
    lucene/dev/branches/branch_4x/lucene/suggest/   (props changed)
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java
    lucene/dev/branches/branch_4x/lucene/test-framework/   (props changed)
    lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1613235&r1=1613234&r2=1613235&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Thu Jul 24 18:23:06 2014
@@ -95,6 +95,9 @@ Optimizations
 * LUCENE-5834: Empty sorted set and numeric doc values are now singletons.
   (Adrien Grand)
 
+* LUCENE-5841: Improve performance of block tree terms dictionary when
+  assigning terms to blocks.  (Mike McCandless)
+
 Bug Fixes
 
 * LUCENE-5796: Fixes the Scorer.getChildren() method for two combinations 

Modified: lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java?rev=1613235&r1=1613234&r2=1613235&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java Thu Jul 24 18:23:06 2014
@@ -132,7 +132,7 @@ public class TokenInfoDictionaryBuilder 
     System.out.println("  encode...");
 
     PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton();
-    Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, fstOutput, null, true, PackedInts.DEFAULT, true, 15);
+    Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, fstOutput, true, PackedInts.DEFAULT, true, 15);
     IntsRef scratch = new IntsRef();
     long ord = -1; // first ord will be 0
     String lastValue = null;

Modified: lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java?rev=1613235&r1=1613234&r2=1613235&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java Thu Jul 24 18:23:06 2014
@@ -29,7 +29,7 @@ import org.apache.lucene.codecs.Postings
 import org.apache.lucene.codecs.PostingsWriterBase;
 import org.apache.lucene.codecs.TermStats;
 import org.apache.lucene.codecs.TermsConsumer;
-import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
+import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter; // javadocs
 import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.FieldInfo;
@@ -46,10 +46,10 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.fst.Builder;
 import org.apache.lucene.util.fst.BytesRefFSTEnum;
 import org.apache.lucene.util.fst.FST;
-import org.apache.lucene.util.fst.NoOutputs;
 import org.apache.lucene.util.fst.Util;
 import org.apache.lucene.util.packed.PackedInts;
 
@@ -93,8 +93,6 @@ import org.apache.lucene.util.packed.Pac
 
 public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
 
-  // private static boolean DEBUG = IDOrdsSegmentTermsEnum.DEBUG;
-
   static final FSTOrdsOutputs FST_OUTPUTS = new FSTOrdsOutputs();
 
   static final Output NO_OUTPUT = FST_OUTPUTS.getNoOutput();
@@ -109,7 +107,7 @@ public final class OrdsBlockTreeTermsWri
    *  #OrdsBlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int)}. */
   public final static int DEFAULT_MAX_BLOCK_SIZE = 48;
 
-  //public final static boolean DEBUG = false;
+  // public final static boolean DEBUG = false;
   //private final static boolean SAVE_DOT_FILES = false;
 
   static final int OUTPUT_FLAGS_NUM_BITS = 2;
@@ -252,22 +250,42 @@ public final class OrdsBlockTreeTermsWri
   }
 
   private static final class PendingTerm extends PendingEntry {
-    public final BytesRef term;
+    public final byte[] termBytes;
     // stats + metadata
     public final BlockTermState state;
 
     public PendingTerm(BytesRef term, BlockTermState state) {
       super(true);
-      this.term = term;
+      this.termBytes = new byte[term.length];
+      System.arraycopy(term.bytes, term.offset, termBytes, 0, term.length);
       this.state = state;
     }
 
     @Override
     public String toString() {
-      return term.utf8ToString();
+      return brToString(termBytes);
+    }
+  }
+
+  // for debugging
+  @SuppressWarnings("unused")
+  static String brToString(BytesRef b) {
+    try {
+      return b.utf8ToString() + " " + b;
+    } catch (Throwable t) {
+      // If BytesRef isn't actually UTF8, or it's eg a
+      // prefix of UTF8 that ends mid-unicode-char, we
+      // fallback to hex:
+      return b.toString();
     }
   }
 
+  // for debugging
+  @SuppressWarnings("unused")
+  static String brToString(byte[] b) {
+    return brToString(new BytesRef(b));
+  }
+
   private static final class SubIndex {
     public final FST<Output> index;
     public final long termOrdStart;
@@ -288,7 +306,6 @@ public final class OrdsBlockTreeTermsWri
     public final int floorLeadByte;
     public long totFloorTermCount;
     private final long totalTermCount;
-    private final IntsRef scratchIntsRef = new IntsRef();
 
     public PendingBlock(BytesRef prefix, long fp, boolean hasTerms, long totalTermCount,
                         boolean isFloor, int floorLeadByte, List<SubIndex> subIndices) {
@@ -297,6 +314,7 @@ public final class OrdsBlockTreeTermsWri
       this.fp = fp;
       this.hasTerms = hasTerms;
       this.totalTermCount = totalTermCount;
+      assert totalTermCount > 0;
       this.isFloor = isFloor;
       this.floorLeadByte = floorLeadByte;
       this.subIndices = subIndices;
@@ -304,25 +322,26 @@ public final class OrdsBlockTreeTermsWri
 
     @Override
     public String toString() {
-      return "BLOCK: " + prefix.utf8ToString();
+      return "BLOCK: " + brToString(prefix);
     }
 
-    public void compileIndex(List<PendingBlock> floorBlocks, RAMOutputStream scratchBytes) throws IOException {
+    public void compileIndex(List<PendingBlock> blocks, RAMOutputStream scratchBytes, IntsRef scratchIntsRef) throws IOException {
 
-      assert (isFloor && floorBlocks != null && floorBlocks.size() != 0) || (!isFloor && floorBlocks == null): "isFloor=" + isFloor + " floorBlocks=" + floorBlocks;
+      assert (isFloor && blocks.size() > 1) || (isFloor == false && blocks.size() == 1): "isFloor=" + isFloor + " blocks=" + blocks;
+      assert this == blocks.get(0);
 
       assert scratchBytes.getFilePointer() == 0;
 
       // TODO: try writing the leading vLong in MSB order
       // (opposite of what Lucene does today), for better
       // outputs sharing in the FST
-      //System.out.println("\ncompileIndex isFloor=" + isFloor  + " numTerms=" + totalTermCount);
       long lastSumTotalTermCount = 0;
       long sumTotalTermCount = totalTermCount;
       scratchBytes.writeVLong(encodeOutput(fp, hasTerms, isFloor));
       if (isFloor) {
-        scratchBytes.writeVInt(floorBlocks.size());
-        for (PendingBlock sub : floorBlocks) {
+        scratchBytes.writeVInt(blocks.size()-1);
+        for (int i=1;i<blocks.size();i++) {
+          PendingBlock sub = blocks.get(i);
           assert sub.floorLeadByte != -1;
           //if (DEBUG) {
           //  System.out.println("    write floorLeadByte=" + Integer.toHexString(sub.floorLeadByte&0xff));
@@ -339,7 +358,7 @@ public final class OrdsBlockTreeTermsWri
 
       final Builder<Output> indexBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1,
                                                          0, 0, true, false, Integer.MAX_VALUE,
-                                                         FST_OUTPUTS, null, false,
+                                                         FST_OUTPUTS, false,
                                                          PackedInts.COMPACT, true, 15);
       //if (DEBUG) {
       //  System.out.println("  compile index for prefix=" + prefix);
@@ -356,31 +375,22 @@ public final class OrdsBlockTreeTermsWri
 
       // Copy over index for all sub-blocks
 
-      if (subIndices != null) {
-        for(SubIndex subIndex : subIndices) {
-          //System.out.println("  append subIndex: termOrdStart=" + subIndex.termOrdStart);
-          append(indexBuilder, subIndex.index, subIndex.termOrdStart);
-        }
-      }
-
-      if (floorBlocks != null) {
-        long termOrdOffset = totalTermCount;
-        for (PendingBlock sub : floorBlocks) {
-          if (sub.subIndices != null) {
-            for(SubIndex subIndex : sub.subIndices) {
-              append(indexBuilder, subIndex.index, termOrdOffset + subIndex.termOrdStart);
-            }
+      long termOrdOffset = 0;
+      for(PendingBlock block : blocks) {
+        if (block.subIndices != null) {
+          for(SubIndex subIndex : block.subIndices) {
+            append(indexBuilder, subIndex.index, termOrdOffset + subIndex.termOrdStart, scratchIntsRef);
           }
-          sub.subIndices = null;
-          termOrdOffset += sub.totalTermCount;
+          block.subIndices = null;
         }
-        totFloorTermCount = termOrdOffset;
-      } else {
-        totFloorTermCount = sumTotalTermCount;
+        termOrdOffset += block.totalTermCount;
       }
+      totFloorTermCount = termOrdOffset;
+
+      assert sumTotalTermCount == totFloorTermCount;
 
       index = indexBuilder.finish();
-      subIndices = null;
+      assert subIndices == null;
 
       /*
       Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"));
@@ -393,7 +403,7 @@ public final class OrdsBlockTreeTermsWri
     // TODO: maybe we could add bulk-add method to
     // Builder?  Takes FST and unions it w/ current
     // FST.
-    private void append(Builder<Output> builder, FST<Output> subIndex, long termOrdOffset) throws IOException {
+    private void append(Builder<Output> builder, FST<Output> subIndex, long termOrdOffset, IntsRef scratchIntsRef) throws IOException {
       final BytesRefFSTEnum<Output> subIndexEnum = new BytesRefFSTEnum<>(subIndex);
       BytesRefFSTEnum.InputOutput<Output> indexEnt;
       while ((indexEnt = subIndexEnum.next()) != null) {
@@ -409,7 +419,8 @@ public final class OrdsBlockTreeTermsWri
     }
   }
 
-  final RAMOutputStream scratchBytes = new RAMOutputStream();
+  private final RAMOutputStream scratchBytes = new RAMOutputStream();
+  private final IntsRef scratchIntsRef = new IntsRef();
 
   class TermsWriter extends TermsConsumer {
     private final FieldInfo fieldInfo;
@@ -420,398 +431,202 @@ public final class OrdsBlockTreeTermsWri
     int docCount;
     long indexStartFP;
 
-    // Used only to partition terms into the block tree; we
-    // don't pull an FST from this builder:
-    private final NoOutputs noOutputs;
-    private final Builder<Object> blockBuilder;
-
-    // PendingTerm or PendingBlock:
+    // Records index into pending where the current prefix at that
+    // length "started"; for example, if current term starts with 't',
+    // startsByPrefix[0] is the index into pending for the first
+    // term/sub-block starting with 't'.  We use this to figure out when
+    // to write a new block:
+    private final BytesRef lastTerm = new BytesRef();
+    private int[] prefixStarts = new int[8];
+
+    private final long[] longs;
+
+    // Pending stack of terms and blocks.  As terms arrive (in sorted order)
+    // we append to this stack, and once the top of the stack has enough
+    // terms starting with a common prefix, write write a new block with
+    // those terms and replace those terms in the stack with a new block:
     private final List<PendingEntry> pending = new ArrayList<>();
 
-    // Index into pending of most recently written block
-    private int lastBlockIndex = -1;
-
-    // Re-used when segmenting a too-large block into floor
-    // blocks:
-    private int[] subBytes = new int[10];
-    private int[] subTermCounts = new int[10];
-    private int[] subTermCountSums = new int[10];
-    private int[] subSubCounts = new int[10];
+    // Reused in writeBlocks:
+    private final List<PendingBlock> newBlocks = new ArrayList<>();
 
     private BytesRef minTerm;
     private BytesRef maxTerm = new BytesRef();
 
-    // This class assigns terms to blocks "naturally", ie,
-    // according to the number of terms under a given prefix
-    // that we encounter:
-    private class FindBlocks extends Builder.FreezeTail<Object> {
+    /** Writes the top count entries in pending, using prevTerm to compute the prefix. */
+    void writeBlocks(int prefixLength, int count) throws IOException {
 
-      @Override
-      public void freeze(final Builder.UnCompiledNode<Object>[] frontier, int prefixLenPlus1, final IntsRef lastInput) throws IOException {
+      assert count > 0;
 
-        //if (DEBUG) System.out.println("  freeze prefixLenPlus1=" + prefixLenPlus1);
-
-        for(int idx=lastInput.length; idx >= prefixLenPlus1; idx--) {
-          final Builder.UnCompiledNode<Object> node = frontier[idx];
-
-          long totCount = 0;
+      /*
+      if (DEBUG) {
+        BytesRef br = new BytesRef(lastTerm.bytes);
+        br.offset = lastTerm.offset;
+        br.length = prefixLength;
+        System.out.println("writeBlocks: " + br.utf8ToString() + " count=" + count);
+      }
+      */
 
-          if (node.isFinal) {
-            totCount++;
-          }
+      // Root block better write all remaining pending entries:
+      assert prefixLength > 0 || count == pending.size();
 
-          for(int arcIdx=0;arcIdx<node.numArcs;arcIdx++) {
-            @SuppressWarnings("unchecked") final Builder.UnCompiledNode<Object> target = (Builder.UnCompiledNode<Object>) node.arcs[arcIdx].target;
-            totCount += target.inputCount;
-            target.clear();
-            node.arcs[arcIdx].target = null;
-          }
-          node.numArcs = 0;
+      int lastSuffixLeadLabel = -1;
 
-          if (totCount >= minItemsInBlock || idx == 0) {
-            // We are on a prefix node that has enough
-            // entries (terms or sub-blocks) under it to let
-            // us write a new block or multiple blocks (main
-            // block + follow on floor blocks):
-            //if (DEBUG) {
-            //  if (totCount < minItemsInBlock && idx != 0) {
-            //    System.out.println("  force block has terms");
-            //  }
-            //}
-            writeBlocks(lastInput, idx, (int) totCount);
-            node.inputCount = 1;
-          } else {
-            // stragglers!  carry count upwards
-            node.inputCount = totCount;
-          }
-          frontier[idx] = new Builder.UnCompiledNode<>(blockBuilder, idx);
-        }
-      }
-    }
+      // True if we saw at least one term in this block (we record if a block
+      // only points to sub-blocks in the terms index so we can avoid seeking
+      // to it when we are looking for a term):
+      boolean hasTerms = false;
+      boolean hasSubBlocks = false;
 
-    // Write the top count entries on the pending stack as
-    // one or more blocks.  Returns how many blocks were
-    // written.  If the entry count is <= maxItemsPerBlock
-    // we just write a single block; else we break into
-    // primary (initial) block and then one or more
-    // following floor blocks:
-
-    void writeBlocks(IntsRef prevTerm, int prefixLength, int count) throws IOException {
-      if (count <= maxItemsInBlock) {
-        // Easy case: not floor block.  Eg, prefix is "foo",
-        // and we found 30 terms/sub-blocks starting w/ that
-        // prefix, and minItemsInBlock <= 30 <=
-        // maxItemsInBlock.
-        final PendingBlock nonFloorBlock = writeBlock(prevTerm, prefixLength, prefixLength, count, count, 0, false, -1, true);
-        nonFloorBlock.compileIndex(null, scratchBytes);
-        pending.add(nonFloorBlock);
-      } else {
-        // Floor block case.  Eg, prefix is "foo" but we
-        // have 100 terms/sub-blocks starting w/ that
-        // prefix.  We segment the entries into a primary
-        // block and following floor blocks using the first
-        // label in the suffix to assign to floor blocks.
-
-        // TODO: we could store min & max suffix start byte
-        // in each block, to make floor blocks authoritative
-
-        /*
-        if (DEBUG) {
-          final BytesRef prefix = new BytesRef(prefixLength);
-          for(int m=0;m<prefixLength;m++) {
-            prefix.bytes[m] = (byte) prevTerm.ints[m];
-          }
-          prefix.length = prefixLength;
-          //System.out.println("\nWBS count=" + count + " prefix=" + prefix.utf8ToString() + " " + prefix);
-          System.out.println("writeBlocks: prefix=" + toString(prefix) + " " + prefix + " count=" + count + " pending.size()=" + pending.size());
-        }
-        */
-        //System.out.println("\nwbs count=" + count);
+      int start = pending.size()-count;
+      int end = pending.size();
+      int nextBlockStart = start;
+      int nextFloorLeadLabel = -1;
 
-        final int savLabel = prevTerm.ints[prevTerm.offset + prefixLength];
+      for (int i=start; i<end; i++) {
 
-        // Count up how many items fall under
-        // each unique label after the prefix.
-        
-        // TODO: this is wasteful since the builder had
-        // already done this (partitioned these sub-terms
-        // according to their leading prefix byte)
-        
-        final List<PendingEntry> slice = pending.subList(pending.size()-count, pending.size());
-        int lastSuffixLeadLabel = -1;
-        int termCount = 0;
-        int subCount = 0;
-        int numSubs = 0;
+        PendingEntry ent = pending.get(i);
 
-        for(PendingEntry ent : slice) {
+        int suffixLeadLabel;
 
-          // First byte in the suffix of this term
-          final int suffixLeadLabel;
-          if (ent.isTerm) {
-            PendingTerm term = (PendingTerm) ent;
-            if (term.term.length == prefixLength) {
-              // Suffix is 0, ie prefix 'foo' and term is
-              // 'foo' so the term has empty string suffix
-              // in this block
-              assert lastSuffixLeadLabel == -1;
-              assert numSubs == 0;
-              suffixLeadLabel = -1;
-            } else {
-              suffixLeadLabel = term.term.bytes[term.term.offset + prefixLength] & 0xff;
-            }
+        if (ent.isTerm) {
+          PendingTerm term = (PendingTerm) ent;
+          if (term.termBytes.length == prefixLength) {
+            // Suffix is 0, i.e. prefix 'foo' and term is
+            // 'foo' so the term has empty string suffix
+            // in this block
+            assert lastSuffixLeadLabel == -1;
+            suffixLeadLabel = -1;
           } else {
-            PendingBlock block = (PendingBlock) ent;
-            assert block.prefix.length > prefixLength;
-            suffixLeadLabel = block.prefix.bytes[block.prefix.offset + prefixLength] & 0xff;
+            suffixLeadLabel = term.termBytes[prefixLength] & 0xff;
           }
-
-          if (suffixLeadLabel != lastSuffixLeadLabel && (termCount + subCount) != 0) {
-            if (subBytes.length == numSubs) {
-              subBytes = ArrayUtil.grow(subBytes);
-              subTermCounts = ArrayUtil.grow(subTermCounts);
-              subSubCounts = ArrayUtil.grow(subSubCounts);
-            }
-            subBytes[numSubs] = lastSuffixLeadLabel;
-            lastSuffixLeadLabel = suffixLeadLabel;
-            subTermCounts[numSubs] = termCount;
-            subSubCounts[numSubs] = subCount;
-            /*
-            if (suffixLeadLabel == -1) {
-              System.out.println("  sub " + -1 + " termCount=" + termCount + " subCount=" + subCount);
-            } else {
-              System.out.println("  sub " + Integer.toHexString(suffixLeadLabel) + " termCount=" + termCount + " subCount=" + subCount);
-            }
-            */
-            termCount = subCount = 0;
-            numSubs++;
-          }
-
-          if (ent.isTerm) {
-            termCount++;
-          } else {
-            subCount++;
+        } else {
+          PendingBlock block = (PendingBlock) ent;
+          assert block.prefix.length > prefixLength;
+          suffixLeadLabel = block.prefix.bytes[block.prefix.offset + prefixLength] & 0xff;
+        }
+        // if (DEBUG) System.out.println("  i=" + i + " ent=" + ent + " suffixLeadLabel=" + suffixLeadLabel);
+
+        if (suffixLeadLabel != lastSuffixLeadLabel) {
+          int itemsInBlock = i - nextBlockStart;
+          if (itemsInBlock >= minItemsInBlock && end-nextBlockStart > maxItemsInBlock) {
+            // The count is too large for one block, so we must break it into "floor" blocks, where we record
+            // the leading label of the suffix of the first term in each floor block, so at search time we can
+            // jump to the right floor block.  We just use a naive greedy segmenter here: make a new floor
+            // block as soon as we have at least minItemsInBlock.  This is not always best: it often produces
+            // a too-small block as the final block:
+            boolean isFloor = itemsInBlock < count;
+            newBlocks.add(writeBlock(prefixLength, isFloor, nextFloorLeadLabel, nextBlockStart, i, hasTerms, hasSubBlocks));
+
+            hasTerms = false;
+            hasSubBlocks = false;
+            nextFloorLeadLabel = suffixLeadLabel;
+            nextBlockStart = i;
           }
-        }
 
-        if (subBytes.length == numSubs) {
-          subBytes = ArrayUtil.grow(subBytes);
-          subTermCounts = ArrayUtil.grow(subTermCounts);
-          subSubCounts = ArrayUtil.grow(subSubCounts);
+          lastSuffixLeadLabel = suffixLeadLabel;
         }
 
-        subBytes[numSubs] = lastSuffixLeadLabel;
-        subTermCounts[numSubs] = termCount;
-        subSubCounts[numSubs] = subCount;
-        numSubs++;
-        /*
-        if (lastSuffixLeadLabel == -1) {
-          System.out.println("  sub " + -1 + " termCount=" + termCount + " subCount=" + subCount);
+        if (ent.isTerm) {
+          hasTerms = true;
         } else {
-          System.out.println("  sub " + Integer.toHexString(lastSuffixLeadLabel) + " termCount=" + termCount + " subCount=" + subCount);
+          hasSubBlocks = true;
         }
-        */
+      }
 
-        if (subTermCountSums.length < numSubs) {
-          subTermCountSums = ArrayUtil.grow(subTermCountSums, numSubs);
-        }
+      // Write last block, if any:
+      if (nextBlockStart < end) {
+        int itemsInBlock = end - nextBlockStart;
+        boolean isFloor = itemsInBlock < count;
+        newBlocks.add(writeBlock(prefixLength, isFloor, nextFloorLeadLabel, nextBlockStart, end, hasTerms, hasSubBlocks));
+      }
 
-        // Roll up (backwards) the termCounts; postings impl
-        // needs this to know where to pull the term slice
-        // from its pending terms stack:
-        int sum = 0;
-        for(int idx=numSubs-1;idx>=0;idx--) {
-          sum += subTermCounts[idx];
-          subTermCountSums[idx] = sum;
-        }
+      assert newBlocks.isEmpty() == false;
 
-        // TODO: make a better segmenter?  It'd have to
-        // absorb the too-small end blocks backwards into
-        // the previous blocks
-
-        // Naive greedy segmentation; this is not always
-        // best (it can produce a too-small block as the
-        // last block):
-        int pendingCount = 0;
-        int startLabel = subBytes[0];
-        int curStart = count;
-        subCount = 0;
-
-        final List<PendingBlock> floorBlocks = new ArrayList<>();
-        PendingBlock firstBlock = null;
-
-        for(int sub=0;sub<numSubs;sub++) {
-          pendingCount += subTermCounts[sub] + subSubCounts[sub];
-          //System.out.println("  " + (subTermCounts[sub] + subSubCounts[sub]));
-          subCount++;
-
-          // Greedily make a floor block as soon as we've
-          // crossed the min count
-          if (pendingCount >= minItemsInBlock) {
-            final int curPrefixLength;
-            if (startLabel == -1) {
-              curPrefixLength = prefixLength;
-            } else {
-              curPrefixLength = 1+prefixLength;
-              // floor term:
-              prevTerm.ints[prevTerm.offset + prefixLength] = startLabel;
-            }
-            //System.out.println("  " + subCount + " subs");
-            final PendingBlock floorBlock = writeBlock(prevTerm, prefixLength, curPrefixLength, curStart, pendingCount, subTermCountSums[1+sub], true, startLabel, curStart == pendingCount);
-            if (firstBlock == null) {
-              firstBlock = floorBlock;
-            } else {
-              floorBlocks.add(floorBlock);
-            }
-            curStart -= pendingCount;
-            //System.out.println("    = " + pendingCount);
-            pendingCount = 0;
-
-            assert minItemsInBlock == 1 || subCount > 1: "minItemsInBlock=" + minItemsInBlock + " subCount=" + subCount + " sub=" + sub + " of " + numSubs + " subTermCount=" + subTermCountSums[sub] + " subSubCount=" + subSubCounts[sub] + " depth=" + prefixLength;
-            subCount = 0;
-            startLabel = subBytes[sub+1];
+      PendingBlock firstBlock = newBlocks.get(0);
 
-            if (curStart == 0) {
-              break;
-            }
+      assert firstBlock.isFloor || newBlocks.size() == 1;
 
-            if (curStart <= maxItemsInBlock) {
-              // remainder is small enough to fit into a
-              // block.  NOTE that this may be too small (<
-              // minItemsInBlock); need a true segmenter
-              // here
-              assert startLabel != -1;
-              assert firstBlock != null;
-              prevTerm.ints[prevTerm.offset + prefixLength] = startLabel;
-              //System.out.println("  final " + (numSubs-sub-1) + " subs");
-              /*
-              for(sub++;sub < numSubs;sub++) {
-                System.out.println("  " + (subTermCounts[sub] + subSubCounts[sub]));
-              }
-              System.out.println("    = " + curStart);
-              if (curStart < minItemsInBlock) {
-                System.out.println("      **");
-              }
-              */
-              floorBlocks.add(writeBlock(prevTerm, prefixLength, prefixLength+1, curStart, curStart, 0, true, startLabel, true));
-              break;
-            }
-          }
-        }
+      firstBlock.compileIndex(newBlocks, scratchBytes, scratchIntsRef);
 
-        prevTerm.ints[prevTerm.offset + prefixLength] = savLabel;
+      // Remove slice from the top of the pending stack, that we just wrote:
+      pending.subList(pending.size()-count, pending.size()).clear();
 
-        assert firstBlock != null;
-        firstBlock.compileIndex(floorBlocks, scratchBytes);
+      // Append new block
+      pending.add(firstBlock);
 
-        pending.add(firstBlock);
-        //if (DEBUG) System.out.println("  done pending.size()=" + pending.size());
-      }
-      lastBlockIndex = pending.size()-1;
+      newBlocks.clear();
     }
 
-    // BytesRef prefix;
+    /** Writes the specified slice (start is inclusive, end is exclusive)
+     *  from pending stack as a new block.  If isFloor is true, there
+     *  were too many (more than maxItemsInBlock) entries sharing the
+     *  same prefix, and so we broke it into multiple floor blocks where
+     *  we record the starting label of the suffix of each floor block. */
+    private PendingBlock writeBlock(int prefixLength, boolean isFloor, int floorLeadLabel, int start, int end, boolean hasTerms, boolean hasSubBlocks) throws IOException {
 
-    // for debugging
-    @SuppressWarnings("unused")
-    private String toString(BytesRef b) {
-      try {
-        return b.utf8ToString() + " " + b;
-      } catch (Throwable t) {
-        // If BytesRef isn't actually UTF8, or it's eg a
-        // prefix of UTF8 that ends mid-unicode-char, we
-        // fallback to hex:
-        return b.toString();
-      }
-    }
-
-    // Writes all entries in the pending slice as a single
-    // block: 
-    private PendingBlock writeBlock(IntsRef prevTerm, int prefixLength, int indexPrefixLength, int startBackwards, int length,
-                                    int futureTermCount, boolean isFloor, int floorLeadByte, boolean isLastInFloor) throws IOException {
-
-      assert length > 0;
+      assert end > start;
 
-      final int start = pending.size()-startBackwards;
+      long startFP = out.getFilePointer();
 
-      assert start >= 0: "pending.size()=" + pending.size() + " startBackwards=" + startBackwards + " length=" + length;
+      // if (DEBUG) System.out.println("    writeBlock fp=" + startFP + " isFloor=" + isFloor + " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + " hasTerms=" + hasTerms + " hasSubBlocks=" + hasSubBlocks);
 
-      final List<PendingEntry> slice = pending.subList(start, start + length);
+      boolean hasFloorLeadLabel = isFloor && floorLeadLabel != -1;
 
-      final long startFP = out.getFilePointer();
-
-      // System.out.println("\nwriteBlock field=" + fieldInfo.name + " seg=" + segment + " prefixLength=" + prefixLength + " floorLeadByte=" + floorLeadByte + " isLastInFloor=" + isLastInFloor + " length=" + length + " startFP=" + startFP);
-
-      final BytesRef prefix = new BytesRef(indexPrefixLength);
-      for(int m=0;m<indexPrefixLength;m++) {
-        prefix.bytes[m] = (byte) prevTerm.ints[m];
-      }
-      prefix.length = indexPrefixLength;
-      // System.out.println("  prefix=" + toString(prefix));
-      // this.prefix = prefix;
+      final BytesRef prefix = new BytesRef(prefixLength + (hasFloorLeadLabel ? 1 : 0));
+      System.arraycopy(lastTerm.bytes, 0, prefix.bytes, 0, prefixLength);
+      prefix.length = prefixLength;
 
       // Write block header:
-      out.writeVInt((length<<1)|(isLastInFloor ? 1:0));
+      int numEntries = end - start;
+      int code = numEntries << 1;
+      if (end == pending.size()) {
+        // Last block:
+        code |= 1;
+      }
+      out.writeVInt(code);
 
       // if (DEBUG) {
-      //  System.out.println("  writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + toString(prefix) + " entCount=" + length + " startFP=" + startFP + " futureTermCount=" + futureTermCount + (isFloor ? (" floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") + " isLastInFloor=" + isLastInFloor);
+      //   System.out.println("  writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + brToString(prefix) + " entCount=" + length + " startFP=" + startFP + (isFloor ? (" floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") + " isLastInFloor=" + isLastInFloor);
       // }
 
-      // 1st pass: pack term suffix bytes into byte[] blob
-      // TODO: cutover to bulk int codec... simple64?
-
-      final boolean isLeafBlock;
-      if (lastBlockIndex < start) {
-        // This block definitely does not contain sub-blocks:
-        isLeafBlock = true;
-        //System.out.println("no scan true isFloor=" + isFloor);
-      } else if (!isFloor) {
-        // This block definitely does contain at least one sub-block:
-        isLeafBlock = false;
-        //System.out.println("no scan false " + lastBlockIndex + " vs start=" + start + " len=" + length);
-      } else {
-        // Must scan up-front to see if there is a sub-block
-        boolean v = true;
-        //System.out.println("scan " + lastBlockIndex + " vs start=" + start + " len=" + length);
-        for (PendingEntry ent : slice) {
-          if (!ent.isTerm) {
-            v = false;
-            break;
-          }
-        }
-        isLeafBlock = v;
-      }
-      // System.out.println("  isLeaf=" + isLeafBlock);
-
       final List<SubIndex> subIndices;
 
+      // We optimize the leaf block case (block has only terms), writing a more
+      // compact format in this case:
+      boolean isLeafBlock = hasSubBlocks == false;
+
       // Number of terms in this block
       int termCount;
 
       // Number of terms in this block and all sub-blocks (recursively)
       long totalTermCount;
 
-      long[] longs = new long[longsSize];
       boolean absolute = true;
 
-      int countx = 0;
       if (isLeafBlock) {
+        // Only terms:
         subIndices = null;
-        for (PendingEntry ent : slice) {
-          assert ent.isTerm;
+        for (int i=start;i<end;i++) {
+          PendingEntry ent = pending.get(i);
+          assert ent.isTerm: "i=" + i;
+
           PendingTerm term = (PendingTerm) ent;
+          assert StringHelper.startsWith(term.termBytes, prefix): "term.term=" + term.termBytes + " prefix=" + prefix;
           BlockTermState state = term.state;
-          final int suffix = term.term.length - prefixLength;
+          final int suffix = term.termBytes.length - prefixLength;
           /*
           if (DEBUG) {
-              BytesRef suffixBytes = new BytesRef(suffix);
-              System.arraycopy(term.term.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
-              suffixBytes.length = suffix;
-              System.out.println("    " + (countx++) + ": write term suffix=" + toString(suffixBytes));
+            BytesRef suffixBytes = new BytesRef(suffix);
+            System.arraycopy(term.term.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
+            suffixBytes.length = suffix;
+            System.out.println("    write term suffix=" + suffixBytes);
           }
           */
           // For leaf block we write suffix straight
           suffixWriter.writeVInt(suffix);
-          suffixWriter.writeBytes(term.term.bytes, prefixLength, suffix);
+          suffixWriter.writeBytes(term.termBytes, prefixLength, suffix);
+          assert floorLeadLabel == -1 || (term.termBytes[prefixLength] & 0xff) >= floorLeadLabel;
 
           // Write term stats, to separate byte[] blob:
           statsWriter.writeVInt(state.docFreq);
@@ -819,7 +634,6 @@ public final class OrdsBlockTreeTermsWri
             assert state.totalTermFreq >= state.docFreq: state.totalTermFreq + " vs " + state.docFreq;
             statsWriter.writeVLong(state.totalTermFreq - state.docFreq);
           }
-          // System.out.println("    dF=" + state.docFreq + " tTF=" + state.totalTermFreq);
 
           // Write term meta data
           postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute);
@@ -831,29 +645,33 @@ public final class OrdsBlockTreeTermsWri
           bytesWriter.reset();
           absolute = false;
         }
-        termCount = length;
-        totalTermCount = length;
+        termCount = end-start;
+        totalTermCount = end-start;
       } else {
+        // Mixed terms and sub-blocks:
         subIndices = new ArrayList<>();
         termCount = 0;
         totalTermCount = 0;
-        for (PendingEntry ent : slice) {
+        for (int i=start;i<end;i++) {
+          PendingEntry ent = pending.get(i);
           if (ent.isTerm) {
             PendingTerm term = (PendingTerm) ent;
+            assert StringHelper.startsWith(term.termBytes, prefix): "term.term=" + term.termBytes + " prefix=" + prefix;
             BlockTermState state = term.state;
-            final int suffix = term.term.length - prefixLength;
+            final int suffix = term.termBytes.length - prefixLength;
             /*
             if (DEBUG) {
-                BytesRef suffixBytes = new BytesRef(suffix);
-                System.arraycopy(term.term.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
-                suffixBytes.length = suffix;
-                System.out.println("    " + (countx++) + ": write term suffix=" + toString(suffixBytes) + " termOrd=" + totalTermCount);
+              BytesRef suffixBytes = new BytesRef(suffix);
+              System.arraycopy(term.term.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
+              suffixBytes.length = suffix;
+              System.out.println("    write term suffix=" + suffixBytes);
             }
             */
             // For non-leaf block we borrow 1 bit to record
             // if entry is term or sub-block
             suffixWriter.writeVInt(suffix<<1);
-            suffixWriter.writeBytes(term.term.bytes, prefixLength, suffix);
+            suffixWriter.writeBytes(term.termBytes, prefixLength, suffix);
+            assert floorLeadLabel == -1 || (term.termBytes[prefixLength] & 0xff) >= floorLeadLabel;
 
             // Write term stats, to separate byte[] blob:
             statsWriter.writeVInt(state.docFreq);
@@ -884,27 +702,30 @@ public final class OrdsBlockTreeTermsWri
             totalTermCount++;
           } else {
             PendingBlock block = (PendingBlock) ent;
+            assert StringHelper.startsWith(block.prefix, prefix);
             final int suffix = block.prefix.length - prefixLength;
 
             assert suffix > 0;
 
-            // For non-leaf block we steal 1 bit to record
+            // For non-leaf block we borrow 1 bit to record
             // if entry is term or sub-block
             suffixWriter.writeVInt((suffix<<1)|1);
             suffixWriter.writeBytes(block.prefix.bytes, prefixLength, suffix);
-            assert block.fp < startFP;
 
-            suffixWriter.writeVLong(startFP - block.fp);
+            assert floorLeadLabel == -1 || (block.prefix.bytes[prefixLength] & 0xff) >= floorLeadLabel;
+
+            assert block.fp < startFP;
 
             /*
             if (DEBUG) {
-                BytesRef suffixBytes = new BytesRef(suffix);
-                System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
-                suffixBytes.length = suffix;
-                System.out.println("    " + (countx++) + ": write sub-block suffix=" + toString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor + " totFloorTermCount=" + block.totFloorTermCount);
+              BytesRef suffixBytes = new BytesRef(suffix);
+              System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
+              suffixBytes.length = suffix;
+              System.out.println("    write sub-block suffix=" + brToString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
             }
             */
 
+            suffixWriter.writeVLong(startFP - block.fp);
             suffixWriter.writeVLong(block.totFloorTermCount);
             subIndices.add(new SubIndex(block.index, totalTermCount));
             totalTermCount += block.totFloorTermCount;
@@ -925,7 +746,6 @@ public final class OrdsBlockTreeTermsWri
 
       // Write term stats byte[] blob
       out.writeVInt((int) statsWriter.getFilePointer());
-      //System.out.println("write stats @ fp=" + out.getFilePointer());
       statsWriter.writeTo(out);
       statsWriter.reset();
 
@@ -934,41 +754,22 @@ public final class OrdsBlockTreeTermsWri
       metaWriter.writeTo(out);
       metaWriter.reset();
 
-      // Remove slice replaced by block:
-      slice.clear();
-
-      if (lastBlockIndex >= start) {
-        if (lastBlockIndex < start+length) {
-          lastBlockIndex = start;
-        } else {
-          lastBlockIndex -= length;
-        }
-      }
-
       // if (DEBUG) {
       //   System.out.println("      fpEnd=" + out.getFilePointer());
       // }
 
-      return new PendingBlock(prefix, startFP, termCount != 0, totalTermCount, isFloor, floorLeadByte, subIndices);
+      if (hasFloorLeadLabel) {
+        // We already allocated to length+1 above:
+        prefix.bytes[prefix.length++] = (byte) floorLeadLabel;
+      }
+
+      return new PendingBlock(prefix, startFP, hasTerms, totalTermCount, isFloor, floorLeadLabel, subIndices);
     }
 
     TermsWriter(FieldInfo fieldInfo) {
       this.fieldInfo = fieldInfo;
-
-      noOutputs = NoOutputs.getSingleton();
-
-      // This Builder is just used transiently to fragment
-      // terms into "good" blocks; we don't save the
-      // resulting FST:
-      blockBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1,
-                                   0, 0, true,
-                                   true, Integer.MAX_VALUE,
-                                   noOutputs,
-                                   new FindBlocks(), false,
-                                   PackedInts.COMPACT,
-                                   true, 15);
-
       this.longsSize = postingsWriter.setField(fieldInfo);
+      this.longs = new long[longsSize];
     }
     
     @Override
@@ -990,14 +791,14 @@ public final class OrdsBlockTreeTermsWri
       return postingsWriter;
     }
 
-    private final IntsRef scratchIntsRef = new IntsRef();
-
     /** Writes one term's worth of postings. */
     @Override
     public void finishTerm(BytesRef text, TermStats stats) throws IOException {
       assert stats.docFreq != 0;
+      assert fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY || stats.totalTermFreq >= stats.docFreq: "postingsWriter=" + postingsWriter;
+
+      pushTerm(text);
 
-      blockBuilder.add(Util.toIntsRef(text, scratchIntsRef), noOutputs.getNoOutput());
       BlockTermState state = postingsWriter.newTermState();
       state.docFreq = stats.docFreq;
       state.totalTermFreq = stats.totalTermFreq;
@@ -1013,10 +814,52 @@ public final class OrdsBlockTreeTermsWri
       maxTerm.copyBytes(text);
     }
 
+    /** Pushes the new term to the top of the stack, and writes new blocks. */
+    private void pushTerm(BytesRef text) throws IOException {
+      int limit = Math.min(lastTerm.length, text.length);
+
+      // Find common prefix between last term and current term:
+      int pos = 0;
+      while (pos < limit && lastTerm.bytes[pos] == text.bytes[text.offset+pos]) {
+        pos++;
+      }
+
+      // if (DEBUG) System.out.println("  shared=" + pos + "  lastTerm.length=" + lastTerm.length);
+
+      // Close the "abandoned" suffix now:
+      for(int i=lastTerm.length-1;i>=pos;i--) {
+
+        // How many items on top of the stack share the current suffix
+        // we are closing:
+        int prefixTopSize = pending.size() - prefixStarts[i];
+        if (prefixTopSize >= minItemsInBlock) {
+          // if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + " minItemsInBlock=" + minItemsInBlock);
+          writeBlocks(i+1, prefixTopSize);
+          prefixStarts[i] -= prefixTopSize-1;
+        }
+      }
+
+      if (prefixStarts.length < text.length) {
+        prefixStarts = ArrayUtil.grow(prefixStarts, text.length);
+      }
+
+      // Init new tail:
+      for(int i=pos;i<text.length;i++) {
+        prefixStarts[i] = pending.size();
+      }
+
+      lastTerm.copyBytes(text);
+    }
+
     // Finishes all terms in this field
     public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
       if (numTerms > 0) {
-        blockBuilder.finish();
+        // if (DEBUG) System.out.println("BTTW.finish pending.size()=" + pending.size());
+
+        // TODO: if pending.size() is already 1 with a non-zero prefix length
+        // we can save writing a "degenerate" root block, but we have to
+        // fix all the places that assume the root block's prefix is the empty string:
+        writeBlocks(0, pending.size());
 
         // We better have one final "root" block:
         assert pending.size() == 1 && !pending.get(0).isTerm: "pending.size()=" + pending.size() + " pending=" + pending;

Modified: lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java?rev=1613235&r1=1613234&r2=1613235&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java Thu Jul 24 18:23:06 2014
@@ -122,7 +122,7 @@ public final class MemoryPostingsFormat 
       this.field = field;
       this.doPackFST = doPackFST;
       this.acceptableOverheadRatio = acceptableOverheadRatio;
-      builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doPackFST, acceptableOverheadRatio, true, 15);
+      builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, doPackFST, acceptableOverheadRatio, true, 15);
     }
 
     private class PostingsWriter extends PostingsConsumer {

Modified: lucene/dev/branches/branch_4x/lucene/codecs/src/test/org/apache/lucene/codecs/blocktreeords/TestOrdsBlockTree.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/test/org/apache/lucene/codecs/blocktreeords/TestOrdsBlockTree.java?rev=1613235&r1=1613234&r2=1613235&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/codecs/src/test/org/apache/lucene/codecs/blocktreeords/TestOrdsBlockTree.java (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/test/org/apache/lucene/codecs/blocktreeords/TestOrdsBlockTree.java Thu Jul 24 18:23:06 2014
@@ -109,6 +109,9 @@ public class TestOrdsBlockTree extends B
       doc.add(newTextField("field", term, Field.Store.NO));
       w.addDocument(doc);
     }
+    if (VERBOSE) {
+      System.out.println("TEST: now forceMerge");
+    }
     w.forceMerge(1);
     IndexReader r = w.getReader();
     TermsEnum te = MultiFields.getTerms(r, "field").iterator(null);