You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2012/06/13 16:05:02 UTC

svn commit: r1349859 - in /lucene/dev/branches/branch_4x/lucene: analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/dict/ analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/ core/src/java/org/apache/lucene/codecs/memory/ cor...

Author: jpountz
Date: Wed Jun 13 14:05:01 2012
New Revision: 1349859

URL: http://svn.apache.org/viewvc?rev=1349859&view=rev
Log:
LUCENE-4120: FST.pack: Use packed integer arrays for improved memory efficiency (merged from r1349826 and r1349830).

Modified:
    lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary$fst.dat
    lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/DocValues.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/DirectPacked64SingleBlockReader.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/DirectPackedReader.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/GrowableWriter.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Packed64SingleBlock.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
    lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java

Modified: lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary$fst.dat
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary%24fst.dat?rev=1349859&r1=1349858&r2=1349859&view=diff
==============================================================================
Binary files - no diff available.

Modified: lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java?rev=1349859&r1=1349858&r2=1349859&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java Wed Jun 13 14:05:01 2012
@@ -37,6 +37,7 @@ import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.fst.Builder;
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
+import org.apache.lucene.util.packed.PackedInts;
 
 import com.ibm.icu.text.Normalizer2;
 
@@ -161,7 +162,7 @@ public class TokenInfoDictionaryBuilder 
       offset = next;
     }
     
-    final FST<Long> fst = fstBuilder.finish().pack(2, 100000);
+    final FST<Long> fst = fstBuilder.finish().pack(2, 100000, PackedInts.DEFAULT);
     
     System.out.print("  " + fst.getNodeCount() + " nodes, " + fst.getArcCount() + " arcs, " + fst.sizeInBytes() + " bytes...  ");
     dictionary.setFST(fst);

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java?rev=1349859&r1=1349858&r2=1349859&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java Wed Jun 13 14:05:01 2012
@@ -54,6 +54,7 @@ import org.apache.lucene.util.fst.ByteSe
 import org.apache.lucene.util.fst.BytesRefFSTEnum;
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.Util;
+import org.apache.lucene.util.packed.PackedInts;
 
 // TODO: would be nice to somehow allow this to act like
 // InstantiatedIndex, by never writing to disk; ie you write
@@ -81,14 +82,16 @@ import org.apache.lucene.util.fst.Util;
 public class MemoryPostingsFormat extends PostingsFormat {
 
   private final boolean doPackFST;
+  private final float acceptableOverheadRatio;
 
   public MemoryPostingsFormat() {
-    this(false);
+    this(false, PackedInts.DEFAULT);
   }
 
-  public MemoryPostingsFormat(boolean doPackFST) {
+  public MemoryPostingsFormat(boolean doPackFST, float acceptableOverheadRatio) {
     super("Memory");
     this.doPackFST = doPackFST;
+    this.acceptableOverheadRatio = acceptableOverheadRatio;
   }
   
   @Override
@@ -102,13 +105,15 @@ public class MemoryPostingsFormat extend
     private final Builder<BytesRef> builder;
     private final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
     private final boolean doPackFST;
+    private final float acceptableOverheadRatio;
     private int termCount;
 
-    public TermsWriter(IndexOutput out, FieldInfo field, boolean doPackFST) {
+    public TermsWriter(IndexOutput out, FieldInfo field, boolean doPackFST, float acceptableOverheadRatio) {
       this.out = out;
       this.field = field;
       this.doPackFST = doPackFST;
-      builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doPackFST);
+      this.acceptableOverheadRatio = acceptableOverheadRatio;
+      builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doPackFST, acceptableOverheadRatio);
     }
 
     private class PostingsWriter extends PostingsConsumer {
@@ -265,7 +270,7 @@ public class MemoryPostingsFormat extend
         out.writeVInt(docCount);
         FST<BytesRef> fst = builder.finish();
         if (doPackFST) {
-          fst = fst.pack(3, Math.max(10, fst.getNodeCount()/4));
+          fst = fst.pack(3, Math.max(10, fst.getNodeCount()/4), acceptableOverheadRatio);
         }
         fst.save(out);
         //System.out.println("finish field=" + field.name + " fp=" + out.getFilePointer());
@@ -290,7 +295,7 @@ public class MemoryPostingsFormat extend
       @Override
       public TermsConsumer addField(FieldInfo field) {
         //System.out.println("\naddField field=" + field.name);
-        return new TermsWriter(out, field, doPackFST);
+        return new TermsWriter(out, field, doPackFST, acceptableOverheadRatio);
       }
 
       @Override

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/DocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/DocValues.java?rev=1349859&r1=1349858&r2=1349859&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/DocValues.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/DocValues.java Wed Jun 13 14:05:01 2012
@@ -32,6 +32,7 @@ import org.apache.lucene.document.Packed
 import org.apache.lucene.document.ShortDocValuesField; // javadocs
 import org.apache.lucene.document.SortedBytesDocValuesField; // javadocs
 import org.apache.lucene.document.StraightBytesDocValuesField; // javadocs
+import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.packed.PackedInts;
 
@@ -411,6 +412,11 @@ public abstract class DocValues implemen
         Arrays.fill(arr, off, off+len, 0);
         return len;
       }
+
+      @Override
+      public long ramBytesUsed() {
+        return 0;
+      }
     };
 
     return new SortedSource(type, BytesRef.getUTF8SortedAsUnicodeComparator()) {

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java?rev=1349859&r1=1349858&r2=1349859&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java Wed Jun 13 14:05:01 2012
@@ -23,6 +23,7 @@ import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.fst.FST.INPUT_TYPE; // javadoc
+import org.apache.lucene.util.packed.PackedInts;
 
 /**
  * Builds a minimal FST (maps an IntsRef term to an arbitrary
@@ -83,7 +84,18 @@ public class Builder<T> {
    * pruning options turned off.
    */
   public Builder(FST.INPUT_TYPE inputType, Outputs<T> outputs) {
-    this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, false);
+    this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, false, PackedInts.COMPACT);
+  }
+
+  /**
+   * Instantiates an FST/FSA builder with {@link PackedInts#DEFAULT}
+   * <code>acceptableOverheadRatio</code>.
+   */
+  public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doShareSuffix,
+      boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs,
+      FreezeTail<T> freezeTail, boolean willPackFST) {
+    this(inputType, minSuffixCount1, minSuffixCount2, doShareSuffix, doShareNonSingletonNodes,
+        shareMaxTailLength, outputs, freezeTail, willPackFST, PackedInts.DEFAULT);
   }
 
   /**
@@ -126,17 +138,20 @@ public class Builder<T> {
    * @param willPackFST Pass true if you will pack the FST before saving.  This
    *    causes the FST to create additional data structures internally to facilitate packing, but
    *    it means the resulting FST cannot be saved: it must
-   *    first be packed using {@link FST#pack(int, int)}}.
+   *    first be packed using {@link FST#pack(int, int, float)}
+   *
+   * @param acceptableOverheadRatio How to trade speed for space when building the FST. This option
+   *    is only relevant when willPackFST is true. @see PackedInts#getMutable(int, int, float)
    */
   public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doShareSuffix,
                  boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs,
-                 FreezeTail<T> freezeTail, boolean willPackFST) {
+                 FreezeTail<T> freezeTail, boolean willPackFST, float acceptableOverheadRatio) {
     this.minSuffixCount1 = minSuffixCount1;
     this.minSuffixCount2 = minSuffixCount2;
     this.freezeTail = freezeTail;
     this.doShareNonSingletonNodes = doShareNonSingletonNodes;
     this.shareMaxTailLength = shareMaxTailLength;
-    fst = new FST<T>(inputType, outputs, willPackFST);
+    fst = new FST<T>(inputType, outputs, willPackFST, acceptableOverheadRatio);
     if (doShareSuffix) {
       dedupHash = new NodeHash<T>(fst);
     } else {

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/FST.java?rev=1349859&r1=1349858&r2=1349859&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/FST.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/FST.java Wed Jun 13 14:05:01 2012
@@ -37,8 +37,9 @@ import org.apache.lucene.util.CodecUtil;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.PriorityQueue;
-import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.fst.Builder.UnCompiledNode;
+import org.apache.lucene.util.packed.GrowableWriter;
+import org.apache.lucene.util.packed.PackedInts;
 
 // TODO: break this into WritableFST and ReadOnlyFST.. then
 // we can have subclasses of ReadOnlyFST to handle the
@@ -155,7 +156,7 @@ public final class FST<T> {
   public int arcWithOutputCount;
 
   private final boolean packed;
-  private final int[] nodeRefToAddress;
+  private PackedInts.Reader nodeRefToAddress;
 
   // If arc has this label then that arc is final/accepted
   public static final int END_LABEL = -1;
@@ -252,25 +253,23 @@ public final class FST<T> {
 
   private final BytesWriter writer;
 
-  // TODO: we can save RAM here by using growable packed
-  // ints...:
-  private int[] nodeAddress;
+  private GrowableWriter nodeAddress;
 
   // TODO: we could be smarter here, and prune periodically
   // as we go; high in-count nodes will "usually" become
   // clear early on:
-  private int[] inCounts;
+  private GrowableWriter inCounts;
 
   // make a new empty FST, for building; Builder invokes
   // this ctor
-  FST(INPUT_TYPE inputType, Outputs<T> outputs, boolean willPackFST) {
+  FST(INPUT_TYPE inputType, Outputs<T> outputs, boolean willPackFST, float acceptableOverheadRatio) {
     this.inputType = inputType;
     this.outputs = outputs;
     bytes = new byte[128];
     NO_OUTPUT = outputs.getNoOutput();
     if (willPackFST) {
-      nodeAddress = new int[8];
-      inCounts = new int[8];
+      nodeAddress = new GrowableWriter(PackedInts.bitsRequired(bytes.length - 1), 8, acceptableOverheadRatio);
+      inCounts = new GrowableWriter(1, 8, acceptableOverheadRatio);
     } else {
       nodeAddress = null;
       inCounts = null;
@@ -320,11 +319,7 @@ public final class FST<T> {
       throw new IllegalStateException("invalid input type " + t);
     }
     if (packed) {
-      final int nodeRefCount = in.readVInt();
-      nodeRefToAddress = new int[nodeRefCount];
-      for(int idx=0;idx<nodeRefCount;idx++) {
-        nodeRefToAddress[idx] = in.readVInt();
-      }
+      nodeRefToAddress = PackedInts.getReader(in);
     } else {
       nodeRefToAddress = null;
     }
@@ -348,10 +343,10 @@ public final class FST<T> {
   public int sizeInBytes() {
     int size = bytes.length;
     if (packed) {
-      size += nodeRefToAddress.length * RamUsageEstimator.NUM_BYTES_INT;
+      size += nodeRefToAddress.ramBytesUsed();
     } else if (nodeAddress != null) {
-      size += nodeAddress.length * RamUsageEstimator.NUM_BYTES_INT;
-      size += inCounts.length * RamUsageEstimator.NUM_BYTES_INT;
+      size += nodeAddress.ramBytesUsed();
+      size += inCounts.ramBytesUsed();
     }
     return size;
   }
@@ -374,7 +369,7 @@ public final class FST<T> {
   private int getNodeAddress(int node) {
     if (nodeAddress != null) {
       // Deref
-      return nodeAddress[node];
+      return (int) nodeAddress.get(node);
     } else {
       // Straight
       return node;
@@ -444,6 +439,9 @@ public final class FST<T> {
     if (nodeAddress != null) {
       throw new IllegalStateException("cannot save an FST pre-packed FST; it must first be packed");
     }
+    if (packed && !(nodeRefToAddress instanceof PackedInts.Mutable)) {
+      throw new IllegalStateException("cannot save a FST which has been loaded from disk ");
+    }
     CodecUtil.writeHeader(out, FILE_FORMAT_NAME, VERSION_CURRENT);
     if (packed) {
       out.writeByte((byte) 1);
@@ -469,11 +467,7 @@ public final class FST<T> {
     }
     out.writeByte(t);
     if (packed) {
-      assert nodeRefToAddress != null;
-      out.writeVInt(nodeRefToAddress.length);
-      for(int idx=0;idx<nodeRefToAddress.length;idx++) {
-        out.writeVInt(nodeRefToAddress[idx]);
-      }
+      ((PackedInts.Mutable) nodeRefToAddress).save(out);
     }
     out.writeVInt(startNode);
     out.writeVInt(nodeCount);
@@ -624,7 +618,7 @@ public final class FST<T> {
       if (!targetHasArcs) {
         flags += BIT_STOP_NODE;
       } else if (inCounts != null) {
-        inCounts[target.node]++;
+        inCounts.set(target.node, inCounts.get(target.node) + 1);
       }
 
       if (arc.output != NO_OUTPUT) {
@@ -715,11 +709,11 @@ public final class FST<T> {
     final int node;
     if (nodeAddress != null) {
       // Nodes are addressed by 1+ord:
-      if (nodeCount == nodeAddress.length) {
-        nodeAddress = ArrayUtil.grow(nodeAddress);
-        inCounts = ArrayUtil.grow(inCounts);
+      if (nodeCount == nodeAddress.size()) {
+        nodeAddress = nodeAddress.resize(ArrayUtil.oversize(nodeAddress.size() + 1, nodeAddress.getBitsPerValue()));
+        inCounts = inCounts.resize(ArrayUtil.oversize(inCounts.size() + 1, inCounts.getBitsPerValue()));
       }
-      nodeAddress[nodeCount] = endAddress;
+      nodeAddress.set(nodeCount, endAddress);
       // System.out.println("  write nodeAddress[" + nodeCount + "] = " + endAddress);
       node = nodeCount;
     } else {
@@ -1005,9 +999,9 @@ public final class FST<T> {
           // Address is delta-coded from current address:
           arc.target = pos + code;
           //System.out.println("    delta pos=" + pos + " delta=" + code + " target=" + arc.target);
-        } else if (code < nodeRefToAddress.length) {
+        } else if (code < nodeRefToAddress.size()) {
           // Deref
-          arc.target = nodeRefToAddress[code];
+          arc.target = (int) nodeRefToAddress.get(code);
           //System.out.println("    deref code=" + code + " target=" + arc.target);
         } else {
           // Absolute
@@ -1420,7 +1414,7 @@ public final class FST<T> {
  */
 
   // Creates a packed FST
-  private FST(INPUT_TYPE inputType, int[] nodeRefToAddress, Outputs<T> outputs) {
+  private FST(INPUT_TYPE inputType, PackedInts.Reader nodeRefToAddress, Outputs<T> outputs) {
     packed = true;
     this.inputType = inputType;
     bytes = new byte[128];
@@ -1432,8 +1426,10 @@ public final class FST<T> {
 
   /** Expert: creates an FST by packing this one.  This
    *  process requires substantial additional RAM (currently
-   *  ~8 bytes per node), but then should produce a smaller FST. */
-  public FST<T> pack(int minInCountDeref, int maxDerefNodes) throws IOException {
+   *  up to ~8 bytes per node depending on
+   *  <code>acceptableOverheadRatio</code>), but then should
+   *  produce a smaller FST. */
+  public FST<T> pack(int minInCountDeref, int maxDerefNodes, float acceptableOverheadRatio) throws IOException {
 
     // TODO: other things to try
     //   - renumber the nodes to get more next / better locality?
@@ -1454,22 +1450,22 @@ public final class FST<T> {
 
     final BytesReader r = getBytesReader(0);
 
-    final int topN = Math.min(maxDerefNodes, inCounts.length);
+    final int topN = Math.min(maxDerefNodes, inCounts.size());
 
     // Find top nodes with highest number of incoming arcs:
     NodeQueue q = new NodeQueue(topN);
 
     // TODO: we could use more RAM efficient selection algo here...
     NodeAndInCount bottom = null;
-    for(int node=0;node<inCounts.length;node++) {
-      if (inCounts[node] >= minInCountDeref) {
+    for(int node=0; node<inCounts.size(); node++) {
+      if (inCounts.get(node) >= minInCountDeref) {
         if (bottom == null) {
-          q.add(new NodeAndInCount(node, inCounts[node]));
+          q.add(new NodeAndInCount(node, (int) inCounts.get(node)));
           if (q.size() == topN) {
             bottom = q.top();
           }
-        } else if (inCounts[node] > bottom.count) {
-          q.insertWithOverflow(new NodeAndInCount(node, inCounts[node]));
+        } else if (inCounts.get(node) > bottom.count) {
+          q.insertWithOverflow(new NodeAndInCount(node, (int) inCounts.get(node)));
         }
       }
     }
@@ -1484,20 +1480,17 @@ public final class FST<T> {
       //System.out.println("map node=" + n.node + " inCount=" + n.count + " to newID=" + downTo);
     }
 
-    // TODO: we can use packed ints:
-    // +1 because node ords start at 1 (0 is reserved as
-    // stop node):
-    final int[] nodeRefToAddressIn = new int[topNodeMap.size()];
-
-    final FST<T> fst = new FST<T>(inputType, nodeRefToAddressIn, outputs);
+    final FST<T> fst = new FST<T>(inputType, null, outputs);
 
     final BytesWriter writer = fst.writer;
-    
-    final int[] newNodeAddress = new int[1+nodeCount];
+
+    // +1 because node ords start at 1 (0 is reserved as stop node):
+    final GrowableWriter newNodeAddress = new GrowableWriter(
+        PackedInts.bitsRequired(bytes.length), 1 + nodeCount, acceptableOverheadRatio);
 
     // Fill initial coarse guess:
     for(int node=1;node<=nodeCount;node++) {
-      newNodeAddress[node] = 1 + bytes.length - nodeAddress[node];
+      newNodeAddress.set(node, 1 + bytes.length - nodeAddress.get(node));
     }
 
     int absCount;
@@ -1537,11 +1530,11 @@ public final class FST<T> {
         fst.nodeCount++;
         final int address = writer.posWrite;
         //System.out.println("  node: " + node + " address=" + address);
-        if (address != newNodeAddress[node]) {
-          addressError = address - newNodeAddress[node];
+        if (address != newNodeAddress.get(node)) {
+          addressError = address - (int) newNodeAddress.get(node);
           //System.out.println("    change: " + (address - newNodeAddress[node]));
           changed = true;
-          newNodeAddress[node] = address;
+          newNodeAddress.set(node, address);
           changedCount++;
         }
 
@@ -1621,10 +1614,10 @@ public final class FST<T> {
               if (ptr != null) {
                 absPtr = ptr;
               } else {
-                absPtr = topNodeMap.size() + newNodeAddress[arc.target] + addressError;
+                absPtr = topNodeMap.size() + (int) newNodeAddress.get(arc.target) + addressError;
               }
 
-              int delta = newNodeAddress[arc.target] + addressError - writer.posWrite - 2;
+              int delta = (int) newNodeAddress.get(arc.target) + addressError - writer.posWrite - 2;
               if (delta < 0) {
                 //System.out.println("neg: " + delta);
                 anyNegDelta = true;
@@ -1654,7 +1647,7 @@ public final class FST<T> {
 
             if (doWriteTarget) {
 
-              int delta = newNodeAddress[arc.target] + addressError - writer.posWrite;
+              int delta = (int) newNodeAddress.get(arc.target) + addressError - writer.posWrite;
               if (delta < 0) {
                 anyNegDelta = true;
                 //System.out.println("neg: " + delta);
@@ -1745,11 +1738,20 @@ public final class FST<T> {
       //System.out.println("  " + changedCount + " of " + fst.nodeCount + " changed; retry");
     }
 
+    long maxAddress = 0;
+    for (int key : topNodeMap.keySet()) {
+      maxAddress = Math.max(maxAddress, newNodeAddress.get(key));
+    }
+
+    PackedInts.Mutable nodeRefToAddressIn = PackedInts.getMutable(topNodeMap.size(),
+        PackedInts.bitsRequired(maxAddress), acceptableOverheadRatio);
     for(Map.Entry<Integer,Integer> ent : topNodeMap.entrySet()) {
-      nodeRefToAddressIn[ent.getValue()] = newNodeAddress[ent.getKey()];
+      nodeRefToAddressIn.set(ent.getValue(), newNodeAddress.get(ent.getKey()));
     }
+    fst.nodeRefToAddress = nodeRefToAddressIn;
+    
 
-    fst.startNode = newNodeAddress[startNode];
+    fst.startNode = (int) newNodeAddress.get(startNode);
     //System.out.println("new startNode=" + fst.startNode + " old startNode=" + startNode);
 
     if (emptyOutput != null) {

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/DirectPacked64SingleBlockReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/DirectPacked64SingleBlockReader.java?rev=1349859&r1=1349858&r2=1349859&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/DirectPacked64SingleBlockReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/DirectPacked64SingleBlockReader.java Wed Jun 13 14:05:01 2012
@@ -51,4 +51,9 @@ final class DirectPacked64SingleBlockRea
       throw new IllegalStateException("failed", e);
     }
   }
+
+  @Override
+  public long ramBytesUsed() {
+    return 0;
+  }
 }

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/DirectPackedReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/DirectPackedReader.java?rev=1349859&r1=1349858&r2=1349859&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/DirectPackedReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/DirectPackedReader.java Wed Jun 13 14:05:01 2012
@@ -73,4 +73,9 @@ final class DirectPackedReader extends P
       throw new IllegalStateException("failed", ioe);
     }
   }
+
+  @Override
+  public long ramBytesUsed() {
+    return 0;
+  }
 }

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/GrowableWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/GrowableWriter.java?rev=1349859&r1=1349858&r2=1349859&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/GrowableWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/GrowableWriter.java Wed Jun 13 14:05:01 2012
@@ -17,6 +17,10 @@ package org.apache.lucene.util.packed;
  * limitations under the License.
  */
 
+import java.io.IOException;
+
+import org.apache.lucene.store.DataOutput;
+
 /**     
  * Implements {@link PackedInts.Mutable}, but grows the
  * bit count of the underlying packed ints on-demand.
@@ -111,4 +115,14 @@ public class GrowableWriter implements P
     current.fill(fromIndex, toIndex, val);
   }
 
+  @Override
+  public long ramBytesUsed() {
+    return current.ramBytesUsed();
+  }
+
+  @Override
+  public void save(DataOutput out) throws IOException {
+    current.save(out);
+  }
+
 }

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Packed64SingleBlock.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Packed64SingleBlock.java?rev=1349859&r1=1349858&r2=1349859&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Packed64SingleBlock.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Packed64SingleBlock.java Wed Jun 13 14:05:01 2012
@@ -285,6 +285,11 @@ abstract class Packed64SingleBlock exten
   }
 
   @Override
+  protected int getFormat() {
+    return PackedInts.PACKED_SINGLE_BLOCK;
+  }
+
+  @Override
   public String toString() {
     return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
         + ", size=" + size() + ", elements.length=" + blocks.length + ")";

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java?rev=1349859&r1=1349858&r2=1349859&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java Wed Jun 13 14:05:01 2012
@@ -101,6 +101,11 @@ public class PackedInts {
     int size();
 
     /**
+     * Return the in-memory size in bytes.
+     */
+    long ramBytesUsed();
+
+    /**
      * Expert: if the bit-width of this reader matches one of
      * java's native types, returns the underlying array
      * (ie, byte[], short[], int[], long[]); else, returns
@@ -118,6 +123,7 @@ public class PackedInts {
      * @see #getArray
      */
     boolean hasArray();
+
   }
 
   /**
@@ -171,6 +177,7 @@ public class PackedInts {
    * @lucene.internal
    */
   public static interface Mutable extends Reader {
+
     /**
      * Set the value at the given index in the array.
      * @param index where the value should be positioned.
@@ -197,6 +204,13 @@ public class PackedInts {
      */
     void clear();
 
+    /**
+     * Save this mutable into <code>out</code>. Instantiating a reader from
+     * the generated data will return a reader with the same number of bits
+     * per value.
+     */
+    void save(DataOutput out) throws IOException;
+
   }
 
   /**
@@ -239,6 +253,7 @@ public class PackedInts {
       }
       return gets;
     }
+
   }
 
   public static abstract class MutableImpl extends ReaderImpl implements Mutable {
@@ -267,6 +282,18 @@ public class PackedInts {
       }
     }
 
+    protected int getFormat() {
+      return PACKED;
+    }
+
+    @Override
+    public void save(DataOutput out) throws IOException {
+      Writer writer = getWriterByFormat(out, valueCount, bitsPerValue, getFormat());
+      for (int i = 0; i < valueCount; ++i) {
+        writer.add(get(i));
+      }
+      writer.finish();
+    }
   }
 
   /** A write-once Writer.
@@ -470,28 +497,40 @@ public class PackedInts {
     int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue;
 
     if (bitsPerValue <= 8 && maxBitsPerValue >= 8) {
-      return new PackedWriter(out, valueCount, 8);
+      return getWriterByFormat(out, valueCount, 8, PACKED);
     } else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) {
-      return new PackedWriter(out, valueCount, 16);
+      return getWriterByFormat(out, valueCount, 16, PACKED);
     } else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) {
-      return new PackedWriter(out, valueCount, 32);
+      return getWriterByFormat(out, valueCount, 32, PACKED);
     } else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) {
-      return new PackedWriter(out, valueCount, 64);
+      return getWriterByFormat(out, valueCount, 64, PACKED);
     } else if (valueCount <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) {
-      return new PackedWriter(out, valueCount, 24);
+      return getWriterByFormat(out, valueCount, 24, PACKED);
     } else if (valueCount <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) {
-      return new PackedWriter(out, valueCount, bitsPerValue);
+      return getWriterByFormat(out, valueCount, 48, PACKED);
     } else {
       for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) {
         if (Packed64SingleBlock.isSupported(bpv)) {
           float overhead = Packed64SingleBlock.overheadPerValue(bpv);
           float acceptableOverhead = acceptableOverheadPerValue + bitsPerValue - bpv;
           if (overhead <= acceptableOverhead) {
-            return new Packed64SingleBlockWriter(out, valueCount, bpv);
+            return getWriterByFormat(out, valueCount, bpv, PACKED_SINGLE_BLOCK);
           }
         }
       }
-      return new PackedWriter(out, valueCount, bitsPerValue);
+      return getWriterByFormat(out, valueCount, bitsPerValue, PACKED);
+    }
+  }
+
+  private static Writer getWriterByFormat(DataOutput out,
+      int valueCount, int bitsPerValue, int format) throws IOException {
+    switch (format) {
+      case PACKED:
+        return new PackedWriter(out, valueCount, bitsPerValue);
+      case PACKED_SINGLE_BLOCK:
+        return new Packed64SingleBlockWriter(out, valueCount, bitsPerValue);
+      default:
+        throw new IllegalArgumentException("Unknown format " + format);
     }
   }
 

Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java?rev=1349859&r1=1349858&r2=1349859&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java Wed Jun 13 14:05:01 2012
@@ -41,7 +41,7 @@ public class TestRollingUpdates extends 
 
     //provider.register(new MemoryCodec());
     if ( (!"Lucene3x".equals(Codec.getDefault().getName())) && random().nextBoolean()) {
-      Codec.setDefault(_TestUtil.alwaysPostingsFormat(new MemoryPostingsFormat(random().nextBoolean())));
+      Codec.setDefault(_TestUtil.alwaysPostingsFormat(new MemoryPostingsFormat(random().nextBoolean(), random.nextFloat())));
     }
 
     final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));

Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java?rev=1349859&r1=1349858&r2=1349859&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java Wed Jun 13 14:05:01 2012
@@ -64,6 +64,7 @@ import org.apache.lucene.util.fst.BytesR
 import org.apache.lucene.util.fst.FST.Arc;
 import org.apache.lucene.util.fst.FST.BytesReader;
 import org.apache.lucene.util.fst.PairOutputs.Pair;
+import org.apache.lucene.util.packed.PackedInts;
 
 @SuppressCodecs({ "SimpleText", "Memory" })
 public class TestFSTs extends LuceneTestCase {
@@ -536,7 +537,7 @@ public class TestFSTs extends LuceneTest
         if (VERBOSE) {
           System.out.println("TEST: now rewrite");
         }
-        final FST<T> packed = fst.pack(_TestUtil.nextInt(random, 1, 10), _TestUtil.nextInt(random, 0, 10000000));
+        final FST<T> packed = fst.pack(_TestUtil.nextInt(random, 1, 10), _TestUtil.nextInt(random, 0, 10000000), random.nextFloat());
         if (VERBOSE) {
           System.out.println("TEST: now verify packed FST");
         }
@@ -1182,7 +1183,7 @@ public class TestFSTs extends LuceneTest
           if (rewriteIter == 1) {
             if (doRewrite) {
               // Verify again, with packed FST:
-              fst = fst.pack(_TestUtil.nextInt(random, 1, 10), _TestUtil.nextInt(random, 0, 10000000));
+              fst = fst.pack(_TestUtil.nextInt(random, 1, 10), _TestUtil.nextInt(random, 0, 10000000), random.nextFloat());
             } else {
               break;
             }
@@ -1324,7 +1325,7 @@ public class TestFSTs extends LuceneTest
 
         if (doPack) {
           System.out.println("Pack...");
-          fst = fst.pack(4, 100000000);
+          fst = fst.pack(4, 100000000, random().nextFloat());
           System.out.println("New size " + fst.sizeInBytes() + " bytes");
         }
         
@@ -1927,7 +1928,7 @@ public class TestFSTs extends LuceneTest
     final Long nothing = outputs.getNoOutput();
     final Builder<Long> b = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
 
-    final FST<Long> fst = new FST<Long>(FST.INPUT_TYPE.BYTE1, outputs, false);
+    final FST<Long> fst = new FST<Long>(FST.INPUT_TYPE.BYTE1, outputs, false, PackedInts.COMPACT);
 
     final Builder.UnCompiledNode<Long> rootNode = new Builder.UnCompiledNode<Long>(b, 0);
 

Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java?rev=1349859&r1=1349858&r2=1349859&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java Wed Jun 13 14:05:01 2012
@@ -560,4 +560,40 @@ public class TestPackedInts extends Luce
     assertEquals(1 << 10, wrt.get(valueCount - 1));
   }
 
+  public void testSave() throws IOException {
+    final int valueCount = _TestUtil.nextInt(random(), 1, 2048);
+    for (int bpv = 1; bpv <= 64; ++bpv) {
+      final int maxValue = (int) Math.min(PackedInts.maxValue(31), PackedInts.maxValue(bpv));
+      final RAMDirectory directory = new RAMDirectory();
+      List<PackedInts.Mutable> packedInts = createPackedInts(valueCount, bpv);
+      for (PackedInts.Mutable mutable : packedInts) {
+        for (int i = 0; i < mutable.size(); ++i) {
+          mutable.set(i, random().nextInt(maxValue));
+        }
+
+        IndexOutput out = directory.createOutput("packed-ints.bin", IOContext.DEFAULT);
+        mutable.save(out);
+        out.close();
+
+        IndexInput in = directory.openInput("packed-ints.bin", IOContext.DEFAULT);
+        PackedInts.Reader reader = PackedInts.getReader(in);
+        assertEquals(mutable.getBitsPerValue(), reader.getBitsPerValue());
+        assertEquals(valueCount, reader.size());
+        if (mutable instanceof Packed64SingleBlock) {
+          // make sure that we used the right format so that the reader has
+          // the same performance characteristics as the mutable that has been
+          // serialized
+          assertTrue(reader instanceof Packed64SingleBlock);
+        } else {
+          assertFalse(reader instanceof Packed64SingleBlock);
+        }
+        for (int i = 0; i < valueCount; ++i) {
+          assertEquals(mutable.get(i), reader.get(i));
+        }
+        in.close();
+        directory.deleteFile("packed-ints.bin");
+      }
+    }
+  }
+
 }

Modified: lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java?rev=1349859&r1=1349858&r2=1349859&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java (original)
+++ lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java Wed Jun 13 14:05:01 2012
@@ -99,8 +99,8 @@ public class RandomCodec extends Lucene4
         new NestedPulsingPostingsFormat(),
         new Lucene40WithOrds(),
         new SimpleTextPostingsFormat(),
-        new MemoryPostingsFormat(true),
-        new MemoryPostingsFormat(false));
+        new MemoryPostingsFormat(true, random.nextFloat()),
+        new MemoryPostingsFormat(false, random.nextFloat()));
 
     Collections.shuffle(formats, random);
   }