You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2011/02/03 16:52:34 UTC

svn commit: r1066838 - in /lucene/dev/branches/bulkpostings/lucene/src: java/org/apache/lucene/index/codecs/ java/org/apache/lucene/index/codecs/intblock/ java/org/apache/lucene/index/codecs/simple64/ test/org/apache/lucene/index/ test/org/apache/lucen...

Author: mikemccand
Date: Thu Feb  3 15:52:33 2011
New Revision: 1066838

URL: http://svn.apache.org/viewvc?rev=1066838&view=rev
Log:
LUCENE-2886: add Simple64 fixed & var intblock experimental codec

Added:
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64.java   (with props)
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64Codec.java   (with props)
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64VarIntCodec.java   (with props)
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/CodecTestCase.java   (with props)
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/simple64/
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/simple64/Simple64Test.java   (with props)
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/simple64/Simple64VarIntTest.java   (with props)
Modified:
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestOmitTf.java

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java?rev=1066838&r1=1066837&r2=1066838&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java Thu Feb  3 15:52:33 2011
@@ -26,6 +26,8 @@ import java.util.Set;
 import org.apache.lucene.index.codecs.bulkvint.BulkVIntCodec;
 import org.apache.lucene.index.codecs.preflex.PreFlexCodec;
 import org.apache.lucene.index.codecs.pulsing.PulsingCodec;
+import org.apache.lucene.index.codecs.simple64.Simple64Codec;
+import org.apache.lucene.index.codecs.simple64.Simple64VarIntCodec;
 import org.apache.lucene.index.codecs.simpletext.SimpleTextCodec;
 import org.apache.lucene.index.codecs.standard.StandardCodec;
 import org.apache.lucene.index.codecs.pfordelta.PatchedFrameOfRefCodec;
@@ -51,7 +53,7 @@ public class CodecProvider {
 
   private final Set<String> knownExtensions = new HashSet<String>();
 
-  public final static String[] CORE_CODECS = new String[] {"Standard", "Pulsing", "PreFlex", "SimpleText", "PatchedFrameOfRef", "FrameOfRef", "PatchedFrameOfRef2", "BulkVInt"};
+  public final static String[] CORE_CODECS = new String[] {"Standard", "Pulsing", "PreFlex", "SimpleText", "PatchedFrameOfRef", "FrameOfRef", "PatchedFrameOfRef2", "BulkVInt", "Simple64", "Simple64VarInt"};
 
   public synchronized void register(Codec codec) {
     if (codec.name == null) {
@@ -178,5 +180,7 @@ class DefaultCodecProvider extends Codec
     register(new FrameOfRefCodec());
     register(new PForDeltaFixedIntBlockCodec(128));
     register(new BulkVIntCodec(128));
+    register(new Simple64Codec(128));
+    register(new Simple64VarIntCodec());
   }
 }

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java?rev=1066838&r1=1066837&r2=1066838&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java Thu Feb  3 15:52:33 2011
@@ -70,7 +70,8 @@ public abstract class VariableIntBlockIn
 
   public interface BlockReader {
     public int readBlock() throws IOException;
-    public void seek(long pos) throws IOException;
+    // nocommit -- do we really need?
+    //public void seek(long pos) throws IOException;
   }
 
   public static class Reader extends BulkPostingsEnum.BlockReader {
@@ -80,7 +81,7 @@ public abstract class VariableIntBlockIn
 
     private int offset;
     private long lastBlockFP;
-    private int blockSize;
+    //private int blockSize;                        // nocommit redundant w/ limit?
     private final BlockReader blockReader;
     private int limit;
 
@@ -99,9 +100,10 @@ public abstract class VariableIntBlockIn
       if (fp != lastBlockFP) {
         // Seek to new block
         in.seek(fp);
-        blockReader.seek(fp);
+        // nocommit -- why?
+        //blockReader.seek(fp);
         lastBlockFP = fp;
-        limit = blockSize = blockReader.readBlock();
+        limit = blockReader.readBlock();
       } else {
         // Seek w/in current block
       }
@@ -115,6 +117,7 @@ public abstract class VariableIntBlockIn
       // non-causal, ie must see future int values to
       // encode the current ones.
       while(offset >= limit) {
+        //System.out.println("NON CAUSAL! offset=" + offset + " limit=" + limit);
         offset -= limit;
         fill();
       }
@@ -147,8 +150,7 @@ public abstract class VariableIntBlockIn
       // fill -- but we need it to detect seek w/in block
       // case:
       lastBlockFP = in.getFilePointer();
-      blockSize = blockReader.readBlock();
-      return limit = blockSize;
+      return limit = blockReader.readBlock();
     }
   }
 

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java?rev=1066838&r1=1066837&r2=1066838&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java Thu Feb  3 15:52:33 2011
@@ -123,6 +123,11 @@ public abstract class VariableIntBlockIn
       lastUpto = upto;
       lastFP = fp;
     }
+
+    @Override
+    public String toString() { // Describes this object by its current fp and upto values.
+      return "VarIntBlock.Output fp=" + fp + " upto=" + upto;
+    }
   }
 
   private boolean abort;

Added: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64.java?rev=1066838&view=auto
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64.java (added)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64.java Thu Feb  3 15:52:33 2011
@@ -0,0 +1,978 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.index.codecs.simple64;
+
+import java.nio.LongBuffer;
+
+public class Simple64 {
+
+  protected int compressedSize = 0; // value reported by compressedSize()
+  protected LongBuffer compressedBuffer; // assigned by setCompressedBuffer()
+
+  /** Uncompressed data, as last passed to setUnCompressedData. */
+  protected int[] unCompressedData;
+  /** Offset into unCompressedData, as last passed to setUnCompressedData. */
+  protected int offset;
+  /** Size of unCompressedData, -1 when not available. */
+  protected int unComprSize = -1;
+
+  public void setCompressedBuffer(final LongBuffer compressedBuffer) { // Keeps a reference to the given buffer; nothing is copied.
+    this.compressedBuffer = compressedBuffer;
+  }
+
+  public void setUnCompressedData(final int[] unCompressedData,
+      final int offset, final int unComprSize) { // Records the array reference, start offset and size; the array is not copied or validated here.
+    this.unCompressedData = unCompressedData;
+    this.offset = offset;
+    this.unComprSize = unComprSize;
+  }
+
+  public int compressedSize() { // Returns the compressedSize field (initialized to 0).
+    return this.compressedSize;
+  }
+
+  private static final int NUM_DATA_BITS = 60; // payload bits per long; values are packed from bit 4 up, above the 4-bit status code
+  private static final long BITS_60_MASK = (1L << 60) - 1; // each BITS_n_MASK has the low n bits set
+  private static final long BITS_30_MASK = (1 << 30) - 1;
+  private static final long BITS_20_MASK = (1 << 20) - 1;
+  private static final long BITS_15_MASK = (1 << 15) - 1;
+  private static final long BITS_12_MASK = (1 << 12) - 1;
+  private static final long BITS_11_MASK = (1 << 11) - 1;
+  private static final long BITS_10_MASK = (1 << 10) - 1;
+  private static final long BITS_8_MASK = (1 << 8) - 1; // 7 values x 8 bits leave 4 bits unused; the last value takes them
+  private static final long BITS_7_MASK = (1 << 7) - 1; // 8 values x 7 bits leave 4 bits unused; the last value takes them
+  private static final long BITS_6_MASK = (1 << 6) - 1;
+  private static final long BITS_5_MASK = (1 << 5) - 1;
+  private static final long BITS_4_MASK = (1 << 4) - 1;
+  private static final long BITS_3_MASK = (1 << 3) - 1;
+  private static final long BITS_2_MASK = (1 << 2) - 1;
+  private static final long BITS_1_MASK = (1 << 1) - 1;
+
+  private static final int STATUS_1NUM_60BITS = 14; // STATUS_xNUM_yBITS: status code of a word holding x values of y bits each
+  private static final int STATUS_2NUM_30BITS = 13;
+  private static final int STATUS_3NUM_20BITS = 12;
+  private static final int STATUS_4NUM_15BITS = 11;
+  private static final int STATUS_5NUM_12BITS = 10;
+  private static final int STATUS_6NUM_10BITS = 9;
+  private static final int STATUS_7NUM_8BITS = 8;
+  private static final int STATUS_8NUM_7BITS = 7;
+  private static final int STATUS_10NUM_6BITS = 6;
+  private static final int STATUS_12NUM_5BITS = 5;
+  private static final int STATUS_15NUM_4BITS = 4;
+  private static final int STATUS_20NUM_3BITS = 3;
+  private static final int STATUS_30NUM_2BITS = 2;
+  private static final int STATUS_60NUM_1BITS = 1;
+
+  private int inputCompressable = 1; // 1-based position in pending of the next value add() will examine
+  private int minBits = 1; // bit width currently assumed for the values examined so far
+  private long maxFitPlus1 = (1 << minBits); // smallest value that no longer fits in minBits bits
+  private final int[] pending = new int[100];     // nocommit -- 60 or 61 should do?
+  private int inputCount; // number of values currently buffered in pending
+
+  private void reset() { // Restores the scan state to its initial values: first position, 1-bit width, empty pending buffer.
+    inputCompressable = 1;
+    minBits = 1;
+    inputCount = 0;
+    maxFitPlus1 = (1 << minBits);
+  }
+
+  // nocommit -- need low level test that streaming api
+  // didn't break anything
+
+  // Returns 0 if no new long written, else returns number
+  // of input values and out[0] has the long to write
+  public int add(int v, long[] out) { // Buffers v; once the buffered values fill a word, packs it into out[0] and returns the number of values packed.
+    //System.out.println("S64.add v=" + v + " " + (1 + inputCount - inputCompressable) + " waiting");
+    pending[inputCount++] = v;
+    while(inputCompressable <= inputCount) { // examine each buffered value that has not been accounted for yet
+      final long nextData = pending[(inputCompressable-1)] & 0xFFFFFFFFL; // read the int as an unsigned 32-bit value
+      //System.out.println("  cycle: data=" + nextData);
+      while ((nextData >= maxFitPlus1) && (minBits < NUM_DATA_BITS)) { // widen minBits until nextData fits
+        //System.out.println("  cycle maxFitPlus1=" + maxFitPlus1 + " minBits=" + minBits);
+        if ((minBits == 7) && (inputCompressable == 8) && (nextData < (maxFitPlus1 << 4))) { // 8th value of the 8x7 layout may use the 4 spare bits
+          break;
+        } else if ((minBits == 8) && (inputCompressable == 7) && (nextData < (maxFitPlus1 << 4))) { // 7th value of the 7x8 layout may use the 4 spare bits
+          break;
+        } else {
+          //System.out.println("  advance");
+          minBits++;
+          maxFitPlus1 <<= 1;
+          if ((inputCompressable * minBits) > NUM_DATA_BITS) { // at the wider width the counted values no longer fit in 60 bits
+            inputCompressable--; // leave the current value out of this word
+            //System.out.println("  hard break");
+            break;
+          }
+        }
+      }
+      inputCompressable++;
+
+      //System.out.println("  minBits=" + minBits + " count=" + (inputCompressable-1) + " inputCount=" + inputCount);
+
+      if ((inputCompressable * minBits) > NUM_DATA_BITS) { // one more value at this width would exceed 60 bits
+        // Time to compress!
+        inputCompressable--;
+        //System.out.println("  FLUSH count=" + inputCompressable);
+
+        // nocommit -- it should always be > 0... right??
+        assert inputCompressable > 0;
+
+        // Check whether a bigger number of bits can be used:
+        while ((inputCompressable * (minBits+1)) <= NUM_DATA_BITS) {
+          minBits++;
+          //System.out.println("  incr minBits=" + minBits);
+        }
+
+        /*
+          if (((inputCompressable+1) * minBits) <= NUM_DATA_BITS) {
+          // not enough input available for minBits
+          minBits++;
+          // do not compress all available input
+          inputCompressable = NUM_DATA_BITS / minBits;
+          }
+        */
+
+        // Put compression method in status bits and encode input data
+        long s9;
+        final int consumed; // number of pending values packed into s9 by the selected case
+        switch (minBits) { // add status bits and later input values
+        case 60:
+          s9 = STATUS_1NUM_60BITS;
+          s9 |= (long) (pending[0]) << 4;
+          consumed = 1;
+          break;
+        case 30:
+          s9 = STATUS_2NUM_30BITS;
+          // nocommit -- make a single expr instead of |'ing ?
+          s9 |= (long) (pending[0]) << 4;
+          s9 |= (long) (pending[1]) << 34;
+          consumed = 2;
+          break;
+        case 20:
+          s9 = STATUS_3NUM_20BITS;
+          s9 |= (long) (pending[0]) << 4;
+          s9 |= (long) (pending[1]) << 24;
+          s9 |= (long) (pending[2]) << 44;
+          consumed = 3;
+          break;
+        case 15:
+          s9 = STATUS_4NUM_15BITS;
+          s9 |= ((long) (pending[0]) << 4);
+          s9 |= (long) (pending[1]) << 19;
+          s9 |= (long) (pending[2]) << 34;
+          s9 |= (long) (pending[3]) << 49;
+          consumed = 4;
+          break;
+        case 12:
+          s9 = STATUS_5NUM_12BITS;
+          s9 |= ((long) (pending[0]) << 4);
+          s9 |= (long) (pending[1]) << 16;
+          s9 |= (long) (pending[2]) << 28;
+          s9 |= (long) (pending[3]) << 40;
+          s9 |= (long) (pending[4]) << 52;
+          consumed = 5;
+          break;
+        case 10:
+          s9 = STATUS_6NUM_10BITS;
+          s9 |= ((long) (pending[0]) << 4);
+          s9 |= (long) (pending[1]) << 14;
+          s9 |= (long) (pending[2]) << 24;
+          s9 |= (long) (pending[3]) << 34;
+          s9 |= (long) (pending[4]) << 44;
+          s9 |= (long) (pending[5]) << 54;
+          consumed = 6;
+          break;
+        case 8:
+          s9 = STATUS_7NUM_8BITS;
+          s9 |= ((long) (pending[0]) << 4);
+          s9 |= (long) (pending[1]) << 12;
+          s9 |= (long) (pending[2]) << 20;
+          s9 |= (long) (pending[3]) << 28;
+          s9 |= (long) (pending[4]) << 36;
+          s9 |= (long) (pending[5]) << 44;
+          s9 |= (long) (pending[6]) << 52; // 4 more bits
+          consumed = 7;
+          break;
+        case 7:
+          s9 = STATUS_8NUM_7BITS;
+          s9 |= ((long) (pending[0]) << 4);
+          s9 |= (long) (pending[1]) << 11;
+          s9 |= (long) (pending[2]) << 18;
+          s9 |= (long) (pending[3]) << 25;
+          s9 |= (long) (pending[4]) << 32;
+          s9 |= (long) (pending[5]) << 39;
+          s9 |= (long) (pending[6]) << 46;
+          s9 |= (long) (pending[7]) << 53; // 4 more bits
+          consumed = 8;
+          break;
+        case 6:
+          s9 = STATUS_10NUM_6BITS;
+          s9 |= (long) (pending[0]) << 4;
+          s9 |= (long) (pending[1]) << 10;
+          s9 |= (long) (pending[2]) << 16;
+          s9 |= (long) (pending[3]) << 22;
+          s9 |= (long) (pending[4]) << 28;
+          s9 |= (long) (pending[5]) << 34;
+          s9 |= (long) (pending[6]) << 40;
+          s9 |= (long) (pending[7]) << 46;
+          s9 |= (long) (pending[8]) << 52;
+          s9 |= (long) (pending[9]) << 58;
+          consumed = 10;
+          break;
+        case 5:
+          s9 = STATUS_12NUM_5BITS;
+          s9 |= (long) (pending[0]) << 4;
+          s9 |= (long) (pending[1]) << 9;
+          s9 |= (long) (pending[2]) << 14;
+          s9 |= (long) (pending[3]) << 19;
+          s9 |= (long) (pending[4]) << 24;
+          s9 |= (long) (pending[5]) << 29;
+          s9 |= (long) (pending[6]) << 34;
+          s9 |= (long) (pending[7]) << 39;
+          s9 |= (long) (pending[8]) << 44;
+          s9 |= (long) (pending[9]) << 49;
+          s9 |= (long) (pending[10]) << 54;
+          s9 |= (long) (pending[11]) << 59;
+          consumed = 12;
+          break;
+        case 4:
+          s9 = STATUS_15NUM_4BITS;
+          s9 |= (long) (pending[0]) << 4;
+          s9 |= (long) (pending[1]) << 8;
+          s9 |= (long) (pending[2]) << 12;
+          s9 |= (long) (pending[3]) << 16;
+          s9 |= (long) (pending[4]) << 20;
+          s9 |= (long) (pending[5]) << 24;
+          s9 |= (long) (pending[6]) << 28;
+          s9 |= (long) (pending[7]) << 32;
+          s9 |= (long) (pending[8]) << 36;
+          s9 |= (long) (pending[9]) << 40;
+          s9 |= (long) (pending[10]) << 44;
+          s9 |= (long) (pending[11]) << 48;
+          s9 |= (long) (pending[12]) << 52;
+          s9 |= (long) (pending[13]) << 56;
+          s9 |= (long) (pending[14]) << 60;
+          consumed = 15;
+          break;
+        case 3:
+          s9 = STATUS_20NUM_3BITS;
+          s9 |= (long) (pending[0]) << 4;
+          s9 |= (long) (pending[1]) << 7;
+          s9 |= (long) (pending[2]) << 10;
+          s9 |= (long) (pending[3]) << 13;
+          s9 |= (long) (pending[4]) << 16;
+          s9 |= (long) (pending[5]) << 19;
+          s9 |= (long) (pending[6]) << 22;
+          s9 |= (long) (pending[7]) << 25;
+          s9 |= (long) (pending[8]) << 28;
+          s9 |= (long) (pending[9]) << 31;
+          s9 |= (long) (pending[10]) << 34;
+          s9 |= (long) (pending[11]) << 37;
+          s9 |= (long) (pending[12]) << 40;
+          s9 |= (long) (pending[13]) << 43;
+          s9 |= (long) (pending[14]) << 46;
+          s9 |= (long) (pending[15]) << 49;
+          s9 |= (long) (pending[16]) << 52;
+          s9 |= (long) (pending[17]) << 55;
+          s9 |= (long) (pending[18]) << 58;
+          s9 |= (long) (pending[19]) << 61;
+          consumed = 20;
+          break;
+        case 2:
+          s9 = STATUS_30NUM_2BITS;
+          s9 |= (long) (pending[0]) << 4;
+          s9 |= (long) (pending[1]) << 6;
+          s9 |= (long) (pending[2]) << 8;
+          s9 |= (long) (pending[3]) << 10;
+          s9 |= (long) (pending[4]) << 12;
+          s9 |= (long) (pending[5]) << 14;
+          s9 |= (long) (pending[6]) << 16;
+          s9 |= (long) (pending[7]) << 18;
+          s9 |= (long) (pending[8]) << 20;
+          s9 |= (long) (pending[9]) << 22;
+          s9 |= (long) (pending[10]) << 24;
+          s9 |= (long) (pending[11]) << 26;
+          s9 |= (long) (pending[12]) << 28;
+          s9 |= (long) (pending[13]) << 30;
+          s9 |= (long) (pending[14]) << 32;
+          s9 |= (long) (pending[15]) << 34;
+          s9 |= (long) (pending[16]) << 36;
+          s9 |= (long) (pending[17]) << 38;
+          s9 |= (long) (pending[18]) << 40;
+          s9 |= (long) (pending[19]) << 42;
+          s9 |= (long) (pending[20]) << 44;
+          s9 |= (long) (pending[21]) << 46;
+          s9 |= (long) (pending[22]) << 48;
+          s9 |= (long) (pending[23]) << 50;
+          s9 |= (long) (pending[24]) << 52;
+          s9 |= (long) (pending[25]) << 54;
+          s9 |= (long) (pending[26]) << 56;
+          s9 |= (long) (pending[27]) << 58;
+          s9 |= (long) (pending[28]) << 60;
+          s9 |= (long) (pending[29]) << 62;
+          consumed = 30;
+          break;
+        case 1:
+          s9 = STATUS_60NUM_1BITS;
+          s9 |= (long) (pending[0]) << 4;
+          s9 |= (long) (pending[1]) << 5;
+          s9 |= (long) (pending[2]) << 6;
+          s9 |= (long) (pending[3]) << 7;
+          s9 |= (long) (pending[4]) << 8;
+          s9 |= (long) (pending[5]) << 9;
+          s9 |= (long) (pending[6]) << 10;
+          s9 |= (long) (pending[7]) << 11;
+          s9 |= (long) (pending[8]) << 12;
+          s9 |= (long) (pending[9]) << 13;
+          s9 |= (long) (pending[10]) << 14;
+          s9 |= (long) (pending[11]) << 15;
+          s9 |= (long) (pending[12]) << 16;
+          s9 |= (long) (pending[13]) << 17;
+          s9 |= (long) (pending[14]) << 18;
+          s9 |= (long) (pending[15]) << 19;
+          s9 |= (long) (pending[16]) << 20;
+          s9 |= (long) (pending[17]) << 21;
+          s9 |= (long) (pending[18]) << 22;
+          s9 |= (long) (pending[19]) << 23;
+          s9 |= (long) (pending[20]) << 24;
+          s9 |= (long) (pending[21]) << 25;
+          s9 |= (long) (pending[22]) << 26;
+          s9 |= (long) (pending[23]) << 27;
+          s9 |= (long) (pending[24]) << 28;
+          s9 |= (long) (pending[25]) << 29;
+          s9 |= (long) (pending[26]) << 30;
+          s9 |= (long) (pending[27]) << 31;
+          s9 |= (long) (pending[28]) << 32;
+          s9 |= (long) (pending[29]) << 33;
+          s9 |= (long) (pending[30]) << 34;
+          s9 |= (long) (pending[31]) << 35;
+          s9 |= (long) (pending[32]) << 36;
+          s9 |= (long) (pending[33]) << 37;
+          s9 |= (long) (pending[34]) << 38;
+          s9 |= (long) (pending[35]) << 39;
+          s9 |= (long) (pending[36]) << 40;
+          s9 |= (long) (pending[37]) << 41;
+          s9 |= (long) (pending[38]) << 42;
+          s9 |= (long) (pending[39]) << 43;
+          s9 |= (long) (pending[40]) << 44;
+          s9 |= (long) (pending[41]) << 45;
+          s9 |= (long) (pending[42]) << 46;
+          s9 |= (long) (pending[43]) << 47;
+          s9 |= (long) (pending[44]) << 48;
+          s9 |= (long) (pending[45]) << 49;
+          s9 |= (long) (pending[46]) << 50;
+          s9 |= (long) (pending[47]) << 51;
+          s9 |= (long) (pending[48]) << 52;
+          s9 |= (long) (pending[49]) << 53;
+          s9 |= (long) (pending[50]) << 54;
+          s9 |= (long) (pending[51]) << 55;
+          s9 |= (long) (pending[52]) << 56;
+          s9 |= (long) (pending[53]) << 57;
+          s9 |= (long) (pending[54]) << 58;
+          s9 |= (long) (pending[55]) << 59;
+          s9 |= (long) (pending[56]) << 60;
+          s9 |= (long) (pending[57]) << 61;
+          s9 |= (long) (pending[58]) << 62;
+          s9 |= (long) (pending[59]) << 63;
+          consumed = 60;
+          break;
+        default:
+          assert false;
+          s9 = 0;
+          consumed = 60;
+          //throw new Error("S98b.compressSingle internal error: unknown minBits: " + minBits);
+        }
+
+        final int leftover = inputCount - consumed; // buffered values that were not packed into this word
+        assert leftover >= 0: "consumed=" + consumed + " vs " + inputCompressable;
+
+        /*
+        for(int x=0;x<consumed;x++) {
+          System.out.println(""+pending[x]);
+        }
+        */
+        /*
+        // like cd burning!  ;)
+        {
+        int[] test = new int[consumed];
+        int ct = decompressSingle(s9, test, 0);
+        assert ct == consumed;
+        for(int x=0;x<ct;x++) {
+        assert test[x] == pending[x];
+        }
+        }
+        */
+
+        //System.out.println("  return consumed=" + consumed);
+        reset(); // zeroes inputCount, which is why leftover was computed first
+
+        // save leftovers:
+        for(int i=0;i<leftover;i++) {
+          pending[i] = pending[consumed+i];
+        }
+        inputCount = leftover; // leftovers are re-examined from scratch on the next add() call
+
+        out[0] = s9;
+        return consumed;
+      }
+    }
+
+    return 0; // no word was completed by this value
+  }
+
+  public static int compressSingle(final int[] uncompressed, final int inOffset, final int inSize, final LongBuffer compressedBuffer, final int outOffset) {
+    if (inSize < 1) {
+      throw new IllegalArgumentException("Cannot compress input with non positive size " + inSize);
+    }
+    int inputCompressable = 1;
+    int minBits = 1;
+    long maxFitPlus1 = (1 << minBits);
+    long nextData;
+
+    do {
+      nextData = uncompressed[inOffset + inputCompressable - 1] & 0xFFFFFFFFL;
+      if (nextData < 0) {
+        throw new IllegalArgumentException("Cannot compress negative input " + nextData + " (at index " + (inOffset+inputCompressable-1) + ")");
+      }
+      while ((nextData >= maxFitPlus1) && (minBits < NUM_DATA_BITS))
+      {
+        if ((minBits == 7) && (inputCompressable == 8) && (nextData < (maxFitPlus1 << 4))) {
+          break;
+        } else if ((minBits == 8) && (inputCompressable == 7) && (nextData < (maxFitPlus1 << 4))) {
+          break;
+        } else {
+          minBits++;
+          maxFitPlus1 <<= 1;
+          if ((inputCompressable * minBits) > NUM_DATA_BITS) {
+            inputCompressable--;
+            break;
+          }
+        }
+      }
+      inputCompressable++;
+    } while (((inputCompressable * minBits) <= NUM_DATA_BITS) && (inputCompressable <= inSize));
+
+    inputCompressable--;
+    if (inputCompressable == 0) {
+      throw new IllegalArgumentException("Cannot compress input " + nextData + " with more than " + NUM_DATA_BITS + " bits (at offSet " + inOffset + ")");
+    }
+
+    // Check whether a bigger number of bits can be used:
+    while ((inputCompressable * (minBits+1)) <= NUM_DATA_BITS) {
+      minBits++;
+    }
+
+    if (((inputCompressable+1) * minBits) <= NUM_DATA_BITS) {
+      // not enough input available for minBits
+      minBits++;
+      // do not compress all available input
+      inputCompressable = NUM_DATA_BITS / minBits;
+    }
+
+    // Put compression method in status bits and encode input data
+    long s9;
+    switch (minBits) { // add status bits and later input values
+    case 60:
+      s9 = STATUS_1NUM_60BITS;
+      s9 |= (long) (uncompressed[inOffset]) << 4;
+      compressedBuffer.put(outOffset, s9);
+      return 1;
+    case 30:
+      s9 = STATUS_2NUM_30BITS;
+      s9 |= (long) (uncompressed[inOffset]) << 4;
+      s9 |= (long) (uncompressed[inOffset+1]) << 34;
+      compressedBuffer.put(outOffset, s9);
+      return 2;
+    case 20:
+      s9 = STATUS_3NUM_20BITS;
+      s9 |= (long) (uncompressed[inOffset]) << 4;
+      s9 |= (long) (uncompressed[inOffset+1]) << 24;
+      s9 |= (long) (uncompressed[inOffset+2]) << 44;
+      compressedBuffer.put(outOffset, s9);
+      return 3;
+    case 15:
+      s9 = STATUS_4NUM_15BITS;
+      s9 |= ((long) (uncompressed[inOffset]) << 4);
+      s9 |= (long) (uncompressed[inOffset+1]) << 19;
+      s9 |= (long) (uncompressed[inOffset+2]) << 34;
+      s9 |= (long) (uncompressed[inOffset+3]) << 49;
+      compressedBuffer.put(outOffset, s9);
+      return 4;
+    case 12:
+      s9 = STATUS_5NUM_12BITS;
+      s9 |= ((long) (uncompressed[inOffset]) << 4);
+      s9 |= (long) (uncompressed[inOffset+1]) << 16;
+      s9 |= (long) (uncompressed[inOffset+2]) << 28;
+      s9 |= (long) (uncompressed[inOffset+3]) << 40;
+      s9 |= (long) (uncompressed[inOffset+4]) << 52;
+      compressedBuffer.put(outOffset, s9);
+      return 5;
+    case 10:
+      s9 = STATUS_6NUM_10BITS;
+      s9 |= ((long) (uncompressed[inOffset]) << 4);
+      s9 |= (long) (uncompressed[inOffset+1]) << 14;
+      s9 |= (long) (uncompressed[inOffset+2]) << 24;
+      s9 |= (long) (uncompressed[inOffset+3]) << 34;
+      s9 |= (long) (uncompressed[inOffset+4]) << 44;
+      s9 |= (long) (uncompressed[inOffset+5]) << 54;
+      compressedBuffer.put(outOffset, s9);
+      return 6;
+    case 8:
+      s9 = STATUS_7NUM_8BITS;
+      s9 |= ((long) (uncompressed[inOffset]) << 4);
+      s9 |= (long) (uncompressed[inOffset+1]) << 12;
+      s9 |= (long) (uncompressed[inOffset+2]) << 20;
+      s9 |= (long) (uncompressed[inOffset+3]) << 28;
+      s9 |= (long) (uncompressed[inOffset+4]) << 36;
+      s9 |= (long) (uncompressed[inOffset+5]) << 44;
+      s9 |= (long) (uncompressed[inOffset+6]) << 52; // 4 more bits
+      compressedBuffer.put(outOffset, s9);
+      return 7;
+    case 7:
+      s9 = STATUS_8NUM_7BITS;
+      s9 |= ((long) (uncompressed[inOffset]) << 4);
+      s9 |= (long) (uncompressed[inOffset+1]) << 11;
+      s9 |= (long) (uncompressed[inOffset+2]) << 18;
+      s9 |= (long) (uncompressed[inOffset+3]) << 25;
+      s9 |= (long) (uncompressed[inOffset+4]) << 32;
+      s9 |= (long) (uncompressed[inOffset+5]) << 39;
+      s9 |= (long) (uncompressed[inOffset+6]) << 46;
+      s9 |= (long) (uncompressed[inOffset+7]) << 53; // 4 more bits
+      compressedBuffer.put(outOffset, s9);
+      return 8;
+    case 6:
+      s9 = STATUS_10NUM_6BITS;
+      s9 |= (long) (uncompressed[inOffset]) << 4;
+      s9 |= (long) (uncompressed[inOffset+1]) << 10;
+      s9 |= (long) (uncompressed[inOffset+2]) << 16;
+      s9 |= (long) (uncompressed[inOffset+3]) << 22;
+      s9 |= (long) (uncompressed[inOffset+4]) << 28;
+      s9 |= (long) (uncompressed[inOffset+5]) << 34;
+      s9 |= (long) (uncompressed[inOffset+6]) << 40;
+      s9 |= (long) (uncompressed[inOffset+7]) << 46;
+      s9 |= (long) (uncompressed[inOffset+8]) << 52;
+      s9 |= (long) (uncompressed[inOffset+9]) << 58;
+      compressedBuffer.put(outOffset, s9);
+      return 10;
+    case 5:
+      s9 = STATUS_12NUM_5BITS;
+      s9 |= (long) (uncompressed[inOffset]) << 4;
+      s9 |= (long) (uncompressed[inOffset+1]) << 9;
+      s9 |= (long) (uncompressed[inOffset+2]) << 14;
+      s9 |= (long) (uncompressed[inOffset+3]) << 19;
+      s9 |= (long) (uncompressed[inOffset+4]) << 24;
+      s9 |= (long) (uncompressed[inOffset+5]) << 29;
+      s9 |= (long) (uncompressed[inOffset+6]) << 34;
+      s9 |= (long) (uncompressed[inOffset+7]) << 39;
+      s9 |= (long) (uncompressed[inOffset+8]) << 44;
+      s9 |= (long) (uncompressed[inOffset+9]) << 49;
+      s9 |= (long) (uncompressed[inOffset+10]) << 54;
+      s9 |= (long) (uncompressed[inOffset+11]) << 59;
+      compressedBuffer.put(outOffset, s9);
+      return 12;
+    case 4:
+      s9 = STATUS_15NUM_4BITS;
+      s9 |= (long) (uncompressed[inOffset]) << 4;
+      s9 |= (long) (uncompressed[inOffset+1]) << 8;
+      s9 |= (long) (uncompressed[inOffset+2]) << 12;
+      s9 |= (long) (uncompressed[inOffset+3]) << 16;
+      s9 |= (long) (uncompressed[inOffset+4]) << 20;
+      s9 |= (long) (uncompressed[inOffset+5]) << 24;
+      s9 |= (long) (uncompressed[inOffset+6]) << 28;
+      s9 |= (long) (uncompressed[inOffset+7]) << 32;
+      s9 |= (long) (uncompressed[inOffset+8]) << 36;
+      s9 |= (long) (uncompressed[inOffset+9]) << 40;
+      s9 |= (long) (uncompressed[inOffset+10]) << 44;
+      s9 |= (long) (uncompressed[inOffset+11]) << 48;
+      s9 |= (long) (uncompressed[inOffset+12]) << 52;
+      s9 |= (long) (uncompressed[inOffset+13]) << 56;
+      s9 |= (long) (uncompressed[inOffset+14]) << 60;
+      compressedBuffer.put(outOffset, s9);
+      return 15;
+    case 3:
+      s9 = STATUS_20NUM_3BITS;
+      s9 |= (long) (uncompressed[inOffset]) << 4;
+      s9 |= (long) (uncompressed[inOffset+1]) << 7;
+      s9 |= (long) (uncompressed[inOffset+2]) << 10;
+      s9 |= (long) (uncompressed[inOffset+3]) << 13;
+      s9 |= (long) (uncompressed[inOffset+4]) << 16;
+      s9 |= (long) (uncompressed[inOffset+5]) << 19;
+      s9 |= (long) (uncompressed[inOffset+6]) << 22;
+      s9 |= (long) (uncompressed[inOffset+7]) << 25;
+      s9 |= (long) (uncompressed[inOffset+8]) << 28;
+      s9 |= (long) (uncompressed[inOffset+9]) << 31;
+      s9 |= (long) (uncompressed[inOffset+10]) << 34;
+      s9 |= (long) (uncompressed[inOffset+11]) << 37;
+      s9 |= (long) (uncompressed[inOffset+12]) << 40;
+      s9 |= (long) (uncompressed[inOffset+13]) << 43;
+      s9 |= (long) (uncompressed[inOffset+14]) << 46;
+      s9 |= (long) (uncompressed[inOffset+15]) << 49;
+      s9 |= (long) (uncompressed[inOffset+16]) << 52;
+      s9 |= (long) (uncompressed[inOffset+17]) << 55;
+      s9 |= (long) (uncompressed[inOffset+18]) << 58;
+      s9 |= (long) (uncompressed[inOffset+19]) << 61;
+      compressedBuffer.put(outOffset, s9);
+      return 20;
+    case 2:
+      s9 = STATUS_30NUM_2BITS;
+      s9 |= (long) (uncompressed[inOffset]) << 4;
+      s9 |= (long) (uncompressed[inOffset+1]) << 6;
+      s9 |= (long) (uncompressed[inOffset+2]) << 8;
+      s9 |= (long) (uncompressed[inOffset+3]) << 10;
+      s9 |= (long) (uncompressed[inOffset+4]) << 12;
+      s9 |= (long) (uncompressed[inOffset+5]) << 14;
+      s9 |= (long) (uncompressed[inOffset+6]) << 16;
+      s9 |= (long) (uncompressed[inOffset+7]) << 18;
+      s9 |= (long) (uncompressed[inOffset+8]) << 20;
+      s9 |= (long) (uncompressed[inOffset+9]) << 22;
+      s9 |= (long) (uncompressed[inOffset+10]) << 24;
+      s9 |= (long) (uncompressed[inOffset+11]) << 26;
+      s9 |= (long) (uncompressed[inOffset+12]) << 28;
+      s9 |= (long) (uncompressed[inOffset+13]) << 30;
+      s9 |= (long) (uncompressed[inOffset+14]) << 32;
+      s9 |= (long) (uncompressed[inOffset+15]) << 34;
+      s9 |= (long) (uncompressed[inOffset+16]) << 36;
+      s9 |= (long) (uncompressed[inOffset+17]) << 38;
+      s9 |= (long) (uncompressed[inOffset+18]) << 40;
+      s9 |= (long) (uncompressed[inOffset+19]) << 42;
+      s9 |= (long) (uncompressed[inOffset+20]) << 44;
+      s9 |= (long) (uncompressed[inOffset+21]) << 46;
+      s9 |= (long) (uncompressed[inOffset+22]) << 48;
+      s9 |= (long) (uncompressed[inOffset+23]) << 50;
+      s9 |= (long) (uncompressed[inOffset+24]) << 52;
+      s9 |= (long) (uncompressed[inOffset+25]) << 54;
+      s9 |= (long) (uncompressed[inOffset+26]) << 56;
+      s9 |= (long) (uncompressed[inOffset+27]) << 58;
+      s9 |= (long) (uncompressed[inOffset+28]) << 60;
+      s9 |= (long) (uncompressed[inOffset+29]) << 62;
+      compressedBuffer.put(outOffset, s9);
+      return 30;
+    case 1:
+      s9 = STATUS_60NUM_1BITS;
+      s9 |= (long) (uncompressed[inOffset]) << 4;
+      s9 |= (long) (uncompressed[inOffset+1]) << 5;
+      s9 |= (long) (uncompressed[inOffset+2]) << 6;
+      s9 |= (long) (uncompressed[inOffset+3]) << 7;
+      s9 |= (long) (uncompressed[inOffset+4]) << 8;
+      s9 |= (long) (uncompressed[inOffset+5]) << 9;
+      s9 |= (long) (uncompressed[inOffset+6]) << 10;
+      s9 |= (long) (uncompressed[inOffset+7]) << 11;
+      s9 |= (long) (uncompressed[inOffset+8]) << 12;
+      s9 |= (long) (uncompressed[inOffset+9]) << 13;
+      s9 |= (long) (uncompressed[inOffset+10]) << 14;
+      s9 |= (long) (uncompressed[inOffset+11]) << 15;
+      s9 |= (long) (uncompressed[inOffset+12]) << 16;
+      s9 |= (long) (uncompressed[inOffset+13]) << 17;
+      s9 |= (long) (uncompressed[inOffset+14]) << 18;
+      s9 |= (long) (uncompressed[inOffset+15]) << 19;
+      s9 |= (long) (uncompressed[inOffset+16]) << 20;
+      s9 |= (long) (uncompressed[inOffset+17]) << 21;
+      s9 |= (long) (uncompressed[inOffset+18]) << 22;
+      s9 |= (long) (uncompressed[inOffset+19]) << 23;
+      s9 |= (long) (uncompressed[inOffset+20]) << 24;
+      s9 |= (long) (uncompressed[inOffset+21]) << 25;
+      s9 |= (long) (uncompressed[inOffset+22]) << 26;
+      s9 |= (long) (uncompressed[inOffset+23]) << 27;
+      s9 |= (long) (uncompressed[inOffset+24]) << 28;
+      s9 |= (long) (uncompressed[inOffset+25]) << 29;
+      s9 |= (long) (uncompressed[inOffset+26]) << 30;
+      s9 |= (long) (uncompressed[inOffset+27]) << 31;
+      s9 |= (long) (uncompressed[inOffset+28]) << 32;
+      s9 |= (long) (uncompressed[inOffset+29]) << 33;
+      s9 |= (long) (uncompressed[inOffset+30]) << 34;
+      s9 |= (long) (uncompressed[inOffset+31]) << 35;
+      s9 |= (long) (uncompressed[inOffset+32]) << 36;
+      s9 |= (long) (uncompressed[inOffset+33]) << 37;
+      s9 |= (long) (uncompressed[inOffset+34]) << 38;
+      s9 |= (long) (uncompressed[inOffset+35]) << 39;
+      s9 |= (long) (uncompressed[inOffset+36]) << 40;
+      s9 |= (long) (uncompressed[inOffset+37]) << 41;
+      s9 |= (long) (uncompressed[inOffset+38]) << 42;
+      s9 |= (long) (uncompressed[inOffset+39]) << 43;
+      s9 |= (long) (uncompressed[inOffset+40]) << 44;
+      s9 |= (long) (uncompressed[inOffset+41]) << 45;
+      s9 |= (long) (uncompressed[inOffset+42]) << 46;
+      s9 |= (long) (uncompressed[inOffset+43]) << 47;
+      s9 |= (long) (uncompressed[inOffset+44]) << 48;
+      s9 |= (long) (uncompressed[inOffset+45]) << 49;
+      s9 |= (long) (uncompressed[inOffset+46]) << 50;
+      s9 |= (long) (uncompressed[inOffset+47]) << 51;
+      s9 |= (long) (uncompressed[inOffset+48]) << 52;
+      s9 |= (long) (uncompressed[inOffset+49]) << 53;
+      s9 |= (long) (uncompressed[inOffset+50]) << 54;
+      s9 |= (long) (uncompressed[inOffset+51]) << 55;
+      s9 |= (long) (uncompressed[inOffset+52]) << 56;
+      s9 |= (long) (uncompressed[inOffset+53]) << 57;
+      s9 |= (long) (uncompressed[inOffset+54]) << 58;
+      s9 |= (long) (uncompressed[inOffset+55]) << 59;
+      s9 |= (long) (uncompressed[inOffset+56]) << 60;
+      s9 |= (long) (uncompressed[inOffset+57]) << 61;
+      s9 |= (long) (uncompressed[inOffset+58]) << 62;
+      s9 |= (long) (uncompressed[inOffset+59]) << 63;
+      compressedBuffer.put(outOffset, s9);
+      return 60;
+    default:
+      throw new Error("S98b.compressSingle internal error: unknown minBits: " + minBits);
+    }
+  }
+
+
+  /**
+   * Decodes the values packed into a single 64-bit word.
+   *
+   * <p>The low 4 bits of {@code s9} select the layout (how many values of how
+   * many bits each); the values themselves start at bit 4 and are stored from
+   * the low bits upwards. The extraction is fully unrolled per layout.
+   *
+   * @param s9           the packed word
+   * @param decompressed destination array; must have room for every value the
+   *                     word holds (up to 60) starting at {@code outOffset}
+   * @param outOffset    index in {@code decompressed} of the first decoded value
+   * @return the number of values written to {@code decompressed}
+   * @throws IllegalArgumentException if the low 4 bits are not a known layout
+   */
+  public static int decompressSingle(final long s9, final int[] decompressed, final int outOffset) {
+    //System.out.println("S64.decompressSingle header=" + (int) (s9&15));
+    // nocommit cast s9 to int then & with 15???
+    switch ((int) (s9 & 15)) {
+    case STATUS_1NUM_60BITS:
+      decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_60_MASK); // The value can't be bigger than 2^32-1
+      return 1;
+    case STATUS_2NUM_30BITS:
+      decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_30_MASK);
+      decompressed[outOffset+1] = (int) ((s9 >>> 34) & BITS_30_MASK);
+      return 2;
+    case STATUS_3NUM_20BITS:
+      decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_20_MASK);
+      decompressed[outOffset+1] = (int) ((s9 >>> 24) & BITS_20_MASK);
+      decompressed[outOffset+2] = (int) ((s9 >>> 44) & BITS_20_MASK);
+      return 3;
+    case STATUS_4NUM_15BITS:
+      decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_15_MASK);
+      decompressed[outOffset+1] = (int) ((s9 >>> 19) & BITS_15_MASK);
+      decompressed[outOffset+2] = (int) ((s9 >>> 34) & BITS_15_MASK);
+      decompressed[outOffset+3] = (int) ((s9 >>> 49) & BITS_15_MASK);
+      return 4;
+    case STATUS_5NUM_12BITS:
+      decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_12_MASK);
+      decompressed[outOffset+1] = (int) ((s9 >>> 16) & BITS_12_MASK);
+      decompressed[outOffset+2] = (int) ((s9 >>> 28) & BITS_12_MASK);
+      decompressed[outOffset+3] = (int) ((s9 >>> 40) & BITS_12_MASK);
+      decompressed[outOffset+4] = (int) ((s9 >>> 52) & BITS_12_MASK);
+      return 5;
+    case STATUS_6NUM_10BITS:
+      decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_10_MASK);
+      decompressed[outOffset+1] = (int) ((s9 >>> 14) & BITS_10_MASK);
+      decompressed[outOffset+2] = (int) ((s9 >>> 24) & BITS_10_MASK);
+      decompressed[outOffset+3] = (int) ((s9 >>> 34) & BITS_10_MASK);
+      decompressed[outOffset+4] = (int) ((s9 >>> 44) & BITS_10_MASK);
+      decompressed[outOffset+5] = (int) ((s9 >>> 54) & BITS_10_MASK);
+      return 6;
+    case STATUS_7NUM_8BITS:
+      decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_8_MASK);
+      decompressed[outOffset+1] = (int) ((s9 >>> 12) & BITS_8_MASK);
+      decompressed[outOffset+2] = (int) ((s9 >>> 20) & BITS_8_MASK);
+      decompressed[outOffset+3] = (int) ((s9 >>> 28) & BITS_8_MASK);
+      decompressed[outOffset+4] = (int) ((s9 >>> 36) & BITS_8_MASK);
+      decompressed[outOffset+5] = (int) ((s9 >>> 44) & BITS_8_MASK);
+      // NOTE(review): 7*8 = 56 of the 60 data bits are used, so the last slot is
+      // read with a 12-bit mask covering the 4 spare top bits as well; presumably
+      // this mirrors what the encoder writes for this layout -- confirm.
+      decompressed[outOffset+6] = (int) ((s9 >>> 52) & BITS_12_MASK);
+      return 7;
+    case STATUS_8NUM_7BITS:
+      decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_7_MASK);
+      decompressed[outOffset+1] = (int) ((s9 >>> 11) & BITS_7_MASK);
+      decompressed[outOffset+2] = (int) ((s9 >>> 18) & BITS_7_MASK);
+      decompressed[outOffset+3] = (int) ((s9 >>> 25) & BITS_7_MASK);
+      decompressed[outOffset+4] = (int) ((s9 >>> 32) & BITS_7_MASK);
+      decompressed[outOffset+5] = (int) ((s9 >>> 39) & BITS_7_MASK);
+      decompressed[outOffset+6] = (int) ((s9 >>> 46) & BITS_7_MASK);
+      // NOTE(review): 8*7 = 56 of the 60 data bits are used, so the last slot is
+      // read with an 11-bit mask covering the 4 spare top bits as well; presumably
+      // this mirrors what the encoder writes for this layout -- confirm.
+      decompressed[outOffset+7] = (int) ((s9 >>> 53) & BITS_11_MASK);
+      return 8;
+    case STATUS_10NUM_6BITS:
+      decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_6_MASK);
+      decompressed[outOffset+1] = (int) ((s9 >>> 10) & BITS_6_MASK);
+      decompressed[outOffset+2] = (int) ((s9 >>> 16) & BITS_6_MASK);
+      decompressed[outOffset+3] = (int) ((s9 >>> 22) & BITS_6_MASK);
+      decompressed[outOffset+4] = (int) ((s9 >>> 28) & BITS_6_MASK);
+      decompressed[outOffset+5] = (int) ((s9 >>> 34) & BITS_6_MASK);
+      decompressed[outOffset+6] = (int) ((s9 >>> 40) & BITS_6_MASK);
+      decompressed[outOffset+7] = (int) ((s9 >>> 46) & BITS_6_MASK);
+      decompressed[outOffset+8] = (int) ((s9 >>> 52) & BITS_6_MASK);
+      decompressed[outOffset+9] = (int) ((s9 >>> 58) & BITS_6_MASK);
+      return 10;
+    case STATUS_12NUM_5BITS:
+      decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_5_MASK);
+      decompressed[outOffset+1] = (int) ((s9 >>> 9) & BITS_5_MASK);
+      decompressed[outOffset+2] = (int) ((s9 >>> 14) & BITS_5_MASK);
+      decompressed[outOffset+3] = (int) ((s9 >>> 19) & BITS_5_MASK);
+      decompressed[outOffset+4] = (int) ((s9 >>> 24) & BITS_5_MASK);
+      decompressed[outOffset+5] = (int) ((s9 >>> 29) & BITS_5_MASK);
+      decompressed[outOffset+6] = (int) ((s9 >>> 34) & BITS_5_MASK);
+      decompressed[outOffset+7] = (int) ((s9 >>> 39) & BITS_5_MASK);
+      decompressed[outOffset+8] = (int) ((s9 >>> 44) & BITS_5_MASK);
+      decompressed[outOffset+9] = (int) ((s9 >>> 49) & BITS_5_MASK);
+      decompressed[outOffset+10] = (int) ((s9 >>> 54) & BITS_5_MASK);
+      decompressed[outOffset+11] = (int) ((s9 >>> 59) & BITS_5_MASK);
+      return 12;
+    case STATUS_15NUM_4BITS:
+      decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_4_MASK);
+      decompressed[outOffset+1] = (int) ((s9 >>> 8) & BITS_4_MASK);
+      decompressed[outOffset+2] = (int) ((s9 >>> 12) & BITS_4_MASK);
+      decompressed[outOffset+3] = (int) ((s9 >>> 16) & BITS_4_MASK);
+      decompressed[outOffset+4] = (int) ((s9 >>> 20) & BITS_4_MASK);
+      decompressed[outOffset+5] = (int) ((s9 >>> 24) & BITS_4_MASK);
+      decompressed[outOffset+6] = (int) ((s9 >>> 28) & BITS_4_MASK);
+      decompressed[outOffset+7] = (int) ((s9 >>> 32) & BITS_4_MASK);
+      decompressed[outOffset+8] = (int) ((s9 >>> 36) & BITS_4_MASK);
+      decompressed[outOffset+9] = (int) ((s9 >>> 40) & BITS_4_MASK);
+      decompressed[outOffset+10] = (int) ((s9 >>> 44) & BITS_4_MASK);
+      decompressed[outOffset+11] = (int) ((s9 >>> 48) & BITS_4_MASK);
+      decompressed[outOffset+12] = (int) ((s9 >>> 52) & BITS_4_MASK);
+      decompressed[outOffset+13] = (int) ((s9 >>> 56) & BITS_4_MASK);
+      decompressed[outOffset+14] = (int) ((s9 >>> 60) & BITS_4_MASK);
+      return 15;
+    case STATUS_20NUM_3BITS:
+      decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_3_MASK);
+      decompressed[outOffset+1] = (int) ((s9 >>> 7) & BITS_3_MASK);
+      decompressed[outOffset+2] = (int) ((s9 >>> 10) & BITS_3_MASK);
+      decompressed[outOffset+3] = (int) ((s9 >>> 13) & BITS_3_MASK);
+      decompressed[outOffset+4] = (int) ((s9 >>> 16) & BITS_3_MASK);
+      decompressed[outOffset+5] = (int) ((s9 >>> 19) & BITS_3_MASK);
+      decompressed[outOffset+6] = (int) ((s9 >>> 22) & BITS_3_MASK);
+      decompressed[outOffset+7] = (int) ((s9 >>> 25) & BITS_3_MASK);
+      decompressed[outOffset+8] = (int) ((s9 >>> 28) & BITS_3_MASK);
+      decompressed[outOffset+9] = (int) ((s9 >>> 31) & BITS_3_MASK);
+      decompressed[outOffset+10] = (int) ((s9 >>> 34) & BITS_3_MASK);
+      decompressed[outOffset+11] = (int) ((s9 >>> 37) & BITS_3_MASK);
+      decompressed[outOffset+12] = (int) ((s9 >>> 40) & BITS_3_MASK);
+      decompressed[outOffset+13] = (int) ((s9 >>> 43) & BITS_3_MASK);
+      decompressed[outOffset+14] = (int) ((s9 >>> 46) & BITS_3_MASK);
+      decompressed[outOffset+15] = (int) ((s9 >>> 49) & BITS_3_MASK);
+      decompressed[outOffset+16] = (int) ((s9 >>> 52) & BITS_3_MASK);
+      decompressed[outOffset+17] = (int) ((s9 >>> 55) & BITS_3_MASK);
+      decompressed[outOffset+18] = (int) ((s9 >>> 58) & BITS_3_MASK);
+      decompressed[outOffset+19] = (int) ((s9 >>> 61) & BITS_3_MASK);
+      return 20;
+    case STATUS_30NUM_2BITS:
+      decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_2_MASK);
+      decompressed[outOffset+1] = (int) ((s9 >>> 6) & BITS_2_MASK);
+      decompressed[outOffset+2] = (int) ((s9 >>> 8) & BITS_2_MASK);
+      decompressed[outOffset+3] = (int) ((s9 >>> 10) & BITS_2_MASK);
+      decompressed[outOffset+4] = (int) ((s9 >>> 12) & BITS_2_MASK);
+      decompressed[outOffset+5] = (int) ((s9 >>> 14) & BITS_2_MASK);
+      decompressed[outOffset+6] = (int) ((s9 >>> 16) & BITS_2_MASK);
+      decompressed[outOffset+7] = (int) ((s9 >>> 18) & BITS_2_MASK);
+      decompressed[outOffset+8] = (int) ((s9 >>> 20) & BITS_2_MASK);
+      decompressed[outOffset+9] = (int) ((s9 >>> 22) & BITS_2_MASK);
+      decompressed[outOffset+10] = (int) ((s9 >>> 24) & BITS_2_MASK);
+      decompressed[outOffset+11] = (int) ((s9 >>> 26) & BITS_2_MASK);
+      decompressed[outOffset+12] = (int) ((s9 >>> 28) & BITS_2_MASK);
+      decompressed[outOffset+13] = (int) ((s9 >>> 30) & BITS_2_MASK);
+      decompressed[outOffset+14] = (int) ((s9 >>> 32) & BITS_2_MASK);
+      decompressed[outOffset+15] = (int) ((s9 >>> 34) & BITS_2_MASK);
+      decompressed[outOffset+16] = (int) ((s9 >>> 36) & BITS_2_MASK);
+      decompressed[outOffset+17] = (int) ((s9 >>> 38) & BITS_2_MASK);
+      decompressed[outOffset+18] = (int) ((s9 >>> 40) & BITS_2_MASK);
+      decompressed[outOffset+19] = (int) ((s9 >>> 42) & BITS_2_MASK);
+      decompressed[outOffset+20] = (int) ((s9 >>> 44) & BITS_2_MASK);
+      decompressed[outOffset+21] = (int) ((s9 >>> 46) & BITS_2_MASK);
+      decompressed[outOffset+22] = (int) ((s9 >>> 48) & BITS_2_MASK);
+      decompressed[outOffset+23] = (int) ((s9 >>> 50) & BITS_2_MASK);
+      decompressed[outOffset+24] = (int) ((s9 >>> 52) & BITS_2_MASK);
+      decompressed[outOffset+25] = (int) ((s9 >>> 54) & BITS_2_MASK);
+      decompressed[outOffset+26] = (int) ((s9 >>> 56) & BITS_2_MASK);
+      decompressed[outOffset+27] = (int) ((s9 >>> 58) & BITS_2_MASK);
+      decompressed[outOffset+28] = (int) ((s9 >>> 60) & BITS_2_MASK);
+      decompressed[outOffset+29] = (int) ((s9 >>> 62) & BITS_2_MASK);
+      return 30;
+    case STATUS_60NUM_1BITS:
+      decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_1_MASK);
+      decompressed[outOffset+1] = (int) ((s9 >>> 5) & BITS_1_MASK);
+      decompressed[outOffset+2] = (int) ((s9 >>> 6) & BITS_1_MASK);
+      decompressed[outOffset+3] = (int) ((s9 >>> 7) & BITS_1_MASK);
+      decompressed[outOffset+4] = (int) ((s9 >>> 8) & BITS_1_MASK);
+      decompressed[outOffset+5] = (int) ((s9 >>> 9) & BITS_1_MASK);
+      decompressed[outOffset+6] = (int) ((s9 >>> 10) & BITS_1_MASK);
+      decompressed[outOffset+7] = (int) ((s9 >>> 11) & BITS_1_MASK);
+      decompressed[outOffset+8] = (int) ((s9 >>> 12) & BITS_1_MASK);
+      decompressed[outOffset+9] = (int) ((s9 >>> 13) & BITS_1_MASK);
+      decompressed[outOffset+10] = (int) ((s9 >>> 14) & BITS_1_MASK);
+      decompressed[outOffset+11] = (int) ((s9 >>> 15) & BITS_1_MASK);
+      decompressed[outOffset+12] = (int) ((s9 >>> 16) & BITS_1_MASK);
+      decompressed[outOffset+13] = (int) ((s9 >>> 17) & BITS_1_MASK);
+      decompressed[outOffset+14] = (int) ((s9 >>> 18) & BITS_1_MASK);
+      decompressed[outOffset+15] = (int) ((s9 >>> 19) & BITS_1_MASK);
+      decompressed[outOffset+16] = (int) ((s9 >>> 20) & BITS_1_MASK);
+      decompressed[outOffset+17] = (int) ((s9 >>> 21) & BITS_1_MASK);
+      decompressed[outOffset+18] = (int) ((s9 >>> 22) & BITS_1_MASK);
+      decompressed[outOffset+19] = (int) ((s9 >>> 23) & BITS_1_MASK);
+      decompressed[outOffset+20] = (int) ((s9 >>> 24) & BITS_1_MASK);
+      decompressed[outOffset+21] = (int) ((s9 >>> 25) & BITS_1_MASK);
+      decompressed[outOffset+22] = (int) ((s9 >>> 26) & BITS_1_MASK);
+      decompressed[outOffset+23] = (int) ((s9 >>> 27) & BITS_1_MASK);
+      decompressed[outOffset+24] = (int) ((s9 >>> 28) & BITS_1_MASK);
+      decompressed[outOffset+25] = (int) ((s9 >>> 29) & BITS_1_MASK);
+      decompressed[outOffset+26] = (int) ((s9 >>> 30) & BITS_1_MASK);
+      decompressed[outOffset+27] = (int) ((s9 >>> 31) & BITS_1_MASK);
+      decompressed[outOffset+28] = (int) ((s9 >>> 32) & BITS_1_MASK);
+      decompressed[outOffset+29] = (int) ((s9 >>> 33) & BITS_1_MASK);
+      decompressed[outOffset+30] = (int) ((s9 >>> 34) & BITS_1_MASK);
+      decompressed[outOffset+31] = (int) ((s9 >>> 35) & BITS_1_MASK);
+      decompressed[outOffset+32] = (int) ((s9 >>> 36) & BITS_1_MASK);
+      decompressed[outOffset+33] = (int) ((s9 >>> 37) & BITS_1_MASK);
+      decompressed[outOffset+34] = (int) ((s9 >>> 38) & BITS_1_MASK);
+      decompressed[outOffset+35] = (int) ((s9 >>> 39) & BITS_1_MASK);
+      decompressed[outOffset+36] = (int) ((s9 >>> 40) & BITS_1_MASK);
+      decompressed[outOffset+37] = (int) ((s9 >>> 41) & BITS_1_MASK);
+      decompressed[outOffset+38] = (int) ((s9 >>> 42) & BITS_1_MASK);
+      decompressed[outOffset+39] = (int) ((s9 >>> 43) & BITS_1_MASK);
+      decompressed[outOffset+40] = (int) ((s9 >>> 44) & BITS_1_MASK);
+      decompressed[outOffset+41] = (int) ((s9 >>> 45) & BITS_1_MASK);
+      decompressed[outOffset+42] = (int) ((s9 >>> 46) & BITS_1_MASK);
+      decompressed[outOffset+43] = (int) ((s9 >>> 47) & BITS_1_MASK);
+      decompressed[outOffset+44] = (int) ((s9 >>> 48) & BITS_1_MASK);
+      decompressed[outOffset+45] = (int) ((s9 >>> 49) & BITS_1_MASK);
+      decompressed[outOffset+46] = (int) ((s9 >>> 50) & BITS_1_MASK);
+      decompressed[outOffset+47] = (int) ((s9 >>> 51) & BITS_1_MASK);
+      decompressed[outOffset+48] = (int) ((s9 >>> 52) & BITS_1_MASK);
+      decompressed[outOffset+49] = (int) ((s9 >>> 53) & BITS_1_MASK);
+      decompressed[outOffset+50] = (int) ((s9 >>> 54) & BITS_1_MASK);
+      decompressed[outOffset+51] = (int) ((s9 >>> 55) & BITS_1_MASK);
+      decompressed[outOffset+52] = (int) ((s9 >>> 56) & BITS_1_MASK);
+      decompressed[outOffset+53] = (int) ((s9 >>> 57) & BITS_1_MASK);
+      decompressed[outOffset+54] = (int) ((s9 >>> 58) & BITS_1_MASK);
+      decompressed[outOffset+55] = (int) ((s9 >>> 59) & BITS_1_MASK);
+      decompressed[outOffset+56] = (int) ((s9 >>> 60) & BITS_1_MASK);
+      decompressed[outOffset+57] = (int) ((s9 >>> 61) & BITS_1_MASK);
+      decompressed[outOffset+58] = (int) ((s9 >>> 62) & BITS_1_MASK);
+      decompressed[outOffset+59] = (int) ((s9 >>> 63) & BITS_1_MASK);
+      return 60;
+    default:
+      // Report the same 4 selector bits the switch dispatched on.
+      throw new IllegalArgumentException("Unknown Simple64 status: " + (s9 & 15));
+    }
+  }
+
+  /**
+   * Packs the pending input values into {@code compressedBuffer}, one long per
+   * {@code compressSingle} call, until no input remains.
+   *
+   * <p>Consumes the instance's input window: {@code offset} is advanced and
+   * {@code unComprSize} is decremented by the number of values each call
+   * packed. On return {@code compressedSize} holds the output size in bytes
+   * (number of longs written times 8).
+   */
+  public void compress() {
+    // While packing, compressedSize doubles as the index of the next long to write.
+    for (compressedSize = 0; unComprSize > 0; compressedSize++) {
+      final int numPacked = compressSingle(unCompressedData, offset, unComprSize, compressedBuffer, compressedSize);
+      offset += numPacked;
+      unComprSize -= numPacked;
+    }
+    // Convert the count of longs into a count of bytes.
+    compressedSize = compressedSize << 3;
+  }
+
+  /**
+   * Decodes longs from the start of {@code compressedBuffer} into
+   * {@code unCompressedData}, beginning at index 0, until {@code unComprSize}
+   * values have been produced.
+   *
+   * <p>The buffer is rewound first and then read with relative gets, so its
+   * position advances by one per decoded long. {@code unComprSize} is counted
+   * down as values are decoded.
+   */
+  public void decompress() {
+    compressedBuffer.rewind();
+    for (int outPos = 0; unComprSize > 0; ) {
+      final int numDecoded = decompressSingle(compressedBuffer.get(), unCompressedData, outPos);
+      outPos += numDecoded;
+      unComprSize -= numDecoded;
+    }
+  }
+
+}

Added: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64Codec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64Codec.java?rev=1066838&view=auto
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64Codec.java (added)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64Codec.java Thu Feb  3 15:52:33 2011
@@ -0,0 +1,218 @@
+package org.apache.lucene.index.codecs.simple64;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Set;
+
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.FieldsConsumer;
+import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.index.codecs.sep.IntStreamFactory;
+import org.apache.lucene.index.codecs.sep.IntIndexInput;
+import org.apache.lucene.index.codecs.sep.IntIndexOutput;
+import org.apache.lucene.index.codecs.sep.SepPostingsReaderImpl;
+import org.apache.lucene.index.codecs.sep.SepPostingsWriterImpl;
+import org.apache.lucene.index.codecs.intblock.FixedIntBlockIndexInput;
+import org.apache.lucene.index.codecs.intblock.FixedIntBlockIndexOutput;
+import org.apache.lucene.index.codecs.PostingsWriterBase;
+import org.apache.lucene.index.codecs.PostingsReaderBase;
+import org.apache.lucene.index.codecs.BlockTermsReader;
+import org.apache.lucene.index.codecs.BlockTermsWriter;
+import org.apache.lucene.index.codecs.TermsIndexReaderBase;
+import org.apache.lucene.index.codecs.TermsIndexWriterBase;
+import org.apache.lucene.index.codecs.VariableGapTermsIndexReader;
+import org.apache.lucene.index.codecs.VariableGapTermsIndexWriter;
+import org.apache.lucene.index.codecs.standard.StandardCodec;
+import org.apache.lucene.store.*;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Codec whose postings int streams are written in fixed-size blocks of ints,
+ * each block packed with {@link Simple64}.
+ *
+ * <p>Postings go through {@link SepPostingsWriterImpl} /
+ * {@link SepPostingsReaderImpl} using the int stream factory defined here;
+ * terms use {@link BlockTermsWriter} / {@link BlockTermsReader} with a
+ * variable-gap terms index.
+ *
+ * <p>Each block is stored as a vInt holding the number of longs, followed by
+ * that many 8-byte longs.
+ *
+ * @lucene.experimental
+ */
+
+// nocommit: we are wasting bits by packing blocks of integers into as few longs as possible?
+// we should be a variable int block codec instead?
+// we then shouldnt need header bytes at all... we read a fixed number of longs per block.
+public class Simple64Codec extends Codec {
+
+  /** Number of ints per block handed to the block writer. */
+  private final int blockSize;
+
+  /**
+   * Creates the codec.
+   *
+   * @param blockSize number of ints per block used when writing
+   */
+  public Simple64Codec(int blockSize) {
+    this.blockSize = blockSize;
+    name = "Simple64";
+  }
+
+  @Override
+  public String toString() {
+    return name + "(blockSize=" + blockSize + ")";
+  }
+
+  // only for testing
+  public IntStreamFactory getIntFactory() {
+    return new Simple64IntFactory();
+  }
+
+  /** Creates the Simple64-packed block inputs and outputs used for postings. */
+  private class Simple64IntFactory extends IntStreamFactory {
+
+    @Override
+    public IntIndexInput openInput(Directory dir, String fileName, int readBufferSize) throws IOException {
+      return new FixedIntBlockIndexInput(dir.openInput(fileName, readBufferSize)) {
+
+        @Override
+        protected BlockReader getBlockReader(final IndexInput in, final int[] buffer) throws IOException {
+          return new BlockReader() {
+            // A block decodes to buffer.length ints and, in the worst case, every
+            // int occupies its own long, so the scratch space is sized from the
+            // buffer actually being filled rather than from this codec
+            // instance's configured blockSize.
+            final ByteBuffer byteBuffer = ByteBuffer.allocate(buffer.length*8);
+            // Backing array of byteBuffer: raw block bytes are read straight into it.
+            final byte[] input = byteBuffer.array();
+            final Simple64 decompressor = new Simple64();
+
+            {
+              // Long view over the same bytes that readBlock() fills.
+              decompressor.setCompressedBuffer(byteBuffer.asLongBuffer());
+            }
+
+            public void readBlock() throws IOException {
+              // The header is the number of longs in the block; convert to bytes.
+              final int numBytes = in.readVInt() << 3; // read header
+              in.readBytes(input, 0, numBytes);
+              decompressor.setUnCompressedData(buffer, 0, buffer.length);
+              decompressor.decompress();
+            }
+          };
+        }
+      };
+    }
+
+    @Override
+    public IntIndexOutput createOutput(Directory dir, String fileName) throws IOException {
+      return new FixedIntBlockIndexOutput(dir.createOutput(fileName), blockSize) {
+        // Worst case is one long (8 bytes) per int in the block.
+        final ByteBuffer byteBuffer = ByteBuffer.allocate(blockSize*8);
+        // Backing array of byteBuffer: the packed block is written out from it.
+        final byte[] output = byteBuffer.array();
+        final Simple64 compressor = new Simple64();
+
+        {
+          // Long view over the same bytes that flushBlock() writes out.
+          compressor.setCompressedBuffer(byteBuffer.asLongBuffer());
+        }
+
+        @Override
+        protected void flushBlock() throws IOException {
+          compressor.setUnCompressedData(buffer, 0, buffer.length);
+          compressor.compress();
+          final int numBytes = compressor.compressedSize();
+          // Header is the number of longs, followed by the packed bytes.
+          out.writeVInt(numBytes >>> 3);
+          out.writeBytes(output, numBytes);
+        }
+      };
+    }
+  }
+
+  /**
+   * Builds the write side: postings writer, terms index writer and terms
+   * writer. If a later component fails to construct, the components already
+   * created are closed before the exception propagates.
+   */
+  @Override
+  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+    PostingsWriterBase postingsWriter = new SepPostingsWriterImpl(state, new Simple64IntFactory());
+
+    boolean success = false;
+    TermsIndexWriterBase indexWriter;
+    try {
+      indexWriter = new VariableGapTermsIndexWriter(state, new VariableGapTermsIndexWriter.EveryNTermSelector(state.termIndexInterval));
+      success = true;
+    } finally {
+      if (!success) {
+        postingsWriter.close();
+      }
+    }
+
+    success = false;
+    try {
+      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        // Close both; the nested finally ensures indexWriter is closed even if
+        // closing postingsWriter throws.
+        try {
+          postingsWriter.close();
+        } finally {
+          indexWriter.close();
+        }
+      }
+    }
+  }
+
+  /**
+   * Builds the read side: postings reader, terms index reader and terms
+   * reader. If a later component fails to construct, the components already
+   * created are closed before the exception propagates.
+   */
+  @Override
+  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
+    PostingsReaderBase postingsReader = new SepPostingsReaderImpl(state.dir,
+                                                                      state.segmentInfo,
+                                                                      state.readBufferSize,
+                                                                      new Simple64IntFactory(), state.codecId);
+
+    TermsIndexReaderBase indexReader;
+    boolean success = false;
+    try {
+      indexReader = new VariableGapTermsIndexReader(state.dir,
+                                                    state.fieldInfos,
+                                                    state.segmentInfo.name,
+                                                    state.termsIndexDivisor,
+                                                    state.codecId);
+      success = true;
+    } finally {
+      if (!success) {
+        postingsReader.close();
+      }
+    }
+
+    success = false;
+    try {
+      FieldsProducer ret = new BlockTermsReader(indexReader,
+                                                       state.dir,
+                                                       state.fieldInfos,
+                                                       state.segmentInfo.name,
+                                                       postingsReader,
+                                                       state.readBufferSize,
+                                                       BytesRef.getUTF8SortedAsUnicodeComparator(),
+                                                       StandardCodec.TERMS_CACHE_SIZE,
+                                                       state.codecId);
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        // Close both; the nested finally ensures indexReader is closed even if
+        // closing postingsReader throws.
+        try {
+          postingsReader.close();
+        } finally {
+          indexReader.close();
+        }
+      }
+    }
+  }
+
+  /** Adds the files of the postings, terms and terms-index components to {@code files}. */
+  @Override
+  public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set<String> files) {
+    SepPostingsReaderImpl.files(segmentInfo, codecId, files);
+    BlockTermsReader.files(dir, segmentInfo, codecId, files);
+    VariableGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
+  }
+
+  /** Adds the file extensions of the postings, terms and terms-index components to {@code extensions}. */
+  @Override
+  public void getExtensions(Set<String> extensions) {
+    SepPostingsWriterImpl.getExtensions(extensions);
+    BlockTermsReader.getExtensions(extensions);
+    VariableGapTermsIndexReader.getIndexExtensions(extensions);
+  }
+}

Added: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64VarIntCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64VarIntCodec.java?rev=1066838&view=auto
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64VarIntCodec.java (added)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64VarIntCodec.java Thu Feb  3 15:52:33 2011
@@ -0,0 +1,204 @@
+package org.apache.lucene.index.codecs.simple64;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Set;
+
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.FieldsConsumer;
+import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.index.codecs.sep.IntStreamFactory;
+import org.apache.lucene.index.codecs.sep.IntIndexInput;
+import org.apache.lucene.index.codecs.sep.IntIndexOutput;
+import org.apache.lucene.index.codecs.sep.SepPostingsReaderImpl;
+import org.apache.lucene.index.codecs.sep.SepPostingsWriterImpl;
+import org.apache.lucene.index.codecs.intblock.VariableIntBlockIndexInput;
+import org.apache.lucene.index.codecs.intblock.VariableIntBlockIndexOutput;
+import org.apache.lucene.index.codecs.PostingsWriterBase;
+import org.apache.lucene.index.codecs.PostingsReaderBase;
+import org.apache.lucene.index.codecs.BlockTermsReader;
+import org.apache.lucene.index.codecs.BlockTermsWriter;
+import org.apache.lucene.index.codecs.TermsIndexReaderBase;
+import org.apache.lucene.index.codecs.TermsIndexWriterBase;
+import org.apache.lucene.index.codecs.VariableGapTermsIndexReader;
+import org.apache.lucene.index.codecs.VariableGapTermsIndexWriter;
+import org.apache.lucene.index.codecs.standard.StandardCodec;
+import org.apache.lucene.store.*;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Experimental codec whose sep postings int streams are written as
+ * variable-sized blocks: every block is the group of values that
+ * {@link Simple64} packs into a single {@code long}.
+ *
+ * @lucene.experimental
+ */
+public class Simple64VarIntCodec extends Codec {
+
+  public Simple64VarIntCodec() {
+    name = "Simple64VarInt";
+  }
+
+  @Override
+  public String toString() {
+    return name;
+  }
+
+  /**
+   * Returns a new int stream factory; only for testing.
+   */
+  public IntStreamFactory getIntFactory() {
+    return new Simple64IntFactory();
+  }
+
+  /**
+   * Creates the Simple64 backed int streams. Declared {@code static}
+   * because it reads no state from the enclosing codec instance, so it
+   * does not need to keep a reference to it.
+   */
+  private static class Simple64IntFactory extends IntStreamFactory {
+
+    /**
+     * Opens {@code fileName} for reading; each block is decoded from one
+     * {@code long} read from the underlying input.
+     */
+    @Override
+    public IntIndexInput openInput(Directory dir, final String fileName, int readBufferSize) throws IOException {
+      return new VariableIntBlockIndexInput(dir.openInput(fileName, readBufferSize)) {
+
+        @Override
+        protected BlockReader getBlockReader(final IndexInput in, final int[] buffer) throws IOException {
+          return new BlockReader() {
+            public int readBlock() throws IOException {
+              //System.out.println("S64.readBlock in.fp=" + in.getFilePointer());
+              // Decode one packed long into buffer, starting at offset 0; the
+              // decoder's return value is handed back as the block's count.
+              int count = Simple64.decompressSingle(in.readLong(), buffer, 0);
+              /*
+              System.out.println("  count=" + count);
+              for(int i=0;i<count;i++) {
+                System.out.println("" + buffer[i]);
+              }
+              */
+              return count;
+            }
+          };
+        }
+      };
+    }
+
+    /**
+     * Creates {@code fileName} for writing; values are fed to a
+     * {@link Simple64} compressor and a {@code long} is written whenever
+     * the compressor reports that it consumed values.
+     */
+    @Override
+    public IntIndexOutput createOutput(Directory dir, String fileName) throws IOException {
+      // NOTE(review): 61 is the second constructor argument of
+      // VariableIntBlockIndexOutput; presumably an upper bound on the number
+      // of values per block -- confirm against that class.
+      return new VariableIntBlockIndexOutput(dir.createOutput(fileName), 61) {
+        final long[] result = new long[1];
+        final Simple64 compressor = new Simple64();
+        
+        @Override
+        protected int add(int v) throws IOException {
+          final int consumed = compressor.add(v, result);
+          if (consumed != 0) {
+            //System.out.println("S64.write count=" + consumed + " out.fp=" + out.getFilePointer());
+            // The compressor completed a packed long; flush it.
+            out.writeLong(result[0]);
+          }
+          return consumed;
+        }
+      };
+    }
+  }
+
+  /**
+   * Builds the write side: a sep postings writer, a variable gap terms
+   * index writer and a block terms writer on top of both. If a later
+   * constructor throws, the components created so far are closed before
+   * the exception propagates.
+   */
+  @Override
+  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+    PostingsWriterBase postingsWriter = new SepPostingsWriterImpl(state, new Simple64IntFactory());
+
+    boolean success = false;
+    TermsIndexWriterBase indexWriter;
+    try {
+      indexWriter = new VariableGapTermsIndexWriter(state, new VariableGapTermsIndexWriter.EveryNTermSelector(state.termIndexInterval));
+      success = true;
+    } finally {
+      if (!success) {
+        postingsWriter.close();
+      }
+    }
+
+    success = false;
+    try {
+      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        // Nested try/finally so the index writer is closed even if closing
+        // the postings writer throws.
+        try {
+          postingsWriter.close();
+        } finally {
+          indexWriter.close();
+        }
+      }
+    }
+  }
+
+  /**
+   * Builds the read side: a sep postings reader, a variable gap terms
+   * index reader and a block terms reader on top of both. If a later
+   * constructor throws, the components created so far are closed before
+   * the exception propagates.
+   */
+  @Override
+  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
+    PostingsReaderBase postingsReader = new SepPostingsReaderImpl(state.dir,
+                                                                      state.segmentInfo,
+                                                                      state.readBufferSize,
+                                                                      new Simple64IntFactory(), state.codecId);
+
+    TermsIndexReaderBase indexReader;
+    boolean success = false;
+    try {
+      indexReader = new VariableGapTermsIndexReader(state.dir,
+                                                    state.fieldInfos,
+                                                    state.segmentInfo.name,
+                                                    state.termsIndexDivisor,
+                                                    state.codecId);
+      success = true;
+    } finally {
+      if (!success) {
+        postingsReader.close();
+      }
+    }
+
+    success = false;
+    try {
+      FieldsProducer ret = new BlockTermsReader(indexReader,
+                                                       state.dir,
+                                                       state.fieldInfos,
+                                                       state.segmentInfo.name,
+                                                       postingsReader,
+                                                       state.readBufferSize,
+                                                       BytesRef.getUTF8SortedAsUnicodeComparator(),
+                                                       StandardCodec.TERMS_CACHE_SIZE,
+                                                       state.codecId);
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        // Nested try/finally so the index reader is closed even if closing
+        // the postings reader throws.
+        try {
+          postingsReader.close();
+        } finally {
+          indexReader.close();
+        }
+      }
+    }
+  }
+
+  /**
+   * Delegates to the static {@code files} helpers of the sep postings
+   * reader, the block terms reader and the variable gap terms index
+   * reader, passing the same {@code files} set to each.
+   */
+  @Override
+  public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set<String> files) {
+    SepPostingsReaderImpl.files(segmentInfo, codecId, files);
+    BlockTermsReader.files(dir, segmentInfo, codecId, files);
+    VariableGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
+  }
+
+  /**
+   * Delegates to the sep postings writer, the block terms reader and the
+   * variable gap terms index reader, passing the same {@code extensions}
+   * set to each.
+   */
+  @Override
+  public void getExtensions(Set<String> extensions) {
+    SepPostingsWriterImpl.getExtensions(extensions);
+    BlockTermsReader.getExtensions(extensions);
+    VariableGapTermsIndexReader.getIndexExtensions(extensions);
+  }
+}

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestOmitTf.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestOmitTf.java?rev=1066838&r1=1066837&r2=1066838&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestOmitTf.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestOmitTf.java Thu Feb  3 15:52:33 2011
@@ -257,6 +257,7 @@ public class TestOmitTf extends LuceneTe
             setSimilarityProvider(new SimpleSimilarity()).
             setMergePolicy(newLogMergePolicy(2))
     );
+    writer.setInfoStream(VERBOSE ? System.out : null);
         
     StringBuilder sb = new StringBuilder(265);
     String term = "term";

Added: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/CodecTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/CodecTestCase.java?rev=1066838&view=auto
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/CodecTestCase.java (added)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/CodecTestCase.java Thu Feb  3 15:52:33 2011
@@ -0,0 +1,66 @@
+package org.apache.lucene.index.codecs;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.util.LuceneTestCase;
+
+/**
+ * Base class for int stream codec tests. It generates lists of values of
+ * a given bit width and hands them, together with a block size, to
+ * {@link #doTest(int[], int)}, which subclasses implement.
+ */
+public abstract class CodecTestCase extends LuceneTestCase {
+
+  /** Number of values generated for every bit width. */
+  private static final int LIST_SIZE = 32768;
+
+  /**
+   * The different block sizes to test
+   */
+  protected static final int[] BLOCK_SIZES = {32, 256, 512, 2048};
+
+  /**
+   * Returns a value derived from one {@code random.nextDouble()} draw
+   * {@code r}, computed as {@code r*end + (1-r)*start + r} and narrowed to
+   * {@code long}; both bounds are meant to be inclusive.
+   *
+   * @param start lower bound (inclusive)
+   * @param end upper bound (inclusive)
+   */
+  public static long nextLong(long start, long end) {
+    double r = random.nextDouble();
+    return (long) ((r * end) + ((1.0 - r) * start) + r);
+  }
+  
+  /**
+   * For every bit width from {@code minBits} to {@code maxBits} (both
+   * inclusive), fills a list of {@code LIST_SIZE} values drawn between
+   * {@code 2^(bits-1)} (0 for one bit) and {@code 2^bits - 1}, then runs
+   * {@link #doTest(int[], int)} once per entry of {@code blockSizes}.
+   *
+   * @param minBits smallest bit width to test
+   * @param maxBits largest bit width to test
+   * @param blockSizes block sizes handed to {@code doTest}
+   * @throws IOException if {@code doTest} throws it
+   */
+  public void doTestIntegerRange(final int minBits, final int maxBits, final int[] blockSizes) throws IOException {
+    final int[] input = new int[LIST_SIZE];
+
+    for (int i = minBits; i <= maxBits; i++) {
+      // Progress output only in verbose mode, like the message below.
+      if (VERBOSE) {
+        System.out.println("TEST bits=" + i);
+      }
+
+      final long min = i == 1 ? 0 : (1L << (i - 1));
+      final long max = ((1L << i) - 1);
+
+      for (int j = 0; j < LIST_SIZE; j++) {
+        // Narrowing to int: values with bit 31 set (bits == 32) end up negative.
+        input[j] = (int) nextLong(min, max);
+      }
+
+      for (final int blockSize : blockSizes) {
+        if (VERBOSE) {
+          System.out.println("Perform Integer Range Test: bits = {" + i + "}, block size = {" + blockSize + "}");
+        }
+        this.doTest(input, blockSize);
+      }
+    }
+  }
+
+  /**
+   * Same as {@link #doTestIntegerRange(int, int, int[])} using
+   * {@link #BLOCK_SIZES}.
+   */
+  public void doTestIntegerRange(final int minBits, final int maxBits) throws IOException {
+    this.doTestIntegerRange(minBits, maxBits, BLOCK_SIZES);
+  }
+
+  /**
+   * Runs the codec specific check on {@code input} with the given block size.
+   *
+   * @param input values to test with
+   * @param blockSize block size to test with
+   */
+  protected abstract void doTest(int[] input, int blockSize) throws IOException;
+
+}

Added: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/simple64/Simple64Test.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/simple64/Simple64Test.java?rev=1066838&view=auto
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/simple64/Simple64Test.java (added)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/simple64/Simple64Test.java Thu Feb  3 15:52:33 2011
@@ -0,0 +1,127 @@
+package org.apache.lucene.index.codecs.simple64;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.BulkPostingsEnum.BlockReader;
+import org.apache.lucene.index.codecs.CodecTestCase;
+import org.apache.lucene.index.codecs.sep.IntIndexInput;
+import org.apache.lucene.index.codecs.sep.IntIndexOutput;
+import org.apache.lucene.index.codecs.sep.IntStreamFactory;
+import org.apache.lucene.store.RAMDirectory;
+import org.junit.Test;
+
+public class Simple64Test extends CodecTestCase {
+
+  @Test
+  public void testSimple() throws IOException {
+    final int blockSize = 60;
+    final RAMDirectory dir = new RAMDirectory();
+    final String filename = Simple64.class.toString();
+    final IntStreamFactory factory = new Simple64Codec(blockSize).getIntFactory();
+    final IntIndexOutput output = factory.createOutput(dir, filename);
+    final int[] values = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+                           1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,};
+
+    for (final int element : values) {
+      output.write(element);
+    }
+    output.close();
+
+    final IntIndexInput input = factory.openInput(dir, filename);
+    final BlockReader reader = input.reader();
+    int buffer[] = reader.getBuffer();
+    reader.fill();
+    for (int i = 0; i < values.length; i++) {
+      assertEquals("Error at record " + i, values[i], buffer[i]);
+    }
+    input.close();
+    dir.close();
+  }
+
+  @Test
+  public void testSimple16bits() throws IOException {
+    final int blockSize = 60;
+    final RAMDirectory dir = new RAMDirectory();
+    final String filename = Simple64.class.toString();
+    final IntStreamFactory factory = new Simple64Codec(blockSize).getIntFactory();
+    final IntIndexOutput output = factory.createOutput(dir, filename);
+    final int[] values = { 60149,60149,60149,60149,60149,60149,60149,60149,60149,
+                           60149,60149,60149,60149,60149,60149,60149,60149,60149,
+                           60149,60149,60149,60149,60149,60149,60149,60149,60149,
+                           60149,60149,60149,60149,60149,
+                           60149,60149,60149,60149,60149,60149,60149,60149,60149,
+                           60149,60149,60149,60149,60149,60149,60149,60149,60149,
+                           60149,60149,60149,60149,60149,60149,60149,60149,60149,
+                           60149,};
+
+    for (final int element : values) {
+      output.write(element);
+    }
+    output.close();
+
+    final IntIndexInput input = factory.openInput(dir, filename);
+    final BlockReader reader = input.reader();
+    int buffer[] = reader.getBuffer();
+    reader.fill();
+    for (int i = 0; i < values.length; i++) {
+      assertEquals("Error at record " + i, values[i], buffer[i]);
+    }
+    input.close();
+    dir.close();
+  }
+
+  @Override
+  public void doTest(final int[] values, final int blockSize)
+      throws IOException {
+    final RAMDirectory dir = new RAMDirectory();
+    final String filename = Simple64.class.toString();
+    final IntStreamFactory factory = new Simple64Codec(blockSize).getIntFactory();
+    final IntIndexOutput output = factory.createOutput(dir, filename);
+
+    for (final int element : values) {
+      output.write(element);
+    }
+    output.close();
+
+    final IntIndexInput input = factory.openInput(dir, filename);
+    final BlockReader reader = input.reader();
+    int buffer[] = reader.getBuffer();
+    int pointer = 0;
+    int pointerMax = reader.fill();
+    assertTrue(pointerMax > 0);
+
+    for(int i=0;i<values.length;i++) {
+      if (pointer == pointerMax) {
+        pointerMax = reader.fill();
+        assertTrue(pointerMax > 0);
+        pointer = 0;
+      }
+      assertEquals(values[i], buffer[pointer++]);
+    }
+
+    input.close();
+    dir.close();
+  }
+
+  @Test
+  public void testIntegerRange32() throws IOException {
+    this.doTestIntegerRange(1, 32);
+  }
+}

Added: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/simple64/Simple64VarIntTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/simple64/Simple64VarIntTest.java?rev=1066838&view=auto
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/simple64/Simple64VarIntTest.java (added)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/simple64/Simple64VarIntTest.java Thu Feb  3 15:52:33 2011
@@ -0,0 +1,176 @@
+package org.apache.lucene.index.codecs.simple64;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.index.BulkPostingsEnum.BlockReader;
+import org.apache.lucene.index.codecs.CodecTestCase;
+import org.apache.lucene.index.codecs.sep.IntIndexInput;
+import org.apache.lucene.index.codecs.sep.IntIndexOutput;
+import org.apache.lucene.index.codecs.sep.IntStreamFactory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util._TestUtil;
+import org.junit.Test;
+
+/**
+ * Write/read/seek round-trip tests for the int streams created by the
+ * factory of {@link Simple64VarIntCodec}.
+ */
+public class Simple64VarIntTest extends CodecTestCase {
+
+  @Test
+  public void testSimple() throws IOException {
+    final int[] values = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+                           1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,};
+    doTest(values, 0);
+  }
+
+  @Test
+  public void testSimple16bits() throws IOException {
+
+    // 60 values:
+    final int[] values = { 60149,60149,60149,60149,60149,60149,60149,60149,60149,
+                           60149,60149,60149,60149,60149,60149,60149,60149,60149,
+                           60149,60149,60149,60149,60149,60149,60149,60149,60149,
+                           60149,60149,60149,60149,60149,
+                           60149,60149,60149,60149,60149,60149,60149,60149,60149,
+                           60149,60149,60149,60149,60149,60149,60149,60149,60149,
+                           60149,60149,60149,60149,60149,60149,60149,60149,60149,
+                           60149,};
+    doTest(values, 0);
+  }
+
+  /**
+   * Writes {@code values}, recording an index entry before roughly one
+   * value in twenty, then verifies a sequential read of all values and
+   * 100 random seeks to recorded index entries (checking up to 20 values
+   * after each seek).
+   *
+   * @param values values to write and read back
+   * @param blockSize ignored by this test
+   */
+  // nocommit -- blockSize is unused:
+  @Override
+  public void doTest(final int[] values, int blockSize)
+      throws IOException {
+    final RAMDirectory dir = new RAMDirectory();
+    final String filename = Simple64.class.toString();
+    final IntStreamFactory factory = new Simple64VarIntCodec().getIntFactory();
+    final IntIndexOutput output = factory.createOutput(dir, filename);
+
+    if (VERBOSE) {
+      System.out.println("TEST: " + values.length + " values");
+    }
+
+    final IndexOutput indexOutput = dir.createOutput("index");
+    // Positions in values[] at which an index entry was written.
+    final List<Integer> indexed = new ArrayList<Integer>();
+
+    final IntIndexOutput.Index index = output.index();
+
+    for (int upto=0;upto<values.length;upto++) {
+      final int element = values[upto];
+      if (VERBOSE) {
+        System.out.println("  add " + element);
+      }
+      if (random.nextInt(20) == 17) {
+        index.mark();
+        index.write(indexOutput, true);
+        indexed.add(upto);
+      }
+      output.write(element);
+    }
+    output.close();
+    indexOutput.close();
+
+    final IntIndexInput input = factory.openInput(dir, filename);
+    final BlockReader reader = input.reader();
+    int buffer[] = reader.getBuffer();
+    int pointer = 0;
+    int pointerMax = reader.fill();
+    assertTrue(pointerMax > 0);
+
+    if (VERBOSE) {
+      System.out.println("  verify...");
+    }
+    for(int i=0;i<values.length;i++) {
+      if (pointer == pointerMax) {
+        // Current buffer exhausted: fetch the next one and restart at 0.
+        pointerMax = reader.fill();
+        assertTrue(pointerMax > 0);
+        pointer = 0;
+      }
+      if (VERBOSE) {
+        System.out.println("  got " + buffer[pointer]);
+      }
+      assertEquals(values[i], buffer[pointer++]);
+    }
+
+    // Now test seeking:
+    if (indexed.size() != 0) {
+      final IndexInput indexInput = dir.openInput("index");
+      List<IntIndexInput.Index> indexes = new ArrayList<IntIndexInput.Index>();
+      // Read back one index entry per recorded position, in write order.
+      for (int i = 0; i < indexed.size(); i++) {
+        IntIndexInput.Index index2 = input.index();
+        index2.read(indexInput, true);
+        indexes.add(index2);
+      }
+      indexInput.close();
+
+      for(int iter=0;iter<100;iter++) {
+        final int spot = random.nextInt(indexed.size());
+        if (VERBOSE) {
+          System.out.println("TEST: seek index=" + indexes.get(spot));
+        }
+        indexes.get(spot).seek(reader);
+        // After a seek, continue from the offset and end the reader reports.
+        pointerMax = reader.end();
+        pointer = reader.offset();
+        int upto = indexed.get(spot);
+        int limit = Math.min(upto+20, values.length);
+        while(upto < limit) {
+          if (pointer == pointerMax) {
+            pointerMax = reader.fill();
+            assertTrue(pointerMax > 0);
+            pointer = 0;
+          }
+          if (VERBOSE) {
+            System.out.println("  got " + buffer[pointer]);
+          }
+          assertEquals(values[upto++], buffer[pointer++]);
+        }
+      }
+    }
+
+    input.close();
+    dir.close();
+  }
+
+  /**
+   * Runs {@code doTest} on arrays of 10 to 1000 values that are mostly
+   * small (0..3), with roughly one in twenty being an arbitrary
+   * non-negative int.
+   */
+  @Test
+  public void testRandom() throws Exception {
+    // nocommit mixup size of int[]
+    // nocommit more iters:
+    for(int iter=0;iter<10*RANDOM_MULTIPLIER;iter++) {
+      int size = _TestUtil.nextInt(random, 10, 1000);
+      int[] values = new int[size];
+      for(int i=0;i<values.length;i++) {
+        if (random.nextInt(20) == 17) {
+          values[i] = random.nextInt() & Integer.MAX_VALUE;
+        } else {
+          // Small values in 0..3; the mask was previously 4, which could
+          // only ever produce 0 or 4.
+          values[i] = random.nextInt() & 3;
+        }
+      }
+      doTest(values, 0);
+    }
+  }
+
+  @Test
+  public void testIntegerRange32() throws IOException {
+    this.doTestIntegerRange(1, 32);
+  }
+}