You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2011/02/03 16:52:34 UTC
svn commit: r1066838 - in /lucene/dev/branches/bulkpostings/lucene/src:
java/org/apache/lucene/index/codecs/
java/org/apache/lucene/index/codecs/intblock/
java/org/apache/lucene/index/codecs/simple64/ test/org/apache/lucene/index/
test/org/apache/lucen...
Author: mikemccand
Date: Thu Feb 3 15:52:33 2011
New Revision: 1066838
URL: http://svn.apache.org/viewvc?rev=1066838&view=rev
Log:
LUCENE-2886: add Simple64 fixed & var intblock experimental codec
Added:
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64.java (with props)
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64Codec.java (with props)
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64VarIntCodec.java (with props)
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/CodecTestCase.java (with props)
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/simple64/
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/simple64/Simple64Test.java (with props)
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/simple64/Simple64VarIntTest.java (with props)
Modified:
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestOmitTf.java
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java?rev=1066838&r1=1066837&r2=1066838&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java Thu Feb 3 15:52:33 2011
@@ -26,6 +26,8 @@ import java.util.Set;
import org.apache.lucene.index.codecs.bulkvint.BulkVIntCodec;
import org.apache.lucene.index.codecs.preflex.PreFlexCodec;
import org.apache.lucene.index.codecs.pulsing.PulsingCodec;
+import org.apache.lucene.index.codecs.simple64.Simple64Codec;
+import org.apache.lucene.index.codecs.simple64.Simple64VarIntCodec;
import org.apache.lucene.index.codecs.simpletext.SimpleTextCodec;
import org.apache.lucene.index.codecs.standard.StandardCodec;
import org.apache.lucene.index.codecs.pfordelta.PatchedFrameOfRefCodec;
@@ -51,7 +53,7 @@ public class CodecProvider {
private final Set<String> knownExtensions = new HashSet<String>();
- public final static String[] CORE_CODECS = new String[] {"Standard", "Pulsing", "PreFlex", "SimpleText", "PatchedFrameOfRef", "FrameOfRef", "PatchedFrameOfRef2", "BulkVInt"};
+ public final static String[] CORE_CODECS = new String[] {"Standard", "Pulsing", "PreFlex", "SimpleText", "PatchedFrameOfRef", "FrameOfRef", "PatchedFrameOfRef2", "BulkVInt", "Simple64", "Simple64VarInt"};
public synchronized void register(Codec codec) {
if (codec.name == null) {
@@ -178,5 +180,7 @@ class DefaultCodecProvider extends Codec
register(new FrameOfRefCodec());
register(new PForDeltaFixedIntBlockCodec(128));
register(new BulkVIntCodec(128));
+ register(new Simple64Codec(128));
+ register(new Simple64VarIntCodec());
}
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java?rev=1066838&r1=1066837&r2=1066838&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java Thu Feb 3 15:52:33 2011
@@ -70,7 +70,8 @@ public abstract class VariableIntBlockIn
public interface BlockReader {
public int readBlock() throws IOException;
- public void seek(long pos) throws IOException;
+ // nocommit -- do we really need?
+ //public void seek(long pos) throws IOException;
}
public static class Reader extends BulkPostingsEnum.BlockReader {
@@ -80,7 +81,7 @@ public abstract class VariableIntBlockIn
private int offset;
private long lastBlockFP;
- private int blockSize;
+ //private int blockSize; // nocommit redundant w/ limit?
private final BlockReader blockReader;
private int limit;
@@ -99,9 +100,10 @@ public abstract class VariableIntBlockIn
if (fp != lastBlockFP) {
// Seek to new block
in.seek(fp);
- blockReader.seek(fp);
+ // nocommit -- why?
+ //blockReader.seek(fp);
lastBlockFP = fp;
- limit = blockSize = blockReader.readBlock();
+ limit = blockReader.readBlock();
} else {
// Seek w/in current block
}
@@ -115,6 +117,7 @@ public abstract class VariableIntBlockIn
// non-causal, ie must see future int values to
// encode the current ones.
while(offset >= limit) {
+ //System.out.println("NON CAUSAL! offset=" + offset + " limit=" + limit);
offset -= limit;
fill();
}
@@ -147,8 +150,7 @@ public abstract class VariableIntBlockIn
// fill -- but we need it to detect seek w/in block
// case:
lastBlockFP = in.getFilePointer();
- blockSize = blockReader.readBlock();
- return limit = blockSize;
+ return limit = blockReader.readBlock();
}
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java?rev=1066838&r1=1066837&r2=1066838&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java Thu Feb 3 15:52:33 2011
@@ -123,6 +123,11 @@ public abstract class VariableIntBlockIn
lastUpto = upto;
lastFP = fp;
}
+
+ @Override
+ public String toString() { // debugging aid: reports the fp and upto fields
+ return "VarIntBlock.Output fp=" + fp + " upto=" + upto;
+ }
}
private boolean abort;
Added: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64.java?rev=1066838&view=auto
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64.java (added)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64.java Thu Feb 3 15:52:33 2011
@@ -0,0 +1,978 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.index.codecs.simple64;
+
+import java.nio.LongBuffer;
+
+public class Simple64 {
+
+ protected int compressedSize = 0; // value returned by compressedSize(); starts at 0
+ protected LongBuffer compressedBuffer; // assigned by setCompressedBuffer
+
+ /** Uncompressed data, assigned by setUnCompressedData */
+ protected int[] unCompressedData;
+ /** Offset into unCompressedData */
+ protected int offset;
+ /** Size of unCompressedData, -1 when not available. */
+ protected int unComprSize = -1;
+
+ public void setCompressedBuffer(final LongBuffer compressedBuffer) { // stores the reference as-is; no copy is made
+ this.compressedBuffer = compressedBuffer;
+ }
+
+ public void setUnCompressedData(final int[] unCompressedData, // array reference is stored as-is; no copy is made
+ final int offset, final int unComprSize) {
+ this.unCompressedData = unCompressedData;
+ this.offset = offset;
+ this.unComprSize = unComprSize;
+ }
+
+ public int compressedSize() { // NOTE(review): nothing in the visible part of this class assigns the field - confirm where it is updated
+ return this.compressedSize;
+ }
+
+ private static final int NUM_DATA_BITS = 60; // payload bits per long; the encoders place values starting at bit 4
+ private static final long BITS_60_MASK = (1L << 60) - 1;
+ private static final long BITS_30_MASK = (1 << 30) - 1;
+ private static final long BITS_20_MASK = (1 << 20) - 1;
+ private static final long BITS_15_MASK = (1 << 15) - 1;
+ private static final long BITS_12_MASK = (1 << 12) - 1;
+ private static final long BITS_11_MASK = (1 << 11) - 1;
+ private static final long BITS_10_MASK = (1 << 10) - 1;
+ private static final long BITS_8_MASK = (1 << 8) - 1; // 7 values use 56 bits; the last value takes the 4 spare bits
+ private static final long BITS_7_MASK = (1 << 7) - 1; // 8 values use 56 bits; the last value takes the 4 spare bits
+ private static final long BITS_6_MASK = (1 << 6) - 1;
+ private static final long BITS_5_MASK = (1 << 5) - 1;
+ private static final long BITS_4_MASK = (1 << 4) - 1;
+ private static final long BITS_3_MASK = (1 << 3) - 1;
+ private static final long BITS_2_MASK = (1 << 2) - 1;
+ private static final long BITS_1_MASK = (1 << 1) - 1;
+ // Selector written into the low bits of each encoded long: STATUS_<count>NUM_<width>BITS.
+ private static final int STATUS_1NUM_60BITS = 14;
+ private static final int STATUS_2NUM_30BITS = 13;
+ private static final int STATUS_3NUM_20BITS = 12;
+ private static final int STATUS_4NUM_15BITS = 11;
+ private static final int STATUS_5NUM_12BITS = 10;
+ private static final int STATUS_6NUM_10BITS = 9;
+ private static final int STATUS_7NUM_8BITS = 8;
+ private static final int STATUS_8NUM_7BITS = 7;
+ private static final int STATUS_10NUM_6BITS = 6;
+ private static final int STATUS_12NUM_5BITS = 5;
+ private static final int STATUS_15NUM_4BITS = 4;
+ private static final int STATUS_20NUM_3BITS = 3;
+ private static final int STATUS_30NUM_2BITS = 2;
+ private static final int STATUS_60NUM_1BITS = 1;
+ // State of the streaming encoder used by add() and cleared by reset().
+ private int inputCompressable = 1; // 1-based position in pending of the next value to examine
+ private int minBits = 1; // bit width currently required per value
+ private long maxFitPlus1 = (1 << minBits); // smallest value that does not fit in minBits bits
+ private final int[] pending = new int[100]; // nocommit -- 60 or 61 should do?
+ private int inputCount; // number of values currently buffered in pending
+
+ private void reset() { // returns the streaming encoder to its initial state: nothing buffered, 1-bit width
+ inputCompressable = 1;
+ minBits = 1;
+ inputCount = 0;
+ maxFitPlus1 = (1 << minBits);
+ }
+
+ // nocommit -- need low level test that streaming api
+ // didn't break anything
+
+ // Returns 0 if no new long was written; otherwise returns the number
+ // of input values consumed, and out[0] holds the long to write
+ public int add(int v, long[] out) {
+ //System.out.println("S64.add v=" + v + " " + (1 + inputCount - inputCompressable) + " waiting");
+ pending[inputCount++] = v; // buffer the new value
+ while(inputCompressable <= inputCount) { // examine every buffered value not yet accounted for
+ final long nextData = pending[(inputCompressable-1)] & 0xFFFFFFFFL; // read the int as an unsigned 32-bit value
+ //System.out.println(" cycle: data=" + nextData);
+ while ((nextData >= maxFitPlus1) && (minBits < NUM_DATA_BITS)) { // widen until nextData fits
+ //System.out.println(" cycle maxFitPlus1=" + maxFitPlus1 + " minBits=" + minBits);
+ if ((minBits == 7) && (inputCompressable == 8) && (nextData < (maxFitPlus1 << 4))) { // 8th value of the 8x7 packing may use 4 extra bits
+ break;
+ } else if ((minBits == 8) && (inputCompressable == 7) && (nextData < (maxFitPlus1 << 4))) { // 7th value of the 7x8 packing may use 4 extra bits
+ break;
+ } else {
+ //System.out.println(" advance");
+ minBits++;
+ maxFitPlus1 <<= 1;
+ if ((inputCompressable * minBits) > NUM_DATA_BITS) { // at the wider width the run no longer fits: leave the current value out
+ inputCompressable--;
+ //System.out.println(" hard break");
+ break;
+ }
+ }
+ }
+ inputCompressable++;
+
+ //System.out.println(" minBits=" + minBits + " count=" + (inputCompressable-1) + " inputCount=" + inputCount);
+
+ if ((inputCompressable * minBits) > NUM_DATA_BITS) { // one more value at this width would not fit
+ // Time to compress!
+ inputCompressable--;
+ //System.out.println(" FLUSH count=" + inputCompressable);
+
+ // nocommit -- it should always be > 0... right??
+ assert inputCompressable > 0;
+
+ // Check whether a bigger number of bits can be used:
+ while ((inputCompressable * (minBits+1)) <= NUM_DATA_BITS) {
+ minBits++;
+ //System.out.println(" incr minBits=" + minBits);
+ }
+
+ /*
+ if (((inputCompressable+1) * minBits) <= NUM_DATA_BITS) {
+ // not enough input available for minBits
+ minBits++;
+ // do not compress all available input
+ inputCompressable = NUM_DATA_BITS / minBits;
+ }
+ */
+
+ // Put compression method in status bits and encode input data
+ long s9;
+ final int consumed;
+ switch (minBits) { // add status bits and later input values
+ case 60:
+ s9 = STATUS_1NUM_60BITS;
+ s9 |= (long) (pending[0]) << 4; // NOTE(review): the (long) cast sign-extends an int whose high bit is set - confirm the decoder truncates
+ consumed = 1;
+ break;
+ case 30:
+ s9 = STATUS_2NUM_30BITS;
+ // nocommit -- make a single expr instead of |'ing ?
+ s9 |= (long) (pending[0]) << 4;
+ s9 |= (long) (pending[1]) << 34;
+ consumed = 2;
+ break;
+ case 20:
+ s9 = STATUS_3NUM_20BITS;
+ s9 |= (long) (pending[0]) << 4;
+ s9 |= (long) (pending[1]) << 24;
+ s9 |= (long) (pending[2]) << 44;
+ consumed = 3;
+ break;
+ case 15:
+ s9 = STATUS_4NUM_15BITS;
+ s9 |= ((long) (pending[0]) << 4);
+ s9 |= (long) (pending[1]) << 19;
+ s9 |= (long) (pending[2]) << 34;
+ s9 |= (long) (pending[3]) << 49;
+ consumed = 4;
+ break;
+ case 12:
+ s9 = STATUS_5NUM_12BITS;
+ s9 |= ((long) (pending[0]) << 4);
+ s9 |= (long) (pending[1]) << 16;
+ s9 |= (long) (pending[2]) << 28;
+ s9 |= (long) (pending[3]) << 40;
+ s9 |= (long) (pending[4]) << 52;
+ consumed = 5;
+ break;
+ case 10:
+ s9 = STATUS_6NUM_10BITS;
+ s9 |= ((long) (pending[0]) << 4);
+ s9 |= (long) (pending[1]) << 14;
+ s9 |= (long) (pending[2]) << 24;
+ s9 |= (long) (pending[3]) << 34;
+ s9 |= (long) (pending[4]) << 44;
+ s9 |= (long) (pending[5]) << 54;
+ consumed = 6;
+ break;
+ case 8:
+ s9 = STATUS_7NUM_8BITS;
+ s9 |= ((long) (pending[0]) << 4);
+ s9 |= (long) (pending[1]) << 12;
+ s9 |= (long) (pending[2]) << 20;
+ s9 |= (long) (pending[3]) << 28;
+ s9 |= (long) (pending[4]) << 36;
+ s9 |= (long) (pending[5]) << 44;
+ s9 |= (long) (pending[6]) << 52; // 4 more bits
+ consumed = 7;
+ break;
+ case 7:
+ s9 = STATUS_8NUM_7BITS;
+ s9 |= ((long) (pending[0]) << 4);
+ s9 |= (long) (pending[1]) << 11;
+ s9 |= (long) (pending[2]) << 18;
+ s9 |= (long) (pending[3]) << 25;
+ s9 |= (long) (pending[4]) << 32;
+ s9 |= (long) (pending[5]) << 39;
+ s9 |= (long) (pending[6]) << 46;
+ s9 |= (long) (pending[7]) << 53; // 4 more bits
+ consumed = 8;
+ break;
+ case 6:
+ s9 = STATUS_10NUM_6BITS;
+ s9 |= (long) (pending[0]) << 4;
+ s9 |= (long) (pending[1]) << 10;
+ s9 |= (long) (pending[2]) << 16;
+ s9 |= (long) (pending[3]) << 22;
+ s9 |= (long) (pending[4]) << 28;
+ s9 |= (long) (pending[5]) << 34;
+ s9 |= (long) (pending[6]) << 40;
+ s9 |= (long) (pending[7]) << 46;
+ s9 |= (long) (pending[8]) << 52;
+ s9 |= (long) (pending[9]) << 58;
+ consumed = 10;
+ break;
+ case 5:
+ s9 = STATUS_12NUM_5BITS;
+ s9 |= (long) (pending[0]) << 4;
+ s9 |= (long) (pending[1]) << 9;
+ s9 |= (long) (pending[2]) << 14;
+ s9 |= (long) (pending[3]) << 19;
+ s9 |= (long) (pending[4]) << 24;
+ s9 |= (long) (pending[5]) << 29;
+ s9 |= (long) (pending[6]) << 34;
+ s9 |= (long) (pending[7]) << 39;
+ s9 |= (long) (pending[8]) << 44;
+ s9 |= (long) (pending[9]) << 49;
+ s9 |= (long) (pending[10]) << 54;
+ s9 |= (long) (pending[11]) << 59;
+ consumed = 12;
+ break;
+ case 4:
+ s9 = STATUS_15NUM_4BITS;
+ s9 |= (long) (pending[0]) << 4;
+ s9 |= (long) (pending[1]) << 8;
+ s9 |= (long) (pending[2]) << 12;
+ s9 |= (long) (pending[3]) << 16;
+ s9 |= (long) (pending[4]) << 20;
+ s9 |= (long) (pending[5]) << 24;
+ s9 |= (long) (pending[6]) << 28;
+ s9 |= (long) (pending[7]) << 32;
+ s9 |= (long) (pending[8]) << 36;
+ s9 |= (long) (pending[9]) << 40;
+ s9 |= (long) (pending[10]) << 44;
+ s9 |= (long) (pending[11]) << 48;
+ s9 |= (long) (pending[12]) << 52;
+ s9 |= (long) (pending[13]) << 56;
+ s9 |= (long) (pending[14]) << 60;
+ consumed = 15;
+ break;
+ case 3:
+ s9 = STATUS_20NUM_3BITS;
+ s9 |= (long) (pending[0]) << 4;
+ s9 |= (long) (pending[1]) << 7;
+ s9 |= (long) (pending[2]) << 10;
+ s9 |= (long) (pending[3]) << 13;
+ s9 |= (long) (pending[4]) << 16;
+ s9 |= (long) (pending[5]) << 19;
+ s9 |= (long) (pending[6]) << 22;
+ s9 |= (long) (pending[7]) << 25;
+ s9 |= (long) (pending[8]) << 28;
+ s9 |= (long) (pending[9]) << 31;
+ s9 |= (long) (pending[10]) << 34;
+ s9 |= (long) (pending[11]) << 37;
+ s9 |= (long) (pending[12]) << 40;
+ s9 |= (long) (pending[13]) << 43;
+ s9 |= (long) (pending[14]) << 46;
+ s9 |= (long) (pending[15]) << 49;
+ s9 |= (long) (pending[16]) << 52;
+ s9 |= (long) (pending[17]) << 55;
+ s9 |= (long) (pending[18]) << 58;
+ s9 |= (long) (pending[19]) << 61;
+ consumed = 20;
+ break;
+ case 2:
+ s9 = STATUS_30NUM_2BITS;
+ s9 |= (long) (pending[0]) << 4;
+ s9 |= (long) (pending[1]) << 6;
+ s9 |= (long) (pending[2]) << 8;
+ s9 |= (long) (pending[3]) << 10;
+ s9 |= (long) (pending[4]) << 12;
+ s9 |= (long) (pending[5]) << 14;
+ s9 |= (long) (pending[6]) << 16;
+ s9 |= (long) (pending[7]) << 18;
+ s9 |= (long) (pending[8]) << 20;
+ s9 |= (long) (pending[9]) << 22;
+ s9 |= (long) (pending[10]) << 24;
+ s9 |= (long) (pending[11]) << 26;
+ s9 |= (long) (pending[12]) << 28;
+ s9 |= (long) (pending[13]) << 30;
+ s9 |= (long) (pending[14]) << 32;
+ s9 |= (long) (pending[15]) << 34;
+ s9 |= (long) (pending[16]) << 36;
+ s9 |= (long) (pending[17]) << 38;
+ s9 |= (long) (pending[18]) << 40;
+ s9 |= (long) (pending[19]) << 42;
+ s9 |= (long) (pending[20]) << 44;
+ s9 |= (long) (pending[21]) << 46;
+ s9 |= (long) (pending[22]) << 48;
+ s9 |= (long) (pending[23]) << 50;
+ s9 |= (long) (pending[24]) << 52;
+ s9 |= (long) (pending[25]) << 54;
+ s9 |= (long) (pending[26]) << 56;
+ s9 |= (long) (pending[27]) << 58;
+ s9 |= (long) (pending[28]) << 60;
+ s9 |= (long) (pending[29]) << 62;
+ consumed = 30;
+ break;
+ case 1:
+ s9 = STATUS_60NUM_1BITS;
+ s9 |= (long) (pending[0]) << 4;
+ s9 |= (long) (pending[1]) << 5;
+ s9 |= (long) (pending[2]) << 6;
+ s9 |= (long) (pending[3]) << 7;
+ s9 |= (long) (pending[4]) << 8;
+ s9 |= (long) (pending[5]) << 9;
+ s9 |= (long) (pending[6]) << 10;
+ s9 |= (long) (pending[7]) << 11;
+ s9 |= (long) (pending[8]) << 12;
+ s9 |= (long) (pending[9]) << 13;
+ s9 |= (long) (pending[10]) << 14;
+ s9 |= (long) (pending[11]) << 15;
+ s9 |= (long) (pending[12]) << 16;
+ s9 |= (long) (pending[13]) << 17;
+ s9 |= (long) (pending[14]) << 18;
+ s9 |= (long) (pending[15]) << 19;
+ s9 |= (long) (pending[16]) << 20;
+ s9 |= (long) (pending[17]) << 21;
+ s9 |= (long) (pending[18]) << 22;
+ s9 |= (long) (pending[19]) << 23;
+ s9 |= (long) (pending[20]) << 24;
+ s9 |= (long) (pending[21]) << 25;
+ s9 |= (long) (pending[22]) << 26;
+ s9 |= (long) (pending[23]) << 27;
+ s9 |= (long) (pending[24]) << 28;
+ s9 |= (long) (pending[25]) << 29;
+ s9 |= (long) (pending[26]) << 30;
+ s9 |= (long) (pending[27]) << 31;
+ s9 |= (long) (pending[28]) << 32;
+ s9 |= (long) (pending[29]) << 33;
+ s9 |= (long) (pending[30]) << 34;
+ s9 |= (long) (pending[31]) << 35;
+ s9 |= (long) (pending[32]) << 36;
+ s9 |= (long) (pending[33]) << 37;
+ s9 |= (long) (pending[34]) << 38;
+ s9 |= (long) (pending[35]) << 39;
+ s9 |= (long) (pending[36]) << 40;
+ s9 |= (long) (pending[37]) << 41;
+ s9 |= (long) (pending[38]) << 42;
+ s9 |= (long) (pending[39]) << 43;
+ s9 |= (long) (pending[40]) << 44;
+ s9 |= (long) (pending[41]) << 45;
+ s9 |= (long) (pending[42]) << 46;
+ s9 |= (long) (pending[43]) << 47;
+ s9 |= (long) (pending[44]) << 48;
+ s9 |= (long) (pending[45]) << 49;
+ s9 |= (long) (pending[46]) << 50;
+ s9 |= (long) (pending[47]) << 51;
+ s9 |= (long) (pending[48]) << 52;
+ s9 |= (long) (pending[49]) << 53;
+ s9 |= (long) (pending[50]) << 54;
+ s9 |= (long) (pending[51]) << 55;
+ s9 |= (long) (pending[52]) << 56;
+ s9 |= (long) (pending[53]) << 57;
+ s9 |= (long) (pending[54]) << 58;
+ s9 |= (long) (pending[55]) << 59;
+ s9 |= (long) (pending[56]) << 60;
+ s9 |= (long) (pending[57]) << 61;
+ s9 |= (long) (pending[58]) << 62;
+ s9 |= (long) (pending[59]) << 63;
+ consumed = 60;
+ break;
+ default:
+ assert false; // NOTE(review): with assertions disabled this branch emits s9 = 0 and reports 60 values consumed
+ s9 = 0;
+ consumed = 60;
+ //throw new Error("S98b.compressSingle internal error: unknown minBits: " + minBits);
+ }
+
+ final int leftover = inputCount - consumed; // buffered values that did not go into this long
+ assert leftover >= 0: "consumed=" + consumed + " vs " + inputCompressable;
+
+ /*
+ for(int x=0;x<consumed;x++) {
+ System.out.println(""+pending[x]);
+ }
+ */
+ /*
+ // like cd burning! ;)
+ {
+ int[] test = new int[consumed];
+ int ct = decompressSingle(s9, test, 0);
+ assert ct == consumed;
+ for(int x=0;x<ct;x++) {
+ assert test[x] == pending[x];
+ }
+ }
+ */
+
+ //System.out.println(" return consumed=" + consumed);
+ reset(); // also zeroes inputCount; it is restored from leftover below
+
+ // move the values not consumed by this long to the front of pending:
+ for(int i=0;i<leftover;i++) {
+ pending[i] = pending[consumed+i];
+ }
+ inputCount = leftover;
+
+ out[0] = s9;
+ return consumed; // at most one long is produced per call; leftovers are re-examined on the next add
+ }
+ }
+
+ return 0; // every buffered value still fits; nothing written to out
+ }
+
+ public static int compressSingle(final int[] uncompressed, final int inOffset, final int inSize, final LongBuffer compressedBuffer, final int outOffset) { // packs values starting at uncompressed[inOffset] into one long stored at compressedBuffer index outOffset; returns how many values were packed
+ if (inSize < 1) {
+ throw new IllegalArgumentException("Cannot compress input with non positive size " + inSize);
+ }
+ int inputCompressable = 1;
+ int minBits = 1;
+ long maxFitPlus1 = (1 << minBits);
+ long nextData;
+
+ do {
+ nextData = uncompressed[inOffset + inputCompressable - 1] & 0xFFFFFFFFL; // read the int as an unsigned 32-bit value
+ if (nextData < 0) { // NOTE(review): never true - nextData was masked to 32 unsigned bits on the previous line
+ throw new IllegalArgumentException("Cannot compress negative input " + nextData + " (at index " + (inOffset+inputCompressable-1) + ")");
+ }
+ while ((nextData >= maxFitPlus1) && (minBits < NUM_DATA_BITS))
+ {
+ if ((minBits == 7) && (inputCompressable == 8) && (nextData < (maxFitPlus1 << 4))) { // 8th value of the 8x7 packing may use 4 extra bits
+ break;
+ } else if ((minBits == 8) && (inputCompressable == 7) && (nextData < (maxFitPlus1 << 4))) { // 7th value of the 7x8 packing may use 4 extra bits
+ break;
+ } else {
+ minBits++;
+ maxFitPlus1 <<= 1;
+ if ((inputCompressable * minBits) > NUM_DATA_BITS) { // at the wider width the run no longer fits: leave the current value out
+ inputCompressable--;
+ break;
+ }
+ }
+ }
+ inputCompressable++;
+ } while (((inputCompressable * minBits) <= NUM_DATA_BITS) && (inputCompressable <= inSize)); // stop when the long is full or the input is exhausted
+
+ inputCompressable--;
+ if (inputCompressable == 0) {
+ throw new IllegalArgumentException("Cannot compress input " + nextData + " with more than " + NUM_DATA_BITS + " bits (at offSet " + inOffset + ")");
+ }
+
+ // Check whether a bigger number of bits can be used:
+ while ((inputCompressable * (minBits+1)) <= NUM_DATA_BITS) {
+ minBits++;
+ }
+
+ if (((inputCompressable+1) * minBits) <= NUM_DATA_BITS) {
+ // not enough input available for minBits
+ minBits++;
+ // do not compress all available input
+ inputCompressable = NUM_DATA_BITS / minBits;
+ }
+
+ // Put compression method in status bits and encode input data
+ long s9;
+ switch (minBits) { // add status bits and later input values
+ case 60:
+ s9 = STATUS_1NUM_60BITS;
+ s9 |= (long) (uncompressed[inOffset]) << 4; // NOTE(review): the (long) cast sign-extends an int whose high bit is set - confirm the decoder truncates
+ compressedBuffer.put(outOffset, s9);
+ return 1;
+ case 30:
+ s9 = STATUS_2NUM_30BITS;
+ s9 |= (long) (uncompressed[inOffset]) << 4;
+ s9 |= (long) (uncompressed[inOffset+1]) << 34;
+ compressedBuffer.put(outOffset, s9);
+ return 2;
+ case 20:
+ s9 = STATUS_3NUM_20BITS;
+ s9 |= (long) (uncompressed[inOffset]) << 4;
+ s9 |= (long) (uncompressed[inOffset+1]) << 24;
+ s9 |= (long) (uncompressed[inOffset+2]) << 44;
+ compressedBuffer.put(outOffset, s9);
+ return 3;
+ case 15:
+ s9 = STATUS_4NUM_15BITS;
+ s9 |= ((long) (uncompressed[inOffset]) << 4);
+ s9 |= (long) (uncompressed[inOffset+1]) << 19;
+ s9 |= (long) (uncompressed[inOffset+2]) << 34;
+ s9 |= (long) (uncompressed[inOffset+3]) << 49;
+ compressedBuffer.put(outOffset, s9);
+ return 4;
+ case 12:
+ s9 = STATUS_5NUM_12BITS;
+ s9 |= ((long) (uncompressed[inOffset]) << 4);
+ s9 |= (long) (uncompressed[inOffset+1]) << 16;
+ s9 |= (long) (uncompressed[inOffset+2]) << 28;
+ s9 |= (long) (uncompressed[inOffset+3]) << 40;
+ s9 |= (long) (uncompressed[inOffset+4]) << 52;
+ compressedBuffer.put(outOffset, s9);
+ return 5;
+ case 10:
+ s9 = STATUS_6NUM_10BITS;
+ s9 |= ((long) (uncompressed[inOffset]) << 4);
+ s9 |= (long) (uncompressed[inOffset+1]) << 14;
+ s9 |= (long) (uncompressed[inOffset+2]) << 24;
+ s9 |= (long) (uncompressed[inOffset+3]) << 34;
+ s9 |= (long) (uncompressed[inOffset+4]) << 44;
+ s9 |= (long) (uncompressed[inOffset+5]) << 54;
+ compressedBuffer.put(outOffset, s9);
+ return 6;
+ case 8:
+ s9 = STATUS_7NUM_8BITS;
+ s9 |= ((long) (uncompressed[inOffset]) << 4);
+ s9 |= (long) (uncompressed[inOffset+1]) << 12;
+ s9 |= (long) (uncompressed[inOffset+2]) << 20;
+ s9 |= (long) (uncompressed[inOffset+3]) << 28;
+ s9 |= (long) (uncompressed[inOffset+4]) << 36;
+ s9 |= (long) (uncompressed[inOffset+5]) << 44;
+ s9 |= (long) (uncompressed[inOffset+6]) << 52; // 4 more bits
+ compressedBuffer.put(outOffset, s9);
+ return 7;
+ case 7:
+ s9 = STATUS_8NUM_7BITS;
+ s9 |= ((long) (uncompressed[inOffset]) << 4);
+ s9 |= (long) (uncompressed[inOffset+1]) << 11;
+ s9 |= (long) (uncompressed[inOffset+2]) << 18;
+ s9 |= (long) (uncompressed[inOffset+3]) << 25;
+ s9 |= (long) (uncompressed[inOffset+4]) << 32;
+ s9 |= (long) (uncompressed[inOffset+5]) << 39;
+ s9 |= (long) (uncompressed[inOffset+6]) << 46;
+ s9 |= (long) (uncompressed[inOffset+7]) << 53; // 4 more bits
+ compressedBuffer.put(outOffset, s9);
+ return 8;
+ case 6:
+ s9 = STATUS_10NUM_6BITS;
+ s9 |= (long) (uncompressed[inOffset]) << 4;
+ s9 |= (long) (uncompressed[inOffset+1]) << 10;
+ s9 |= (long) (uncompressed[inOffset+2]) << 16;
+ s9 |= (long) (uncompressed[inOffset+3]) << 22;
+ s9 |= (long) (uncompressed[inOffset+4]) << 28;
+ s9 |= (long) (uncompressed[inOffset+5]) << 34;
+ s9 |= (long) (uncompressed[inOffset+6]) << 40;
+ s9 |= (long) (uncompressed[inOffset+7]) << 46;
+ s9 |= (long) (uncompressed[inOffset+8]) << 52;
+ s9 |= (long) (uncompressed[inOffset+9]) << 58;
+ compressedBuffer.put(outOffset, s9);
+ return 10;
+ case 5:
+ s9 = STATUS_12NUM_5BITS;
+ s9 |= (long) (uncompressed[inOffset]) << 4;
+ s9 |= (long) (uncompressed[inOffset+1]) << 9;
+ s9 |= (long) (uncompressed[inOffset+2]) << 14;
+ s9 |= (long) (uncompressed[inOffset+3]) << 19;
+ s9 |= (long) (uncompressed[inOffset+4]) << 24;
+ s9 |= (long) (uncompressed[inOffset+5]) << 29;
+ s9 |= (long) (uncompressed[inOffset+6]) << 34;
+ s9 |= (long) (uncompressed[inOffset+7]) << 39;
+ s9 |= (long) (uncompressed[inOffset+8]) << 44;
+ s9 |= (long) (uncompressed[inOffset+9]) << 49;
+ s9 |= (long) (uncompressed[inOffset+10]) << 54;
+ s9 |= (long) (uncompressed[inOffset+11]) << 59;
+ compressedBuffer.put(outOffset, s9);
+ return 12;
+ case 4:
+ s9 = STATUS_15NUM_4BITS;
+ s9 |= (long) (uncompressed[inOffset]) << 4;
+ s9 |= (long) (uncompressed[inOffset+1]) << 8;
+ s9 |= (long) (uncompressed[inOffset+2]) << 12;
+ s9 |= (long) (uncompressed[inOffset+3]) << 16;
+ s9 |= (long) (uncompressed[inOffset+4]) << 20;
+ s9 |= (long) (uncompressed[inOffset+5]) << 24;
+ s9 |= (long) (uncompressed[inOffset+6]) << 28;
+ s9 |= (long) (uncompressed[inOffset+7]) << 32;
+ s9 |= (long) (uncompressed[inOffset+8]) << 36;
+ s9 |= (long) (uncompressed[inOffset+9]) << 40;
+ s9 |= (long) (uncompressed[inOffset+10]) << 44;
+ s9 |= (long) (uncompressed[inOffset+11]) << 48;
+ s9 |= (long) (uncompressed[inOffset+12]) << 52;
+ s9 |= (long) (uncompressed[inOffset+13]) << 56;
+ s9 |= (long) (uncompressed[inOffset+14]) << 60;
+ compressedBuffer.put(outOffset, s9);
+ return 15;
+ case 3:
+ s9 = STATUS_20NUM_3BITS;
+ s9 |= (long) (uncompressed[inOffset]) << 4;
+ s9 |= (long) (uncompressed[inOffset+1]) << 7;
+ s9 |= (long) (uncompressed[inOffset+2]) << 10;
+ s9 |= (long) (uncompressed[inOffset+3]) << 13;
+ s9 |= (long) (uncompressed[inOffset+4]) << 16;
+ s9 |= (long) (uncompressed[inOffset+5]) << 19;
+ s9 |= (long) (uncompressed[inOffset+6]) << 22;
+ s9 |= (long) (uncompressed[inOffset+7]) << 25;
+ s9 |= (long) (uncompressed[inOffset+8]) << 28;
+ s9 |= (long) (uncompressed[inOffset+9]) << 31;
+ s9 |= (long) (uncompressed[inOffset+10]) << 34;
+ s9 |= (long) (uncompressed[inOffset+11]) << 37;
+ s9 |= (long) (uncompressed[inOffset+12]) << 40;
+ s9 |= (long) (uncompressed[inOffset+13]) << 43;
+ s9 |= (long) (uncompressed[inOffset+14]) << 46;
+ s9 |= (long) (uncompressed[inOffset+15]) << 49;
+ s9 |= (long) (uncompressed[inOffset+16]) << 52;
+ s9 |= (long) (uncompressed[inOffset+17]) << 55;
+ s9 |= (long) (uncompressed[inOffset+18]) << 58;
+ s9 |= (long) (uncompressed[inOffset+19]) << 61;
+ compressedBuffer.put(outOffset, s9);
+ return 20;
+ case 2:
+ s9 = STATUS_30NUM_2BITS;
+ s9 |= (long) (uncompressed[inOffset]) << 4;
+ s9 |= (long) (uncompressed[inOffset+1]) << 6;
+ s9 |= (long) (uncompressed[inOffset+2]) << 8;
+ s9 |= (long) (uncompressed[inOffset+3]) << 10;
+ s9 |= (long) (uncompressed[inOffset+4]) << 12;
+ s9 |= (long) (uncompressed[inOffset+5]) << 14;
+ s9 |= (long) (uncompressed[inOffset+6]) << 16;
+ s9 |= (long) (uncompressed[inOffset+7]) << 18;
+ s9 |= (long) (uncompressed[inOffset+8]) << 20;
+ s9 |= (long) (uncompressed[inOffset+9]) << 22;
+ s9 |= (long) (uncompressed[inOffset+10]) << 24;
+ s9 |= (long) (uncompressed[inOffset+11]) << 26;
+ s9 |= (long) (uncompressed[inOffset+12]) << 28;
+ s9 |= (long) (uncompressed[inOffset+13]) << 30;
+ s9 |= (long) (uncompressed[inOffset+14]) << 32;
+ s9 |= (long) (uncompressed[inOffset+15]) << 34;
+ s9 |= (long) (uncompressed[inOffset+16]) << 36;
+ s9 |= (long) (uncompressed[inOffset+17]) << 38;
+ s9 |= (long) (uncompressed[inOffset+18]) << 40;
+ s9 |= (long) (uncompressed[inOffset+19]) << 42;
+ s9 |= (long) (uncompressed[inOffset+20]) << 44;
+ s9 |= (long) (uncompressed[inOffset+21]) << 46;
+ s9 |= (long) (uncompressed[inOffset+22]) << 48;
+ s9 |= (long) (uncompressed[inOffset+23]) << 50;
+ s9 |= (long) (uncompressed[inOffset+24]) << 52;
+ s9 |= (long) (uncompressed[inOffset+25]) << 54;
+ s9 |= (long) (uncompressed[inOffset+26]) << 56;
+ s9 |= (long) (uncompressed[inOffset+27]) << 58;
+ s9 |= (long) (uncompressed[inOffset+28]) << 60;
+ s9 |= (long) (uncompressed[inOffset+29]) << 62;
+ compressedBuffer.put(outOffset, s9);
+ return 30;
+ case 1:
+ s9 = STATUS_60NUM_1BITS;
+ s9 |= (long) (uncompressed[inOffset]) << 4;
+ s9 |= (long) (uncompressed[inOffset+1]) << 5;
+ s9 |= (long) (uncompressed[inOffset+2]) << 6;
+ s9 |= (long) (uncompressed[inOffset+3]) << 7;
+ s9 |= (long) (uncompressed[inOffset+4]) << 8;
+ s9 |= (long) (uncompressed[inOffset+5]) << 9;
+ s9 |= (long) (uncompressed[inOffset+6]) << 10;
+ s9 |= (long) (uncompressed[inOffset+7]) << 11;
+ s9 |= (long) (uncompressed[inOffset+8]) << 12;
+ s9 |= (long) (uncompressed[inOffset+9]) << 13;
+ s9 |= (long) (uncompressed[inOffset+10]) << 14;
+ s9 |= (long) (uncompressed[inOffset+11]) << 15;
+ s9 |= (long) (uncompressed[inOffset+12]) << 16;
+ s9 |= (long) (uncompressed[inOffset+13]) << 17;
+ s9 |= (long) (uncompressed[inOffset+14]) << 18;
+ s9 |= (long) (uncompressed[inOffset+15]) << 19;
+ s9 |= (long) (uncompressed[inOffset+16]) << 20;
+ s9 |= (long) (uncompressed[inOffset+17]) << 21;
+ s9 |= (long) (uncompressed[inOffset+18]) << 22;
+ s9 |= (long) (uncompressed[inOffset+19]) << 23;
+ s9 |= (long) (uncompressed[inOffset+20]) << 24;
+ s9 |= (long) (uncompressed[inOffset+21]) << 25;
+ s9 |= (long) (uncompressed[inOffset+22]) << 26;
+ s9 |= (long) (uncompressed[inOffset+23]) << 27;
+ s9 |= (long) (uncompressed[inOffset+24]) << 28;
+ s9 |= (long) (uncompressed[inOffset+25]) << 29;
+ s9 |= (long) (uncompressed[inOffset+26]) << 30;
+ s9 |= (long) (uncompressed[inOffset+27]) << 31;
+ s9 |= (long) (uncompressed[inOffset+28]) << 32;
+ s9 |= (long) (uncompressed[inOffset+29]) << 33;
+ s9 |= (long) (uncompressed[inOffset+30]) << 34;
+ s9 |= (long) (uncompressed[inOffset+31]) << 35;
+ s9 |= (long) (uncompressed[inOffset+32]) << 36;
+ s9 |= (long) (uncompressed[inOffset+33]) << 37;
+ s9 |= (long) (uncompressed[inOffset+34]) << 38;
+ s9 |= (long) (uncompressed[inOffset+35]) << 39;
+ s9 |= (long) (uncompressed[inOffset+36]) << 40;
+ s9 |= (long) (uncompressed[inOffset+37]) << 41;
+ s9 |= (long) (uncompressed[inOffset+38]) << 42;
+ s9 |= (long) (uncompressed[inOffset+39]) << 43;
+ s9 |= (long) (uncompressed[inOffset+40]) << 44;
+ s9 |= (long) (uncompressed[inOffset+41]) << 45;
+ s9 |= (long) (uncompressed[inOffset+42]) << 46;
+ s9 |= (long) (uncompressed[inOffset+43]) << 47;
+ s9 |= (long) (uncompressed[inOffset+44]) << 48;
+ s9 |= (long) (uncompressed[inOffset+45]) << 49;
+ s9 |= (long) (uncompressed[inOffset+46]) << 50;
+ s9 |= (long) (uncompressed[inOffset+47]) << 51;
+ s9 |= (long) (uncompressed[inOffset+48]) << 52;
+ s9 |= (long) (uncompressed[inOffset+49]) << 53;
+ s9 |= (long) (uncompressed[inOffset+50]) << 54;
+ s9 |= (long) (uncompressed[inOffset+51]) << 55;
+ s9 |= (long) (uncompressed[inOffset+52]) << 56;
+ s9 |= (long) (uncompressed[inOffset+53]) << 57;
+ s9 |= (long) (uncompressed[inOffset+54]) << 58;
+ s9 |= (long) (uncompressed[inOffset+55]) << 59;
+ s9 |= (long) (uncompressed[inOffset+56]) << 60;
+ s9 |= (long) (uncompressed[inOffset+57]) << 61;
+ s9 |= (long) (uncompressed[inOffset+58]) << 62;
+ s9 |= (long) (uncompressed[inOffset+59]) << 63;
+ compressedBuffer.put(outOffset, s9);
+ return 60;
+ default:
+ throw new Error("S98b.compressSingle internal error: unknown minBits: " + minBits); // NOTE(review): message says S98b although this class is Simple64; reached only when minBits matches none of the packings above
+ }
+ }
+
+
+  /**
+   * Decodes a single Simple64 word.
+   *
+   * <p>The low 4 bits of {@code s9} select the layout (how many values the
+   * word holds and how many bits each one uses); the values themselves are
+   * stored from bit 4 upwards, first value in the lowest bits.
+   *
+   * @param s9 the packed 64-bit word
+   * @param decompressed destination array; depending on the layout up to 60
+   *        slots starting at {@code outOffset} are written, so the caller must
+   *        make sure that much room is available
+   * @param outOffset index of the first slot to write
+   * @return the number of values written (1 to 60)
+   * @throws IllegalArgumentException if the low 4 bits are not a known layout
+   */
+  public static int decompressSingle(final long s9, final int[] decompressed, final int outOffset) {
+    // nocommit cast s9 to int then & with 15???
+    final int status = (int) (s9 & 15);
+    switch (status) {
+      case STATUS_1NUM_60BITS:
+        // The int cast keeps the low 32 bits; a stored value can't be bigger than 2^32-1.
+        decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_60_MASK);
+        return 1;
+      case STATUS_2NUM_30BITS:
+        decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_30_MASK);
+        decompressed[outOffset+1] = (int) ((s9 >>> 34) & BITS_30_MASK);
+        return 2;
+      case STATUS_3NUM_20BITS:
+        decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_20_MASK);
+        decompressed[outOffset+1] = (int) ((s9 >>> 24) & BITS_20_MASK);
+        decompressed[outOffset+2] = (int) ((s9 >>> 44) & BITS_20_MASK);
+        return 3;
+      case STATUS_4NUM_15BITS:
+        decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_15_MASK);
+        decompressed[outOffset+1] = (int) ((s9 >>> 19) & BITS_15_MASK);
+        decompressed[outOffset+2] = (int) ((s9 >>> 34) & BITS_15_MASK);
+        decompressed[outOffset+3] = (int) ((s9 >>> 49) & BITS_15_MASK);
+        return 4;
+      case STATUS_5NUM_12BITS:
+        decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_12_MASK);
+        decompressed[outOffset+1] = (int) ((s9 >>> 16) & BITS_12_MASK);
+        decompressed[outOffset+2] = (int) ((s9 >>> 28) & BITS_12_MASK);
+        decompressed[outOffset+3] = (int) ((s9 >>> 40) & BITS_12_MASK);
+        decompressed[outOffset+4] = (int) ((s9 >>> 52) & BITS_12_MASK);
+        return 5;
+      case STATUS_6NUM_10BITS:
+        decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_10_MASK);
+        decompressed[outOffset+1] = (int) ((s9 >>> 14) & BITS_10_MASK);
+        decompressed[outOffset+2] = (int) ((s9 >>> 24) & BITS_10_MASK);
+        decompressed[outOffset+3] = (int) ((s9 >>> 34) & BITS_10_MASK);
+        decompressed[outOffset+4] = (int) ((s9 >>> 44) & BITS_10_MASK);
+        decompressed[outOffset+5] = (int) ((s9 >>> 54) & BITS_10_MASK);
+        return 6;
+      case STATUS_7NUM_8BITS:
+        decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_8_MASK);
+        decompressed[outOffset+1] = (int) ((s9 >>> 12) & BITS_8_MASK);
+        decompressed[outOffset+2] = (int) ((s9 >>> 20) & BITS_8_MASK);
+        decompressed[outOffset+3] = (int) ((s9 >>> 28) & BITS_8_MASK);
+        decompressed[outOffset+4] = (int) ((s9 >>> 36) & BITS_8_MASK);
+        decompressed[outOffset+5] = (int) ((s9 >>> 44) & BITS_8_MASK);
+        // 7 * 8 = 56 payload bits leave 4 bits spare; the last slot is read with
+        // the 12-bit mask so it covers them too (52 + 12 = 64).
+        // NOTE(review): presumably the encoder may store a wider last value here -- confirm.
+        decompressed[outOffset+6] = (int) ((s9 >>> 52) & BITS_12_MASK);
+        return 7;
+      case STATUS_8NUM_7BITS:
+        decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_7_MASK);
+        decompressed[outOffset+1] = (int) ((s9 >>> 11) & BITS_7_MASK);
+        decompressed[outOffset+2] = (int) ((s9 >>> 18) & BITS_7_MASK);
+        decompressed[outOffset+3] = (int) ((s9 >>> 25) & BITS_7_MASK);
+        decompressed[outOffset+4] = (int) ((s9 >>> 32) & BITS_7_MASK);
+        decompressed[outOffset+5] = (int) ((s9 >>> 39) & BITS_7_MASK);
+        decompressed[outOffset+6] = (int) ((s9 >>> 46) & BITS_7_MASK);
+        // 8 * 7 = 56 payload bits leave 4 bits spare; the last slot is read with
+        // the 11-bit mask so it covers them too (53 + 11 = 64).
+        // NOTE(review): presumably the encoder may store a wider last value here -- confirm.
+        decompressed[outOffset+7] = (int) ((s9 >>> 53) & BITS_11_MASK);
+        return 8;
+      case STATUS_10NUM_6BITS:
+        decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_6_MASK);
+        decompressed[outOffset+1] = (int) ((s9 >>> 10) & BITS_6_MASK);
+        decompressed[outOffset+2] = (int) ((s9 >>> 16) & BITS_6_MASK);
+        decompressed[outOffset+3] = (int) ((s9 >>> 22) & BITS_6_MASK);
+        decompressed[outOffset+4] = (int) ((s9 >>> 28) & BITS_6_MASK);
+        decompressed[outOffset+5] = (int) ((s9 >>> 34) & BITS_6_MASK);
+        decompressed[outOffset+6] = (int) ((s9 >>> 40) & BITS_6_MASK);
+        decompressed[outOffset+7] = (int) ((s9 >>> 46) & BITS_6_MASK);
+        decompressed[outOffset+8] = (int) ((s9 >>> 52) & BITS_6_MASK);
+        decompressed[outOffset+9] = (int) ((s9 >>> 58) & BITS_6_MASK);
+        return 10;
+      case STATUS_12NUM_5BITS:
+        decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_5_MASK);
+        decompressed[outOffset+1] = (int) ((s9 >>> 9) & BITS_5_MASK);
+        decompressed[outOffset+2] = (int) ((s9 >>> 14) & BITS_5_MASK);
+        decompressed[outOffset+3] = (int) ((s9 >>> 19) & BITS_5_MASK);
+        decompressed[outOffset+4] = (int) ((s9 >>> 24) & BITS_5_MASK);
+        decompressed[outOffset+5] = (int) ((s9 >>> 29) & BITS_5_MASK);
+        decompressed[outOffset+6] = (int) ((s9 >>> 34) & BITS_5_MASK);
+        decompressed[outOffset+7] = (int) ((s9 >>> 39) & BITS_5_MASK);
+        decompressed[outOffset+8] = (int) ((s9 >>> 44) & BITS_5_MASK);
+        decompressed[outOffset+9] = (int) ((s9 >>> 49) & BITS_5_MASK);
+        decompressed[outOffset+10] = (int) ((s9 >>> 54) & BITS_5_MASK);
+        decompressed[outOffset+11] = (int) ((s9 >>> 59) & BITS_5_MASK);
+        return 12;
+      case STATUS_15NUM_4BITS:
+        decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_4_MASK);
+        decompressed[outOffset+1] = (int) ((s9 >>> 8) & BITS_4_MASK);
+        decompressed[outOffset+2] = (int) ((s9 >>> 12) & BITS_4_MASK);
+        decompressed[outOffset+3] = (int) ((s9 >>> 16) & BITS_4_MASK);
+        decompressed[outOffset+4] = (int) ((s9 >>> 20) & BITS_4_MASK);
+        decompressed[outOffset+5] = (int) ((s9 >>> 24) & BITS_4_MASK);
+        decompressed[outOffset+6] = (int) ((s9 >>> 28) & BITS_4_MASK);
+        decompressed[outOffset+7] = (int) ((s9 >>> 32) & BITS_4_MASK);
+        decompressed[outOffset+8] = (int) ((s9 >>> 36) & BITS_4_MASK);
+        decompressed[outOffset+9] = (int) ((s9 >>> 40) & BITS_4_MASK);
+        decompressed[outOffset+10] = (int) ((s9 >>> 44) & BITS_4_MASK);
+        decompressed[outOffset+11] = (int) ((s9 >>> 48) & BITS_4_MASK);
+        decompressed[outOffset+12] = (int) ((s9 >>> 52) & BITS_4_MASK);
+        decompressed[outOffset+13] = (int) ((s9 >>> 56) & BITS_4_MASK);
+        decompressed[outOffset+14] = (int) ((s9 >>> 60) & BITS_4_MASK);
+        return 15;
+      case STATUS_20NUM_3BITS:
+        decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_3_MASK);
+        decompressed[outOffset+1] = (int) ((s9 >>> 7) & BITS_3_MASK);
+        decompressed[outOffset+2] = (int) ((s9 >>> 10) & BITS_3_MASK);
+        decompressed[outOffset+3] = (int) ((s9 >>> 13) & BITS_3_MASK);
+        decompressed[outOffset+4] = (int) ((s9 >>> 16) & BITS_3_MASK);
+        decompressed[outOffset+5] = (int) ((s9 >>> 19) & BITS_3_MASK);
+        decompressed[outOffset+6] = (int) ((s9 >>> 22) & BITS_3_MASK);
+        decompressed[outOffset+7] = (int) ((s9 >>> 25) & BITS_3_MASK);
+        decompressed[outOffset+8] = (int) ((s9 >>> 28) & BITS_3_MASK);
+        decompressed[outOffset+9] = (int) ((s9 >>> 31) & BITS_3_MASK);
+        decompressed[outOffset+10] = (int) ((s9 >>> 34) & BITS_3_MASK);
+        decompressed[outOffset+11] = (int) ((s9 >>> 37) & BITS_3_MASK);
+        decompressed[outOffset+12] = (int) ((s9 >>> 40) & BITS_3_MASK);
+        decompressed[outOffset+13] = (int) ((s9 >>> 43) & BITS_3_MASK);
+        decompressed[outOffset+14] = (int) ((s9 >>> 46) & BITS_3_MASK);
+        decompressed[outOffset+15] = (int) ((s9 >>> 49) & BITS_3_MASK);
+        decompressed[outOffset+16] = (int) ((s9 >>> 52) & BITS_3_MASK);
+        decompressed[outOffset+17] = (int) ((s9 >>> 55) & BITS_3_MASK);
+        decompressed[outOffset+18] = (int) ((s9 >>> 58) & BITS_3_MASK);
+        decompressed[outOffset+19] = (int) ((s9 >>> 61) & BITS_3_MASK);
+        return 20;
+      case STATUS_30NUM_2BITS:
+        decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_2_MASK);
+        decompressed[outOffset+1] = (int) ((s9 >>> 6) & BITS_2_MASK);
+        decompressed[outOffset+2] = (int) ((s9 >>> 8) & BITS_2_MASK);
+        decompressed[outOffset+3] = (int) ((s9 >>> 10) & BITS_2_MASK);
+        decompressed[outOffset+4] = (int) ((s9 >>> 12) & BITS_2_MASK);
+        decompressed[outOffset+5] = (int) ((s9 >>> 14) & BITS_2_MASK);
+        decompressed[outOffset+6] = (int) ((s9 >>> 16) & BITS_2_MASK);
+        decompressed[outOffset+7] = (int) ((s9 >>> 18) & BITS_2_MASK);
+        decompressed[outOffset+8] = (int) ((s9 >>> 20) & BITS_2_MASK);
+        decompressed[outOffset+9] = (int) ((s9 >>> 22) & BITS_2_MASK);
+        decompressed[outOffset+10] = (int) ((s9 >>> 24) & BITS_2_MASK);
+        decompressed[outOffset+11] = (int) ((s9 >>> 26) & BITS_2_MASK);
+        decompressed[outOffset+12] = (int) ((s9 >>> 28) & BITS_2_MASK);
+        decompressed[outOffset+13] = (int) ((s9 >>> 30) & BITS_2_MASK);
+        decompressed[outOffset+14] = (int) ((s9 >>> 32) & BITS_2_MASK);
+        decompressed[outOffset+15] = (int) ((s9 >>> 34) & BITS_2_MASK);
+        decompressed[outOffset+16] = (int) ((s9 >>> 36) & BITS_2_MASK);
+        decompressed[outOffset+17] = (int) ((s9 >>> 38) & BITS_2_MASK);
+        decompressed[outOffset+18] = (int) ((s9 >>> 40) & BITS_2_MASK);
+        decompressed[outOffset+19] = (int) ((s9 >>> 42) & BITS_2_MASK);
+        decompressed[outOffset+20] = (int) ((s9 >>> 44) & BITS_2_MASK);
+        decompressed[outOffset+21] = (int) ((s9 >>> 46) & BITS_2_MASK);
+        decompressed[outOffset+22] = (int) ((s9 >>> 48) & BITS_2_MASK);
+        decompressed[outOffset+23] = (int) ((s9 >>> 50) & BITS_2_MASK);
+        decompressed[outOffset+24] = (int) ((s9 >>> 52) & BITS_2_MASK);
+        decompressed[outOffset+25] = (int) ((s9 >>> 54) & BITS_2_MASK);
+        decompressed[outOffset+26] = (int) ((s9 >>> 56) & BITS_2_MASK);
+        decompressed[outOffset+27] = (int) ((s9 >>> 58) & BITS_2_MASK);
+        decompressed[outOffset+28] = (int) ((s9 >>> 60) & BITS_2_MASK);
+        decompressed[outOffset+29] = (int) ((s9 >>> 62) & BITS_2_MASK);
+        return 30;
+      case STATUS_60NUM_1BITS:
+        decompressed[outOffset] = (int) ((s9 >>> 4) & BITS_1_MASK);
+        decompressed[outOffset+1] = (int) ((s9 >>> 5) & BITS_1_MASK);
+        decompressed[outOffset+2] = (int) ((s9 >>> 6) & BITS_1_MASK);
+        decompressed[outOffset+3] = (int) ((s9 >>> 7) & BITS_1_MASK);
+        decompressed[outOffset+4] = (int) ((s9 >>> 8) & BITS_1_MASK);
+        decompressed[outOffset+5] = (int) ((s9 >>> 9) & BITS_1_MASK);
+        decompressed[outOffset+6] = (int) ((s9 >>> 10) & BITS_1_MASK);
+        decompressed[outOffset+7] = (int) ((s9 >>> 11) & BITS_1_MASK);
+        decompressed[outOffset+8] = (int) ((s9 >>> 12) & BITS_1_MASK);
+        decompressed[outOffset+9] = (int) ((s9 >>> 13) & BITS_1_MASK);
+        decompressed[outOffset+10] = (int) ((s9 >>> 14) & BITS_1_MASK);
+        decompressed[outOffset+11] = (int) ((s9 >>> 15) & BITS_1_MASK);
+        decompressed[outOffset+12] = (int) ((s9 >>> 16) & BITS_1_MASK);
+        decompressed[outOffset+13] = (int) ((s9 >>> 17) & BITS_1_MASK);
+        decompressed[outOffset+14] = (int) ((s9 >>> 18) & BITS_1_MASK);
+        decompressed[outOffset+15] = (int) ((s9 >>> 19) & BITS_1_MASK);
+        decompressed[outOffset+16] = (int) ((s9 >>> 20) & BITS_1_MASK);
+        decompressed[outOffset+17] = (int) ((s9 >>> 21) & BITS_1_MASK);
+        decompressed[outOffset+18] = (int) ((s9 >>> 22) & BITS_1_MASK);
+        decompressed[outOffset+19] = (int) ((s9 >>> 23) & BITS_1_MASK);
+        decompressed[outOffset+20] = (int) ((s9 >>> 24) & BITS_1_MASK);
+        decompressed[outOffset+21] = (int) ((s9 >>> 25) & BITS_1_MASK);
+        decompressed[outOffset+22] = (int) ((s9 >>> 26) & BITS_1_MASK);
+        decompressed[outOffset+23] = (int) ((s9 >>> 27) & BITS_1_MASK);
+        decompressed[outOffset+24] = (int) ((s9 >>> 28) & BITS_1_MASK);
+        decompressed[outOffset+25] = (int) ((s9 >>> 29) & BITS_1_MASK);
+        decompressed[outOffset+26] = (int) ((s9 >>> 30) & BITS_1_MASK);
+        decompressed[outOffset+27] = (int) ((s9 >>> 31) & BITS_1_MASK);
+        decompressed[outOffset+28] = (int) ((s9 >>> 32) & BITS_1_MASK);
+        decompressed[outOffset+29] = (int) ((s9 >>> 33) & BITS_1_MASK);
+        decompressed[outOffset+30] = (int) ((s9 >>> 34) & BITS_1_MASK);
+        decompressed[outOffset+31] = (int) ((s9 >>> 35) & BITS_1_MASK);
+        decompressed[outOffset+32] = (int) ((s9 >>> 36) & BITS_1_MASK);
+        decompressed[outOffset+33] = (int) ((s9 >>> 37) & BITS_1_MASK);
+        decompressed[outOffset+34] = (int) ((s9 >>> 38) & BITS_1_MASK);
+        decompressed[outOffset+35] = (int) ((s9 >>> 39) & BITS_1_MASK);
+        decompressed[outOffset+36] = (int) ((s9 >>> 40) & BITS_1_MASK);
+        decompressed[outOffset+37] = (int) ((s9 >>> 41) & BITS_1_MASK);
+        decompressed[outOffset+38] = (int) ((s9 >>> 42) & BITS_1_MASK);
+        decompressed[outOffset+39] = (int) ((s9 >>> 43) & BITS_1_MASK);
+        decompressed[outOffset+40] = (int) ((s9 >>> 44) & BITS_1_MASK);
+        decompressed[outOffset+41] = (int) ((s9 >>> 45) & BITS_1_MASK);
+        decompressed[outOffset+42] = (int) ((s9 >>> 46) & BITS_1_MASK);
+        decompressed[outOffset+43] = (int) ((s9 >>> 47) & BITS_1_MASK);
+        decompressed[outOffset+44] = (int) ((s9 >>> 48) & BITS_1_MASK);
+        decompressed[outOffset+45] = (int) ((s9 >>> 49) & BITS_1_MASK);
+        decompressed[outOffset+46] = (int) ((s9 >>> 50) & BITS_1_MASK);
+        decompressed[outOffset+47] = (int) ((s9 >>> 51) & BITS_1_MASK);
+        decompressed[outOffset+48] = (int) ((s9 >>> 52) & BITS_1_MASK);
+        decompressed[outOffset+49] = (int) ((s9 >>> 53) & BITS_1_MASK);
+        decompressed[outOffset+50] = (int) ((s9 >>> 54) & BITS_1_MASK);
+        decompressed[outOffset+51] = (int) ((s9 >>> 55) & BITS_1_MASK);
+        decompressed[outOffset+52] = (int) ((s9 >>> 56) & BITS_1_MASK);
+        decompressed[outOffset+53] = (int) ((s9 >>> 57) & BITS_1_MASK);
+        decompressed[outOffset+54] = (int) ((s9 >>> 58) & BITS_1_MASK);
+        decompressed[outOffset+55] = (int) ((s9 >>> 59) & BITS_1_MASK);
+        decompressed[outOffset+56] = (int) ((s9 >>> 60) & BITS_1_MASK);
+        decompressed[outOffset+57] = (int) ((s9 >>> 61) & BITS_1_MASK);
+        decompressed[outOffset+58] = (int) ((s9 >>> 62) & BITS_1_MASK);
+        decompressed[outOffset+59] = (int) ((s9 >>> 63) & BITS_1_MASK);
+        return 60;
+      default:
+        // Report the selector that was actually switched on (the low 4 bits).
+        throw new IllegalArgumentException("Unknown Simple64 status: " + status);
+    }
+  }
+
+  /**
+   * Packs the pending uncompressed ints into the compressed buffer, one long
+   * per {@code compressSingle} call, advancing {@code offset} and shrinking
+   * {@code unComprSize} by the number of values each call consumed.
+   *
+   * <p>While the loop runs {@code compressedSize} counts longs (it doubles as
+   * the output index); on exit it is converted to a size in bytes.
+   */
+  public void compress() {
+    for (compressedSize = 0; unComprSize > 0; compressedSize++) {
+      final int consumed = compressSingle(unCompressedData, offset, unComprSize, compressedBuffer, compressedSize);
+      offset += consumed;
+      unComprSize -= consumed;
+    }
+    // longs -> bytes
+    compressedSize <<= 3;
+  }
+
+  /**
+   * Rewinds the compressed buffer and decodes consecutive longs into the
+   * uncompressed data array, starting at index 0, until {@code unComprSize}
+   * values have been accounted for.
+   */
+  public void decompress() {
+    compressedBuffer.rewind();
+    int written = 0;
+    while (unComprSize > 0) {
+      final int count = decompressSingle(compressedBuffer.get(), unCompressedData, written);
+      written += count;
+      unComprSize -= count;
+    }
+  }
+
+}
Added: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64Codec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64Codec.java?rev=1066838&view=auto
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64Codec.java (added)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64Codec.java Thu Feb 3 15:52:33 2011
@@ -0,0 +1,218 @@
+package org.apache.lucene.index.codecs.simple64;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Set;
+
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.FieldsConsumer;
+import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.index.codecs.sep.IntStreamFactory;
+import org.apache.lucene.index.codecs.sep.IntIndexInput;
+import org.apache.lucene.index.codecs.sep.IntIndexOutput;
+import org.apache.lucene.index.codecs.sep.SepPostingsReaderImpl;
+import org.apache.lucene.index.codecs.sep.SepPostingsWriterImpl;
+import org.apache.lucene.index.codecs.intblock.FixedIntBlockIndexInput;
+import org.apache.lucene.index.codecs.intblock.FixedIntBlockIndexOutput;
+import org.apache.lucene.index.codecs.PostingsWriterBase;
+import org.apache.lucene.index.codecs.PostingsReaderBase;
+import org.apache.lucene.index.codecs.BlockTermsReader;
+import org.apache.lucene.index.codecs.BlockTermsWriter;
+import org.apache.lucene.index.codecs.TermsIndexReaderBase;
+import org.apache.lucene.index.codecs.TermsIndexWriterBase;
+import org.apache.lucene.index.codecs.VariableGapTermsIndexReader;
+import org.apache.lucene.index.codecs.VariableGapTermsIndexWriter;
+import org.apache.lucene.index.codecs.standard.StandardCodec;
+import org.apache.lucene.store.*;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Simple64
+ *
+ * @lucene.experimental
+ */
+
+// nocommit: we are wasting bits by packing blocks of integers into as few longs as possible?
+// we should be a variable int block codec instead?
+// we then shouldnt need header bytes at all... we read a fixed number of longs per block.
+public class Simple64Codec extends Codec {
+
+ private final int blockSize;
+
+ public Simple64Codec(int blockSize) {
+ this.blockSize = blockSize;
+ name = "Simple64";
+ }
+
+ @Override
+ public String toString() {
+ return name + "(blockSize=" + blockSize + ")";
+ }
+
+ // only for testing
+ public IntStreamFactory getIntFactory() {
+ return new Simple64IntFactory();
+ }
+
+ private class Simple64IntFactory extends IntStreamFactory {
+
+ @Override
+ public IntIndexInput openInput(Directory dir, String fileName, int readBufferSize) throws IOException {
+ return new FixedIntBlockIndexInput(dir.openInput(fileName, readBufferSize)) {
+
+ @Override
+ protected BlockReader getBlockReader(final IndexInput in, final int[] buffer) throws IOException {
+ return new BlockReader() {
+ final ByteBuffer byteBuffer = ByteBuffer.allocate(blockSize*8);
+ final byte[] input = byteBuffer.array();
+ final Simple64 decompressor = new Simple64();
+
+ {
+ decompressor.setCompressedBuffer(byteBuffer.asLongBuffer());
+ }
+
+ public void readBlock() throws IOException {
+ final int numBytes = in.readVInt() << 3; // read header
+ in.readBytes(input, 0, numBytes);
+ decompressor.setUnCompressedData(buffer, 0, buffer.length);
+ decompressor.decompress();
+ }
+ };
+ }
+ };
+ }
+
+ @Override
+ public IntIndexOutput createOutput(Directory dir, String fileName) throws IOException {
+ return new FixedIntBlockIndexOutput(dir.createOutput(fileName), blockSize) {
+ final ByteBuffer byteBuffer = ByteBuffer.allocate(blockSize*8);
+ final byte[] output = byteBuffer.array();
+ final Simple64 compressor = new Simple64();
+
+ {
+ compressor.setCompressedBuffer(byteBuffer.asLongBuffer());
+ }
+
+ @Override
+ protected void flushBlock() throws IOException {
+ compressor.setUnCompressedData(buffer, 0, buffer.length);
+ compressor.compress();
+ final int numBytes = compressor.compressedSize();
+ out.writeVInt(numBytes >>> 3);
+ out.writeBytes(output, numBytes);
+ }
+ };
+ }
+ }
+
+ @Override
+ public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+ PostingsWriterBase postingsWriter = new SepPostingsWriterImpl(state, new Simple64IntFactory());
+
+ boolean success = false;
+ TermsIndexWriterBase indexWriter;
+ try {
+ indexWriter = new VariableGapTermsIndexWriter(state, new VariableGapTermsIndexWriter.EveryNTermSelector(state.termIndexInterval));
+ success = true;
+ } finally {
+ if (!success) {
+ postingsWriter.close();
+ }
+ }
+
+ success = false;
+ try {
+ FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+ success = true;
+ return ret;
+ } finally {
+ if (!success) {
+ try {
+ postingsWriter.close();
+ } finally {
+ indexWriter.close();
+ }
+ }
+ }
+ }
+
+ @Override
+ public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
+ PostingsReaderBase postingsReader = new SepPostingsReaderImpl(state.dir,
+ state.segmentInfo,
+ state.readBufferSize,
+ new Simple64IntFactory(), state.codecId);
+
+ TermsIndexReaderBase indexReader;
+ boolean success = false;
+ try {
+ indexReader = new VariableGapTermsIndexReader(state.dir,
+ state.fieldInfos,
+ state.segmentInfo.name,
+ state.termsIndexDivisor,
+ state.codecId);
+ success = true;
+ } finally {
+ if (!success) {
+ postingsReader.close();
+ }
+ }
+
+ success = false;
+ try {
+ FieldsProducer ret = new BlockTermsReader(indexReader,
+ state.dir,
+ state.fieldInfos,
+ state.segmentInfo.name,
+ postingsReader,
+ state.readBufferSize,
+ BytesRef.getUTF8SortedAsUnicodeComparator(),
+ StandardCodec.TERMS_CACHE_SIZE,
+ state.codecId);
+ success = true;
+ return ret;
+ } finally {
+ if (!success) {
+ try {
+ postingsReader.close();
+ } finally {
+ indexReader.close();
+ }
+ }
+ }
+ }
+
+ @Override
+ public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set<String> files) {
+ SepPostingsReaderImpl.files(segmentInfo, codecId, files);
+ BlockTermsReader.files(dir, segmentInfo, codecId, files);
+ VariableGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
+ }
+
+ @Override
+ public void getExtensions(Set<String> extensions) {
+ SepPostingsWriterImpl.getExtensions(extensions);
+ BlockTermsReader.getExtensions(extensions);
+ VariableGapTermsIndexReader.getIndexExtensions(extensions);
+ }
+}
Added: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64VarIntCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64VarIntCodec.java?rev=1066838&view=auto
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64VarIntCodec.java (added)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simple64/Simple64VarIntCodec.java Thu Feb 3 15:52:33 2011
@@ -0,0 +1,204 @@
+package org.apache.lucene.index.codecs.simple64;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Set;
+
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.FieldsConsumer;
+import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.index.codecs.sep.IntStreamFactory;
+import org.apache.lucene.index.codecs.sep.IntIndexInput;
+import org.apache.lucene.index.codecs.sep.IntIndexOutput;
+import org.apache.lucene.index.codecs.sep.SepPostingsReaderImpl;
+import org.apache.lucene.index.codecs.sep.SepPostingsWriterImpl;
+import org.apache.lucene.index.codecs.intblock.VariableIntBlockIndexInput;
+import org.apache.lucene.index.codecs.intblock.VariableIntBlockIndexOutput;
+import org.apache.lucene.index.codecs.PostingsWriterBase;
+import org.apache.lucene.index.codecs.PostingsReaderBase;
+import org.apache.lucene.index.codecs.BlockTermsReader;
+import org.apache.lucene.index.codecs.BlockTermsWriter;
+import org.apache.lucene.index.codecs.TermsIndexReaderBase;
+import org.apache.lucene.index.codecs.TermsIndexWriterBase;
+import org.apache.lucene.index.codecs.VariableGapTermsIndexReader;
+import org.apache.lucene.index.codecs.VariableGapTermsIndexWriter;
+import org.apache.lucene.index.codecs.standard.StandardCodec;
+import org.apache.lucene.store.*;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Codec that stores the sep-postings int streams as variable-size blocks,
+ * one {@link Simple64} word (a single long) per block.
+ *
+ * @lucene.experimental
+ */
+
+public class Simple64VarIntCodec extends Codec {
+
+  public Simple64VarIntCodec() {
+    name = "Simple64VarInt";
+  }
+
+  @Override
+  public String toString() {
+    return name;
+  }
+
+  // only for testing
+  public IntStreamFactory getIntFactory() {
+    return new Simple64IntFactory();
+  }
+
+  /** Creates the variable-block int inputs/outputs backed by Simple64. */
+  private class Simple64IntFactory extends IntStreamFactory {
+
+    @Override
+    public IntIndexInput openInput(Directory dir, final String fileName, int readBufferSize) throws IOException {
+      final IndexInput rawInput = dir.openInput(fileName, readBufferSize);
+      return new VariableIntBlockIndexInput(rawInput) {
+
+        @Override
+        protected BlockReader getBlockReader(final IndexInput in, final int[] buffer) throws IOException {
+          return new BlockReader() {
+            // One block is exactly one long; the decoded count is the block length.
+            public int readBlock() throws IOException {
+              return Simple64.decompressSingle(in.readLong(), buffer, 0);
+            }
+          };
+        }
+      };
+    }
+
+    @Override
+    public IntIndexOutput createOutput(Directory dir, String fileName) throws IOException {
+      final IndexOutput rawOutput = dir.createOutput(fileName);
+      // NOTE(review): 61 is presumably the maximum block size (a Simple64 word
+      // decodes to at most 60 values) -- confirm against VariableIntBlockIndexOutput.
+      return new VariableIntBlockIndexOutput(rawOutput, 61) {
+        final long[] word = new long[1];
+        final Simple64 packer = new Simple64();
+
+        // Writes word[0] whenever the compressor reports a non-zero count.
+        // NOTE(review): presumably that count is the number of values packed
+        // into the finished word -- confirm against Simple64.add.
+        @Override
+        protected int add(int v) throws IOException {
+          final int flushed = packer.add(v, word);
+          if (flushed == 0) {
+            return 0;
+          }
+          out.writeLong(word[0]);
+          return flushed;
+        }
+      };
+    }
+  }
+
+  /**
+   * Builds the write chain (postings writer, terms index writer, terms
+   * writer). Whatever was already opened is closed if a later step throws.
+   */
+  @Override
+  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+    PostingsWriterBase postings = new SepPostingsWriterImpl(state, new Simple64IntFactory());
+
+    TermsIndexWriterBase termsIndex;
+    boolean indexOpened = false;
+    try {
+      termsIndex = new VariableGapTermsIndexWriter(state, new VariableGapTermsIndexWriter.EveryNTermSelector(state.termIndexInterval));
+      indexOpened = true;
+    } finally {
+      if (!indexOpened) {
+        postings.close();
+      }
+    }
+
+    boolean handedOff = false;
+    try {
+      FieldsConsumer consumer = new BlockTermsWriter(termsIndex, state, postings, BytesRef.getUTF8SortedAsUnicodeComparator());
+      handedOff = true;
+      return consumer;
+    } finally {
+      if (!handedOff) {
+        try {
+          postings.close();
+        } finally {
+          termsIndex.close();
+        }
+      }
+    }
+  }
+
+  /**
+   * Builds the read chain (postings reader, terms index reader, terms
+   * reader). Whatever was already opened is closed if a later step throws.
+   */
+  @Override
+  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
+    PostingsReaderBase postings = new SepPostingsReaderImpl(state.dir,
+                                                            state.segmentInfo,
+                                                            state.readBufferSize,
+                                                            new Simple64IntFactory(), state.codecId);
+
+    TermsIndexReaderBase termsIndex;
+    boolean indexOpened = false;
+    try {
+      termsIndex = new VariableGapTermsIndexReader(state.dir,
+                                                   state.fieldInfos,
+                                                   state.segmentInfo.name,
+                                                   state.termsIndexDivisor,
+                                                   state.codecId);
+      indexOpened = true;
+    } finally {
+      if (!indexOpened) {
+        postings.close();
+      }
+    }
+
+    boolean handedOff = false;
+    try {
+      FieldsProducer producer = new BlockTermsReader(termsIndex,
+                                                     state.dir,
+                                                     state.fieldInfos,
+                                                     state.segmentInfo.name,
+                                                     postings,
+                                                     state.readBufferSize,
+                                                     BytesRef.getUTF8SortedAsUnicodeComparator(),
+                                                     StandardCodec.TERMS_CACHE_SIZE,
+                                                     state.codecId);
+      handedOff = true;
+      return producer;
+    } finally {
+      if (!handedOff) {
+        try {
+          postings.close();
+        } finally {
+          termsIndex.close();
+        }
+      }
+    }
+  }
+
+  /** Collects the files of the postings, terms dictionary and terms index. */
+  @Override
+  public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set<String> files) {
+    SepPostingsReaderImpl.files(segmentInfo, codecId, files);
+    BlockTermsReader.files(dir, segmentInfo, codecId, files);
+    VariableGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
+  }
+
+  /** Collects the file extensions of the postings, terms dictionary and terms index. */
+  @Override
+  public void getExtensions(Set<String> extensions) {
+    SepPostingsWriterImpl.getExtensions(extensions);
+    BlockTermsReader.getExtensions(extensions);
+    VariableGapTermsIndexReader.getIndexExtensions(extensions);
+  }
+}
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestOmitTf.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestOmitTf.java?rev=1066838&r1=1066837&r2=1066838&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestOmitTf.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestOmitTf.java Thu Feb 3 15:52:33 2011
@@ -257,6 +257,7 @@ public class TestOmitTf extends LuceneTe
setSimilarityProvider(new SimpleSimilarity()).
setMergePolicy(newLogMergePolicy(2))
);
+ writer.setInfoStream(VERBOSE ? System.out : null);
StringBuilder sb = new StringBuilder(265);
String term = "term";
Added: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/CodecTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/CodecTestCase.java?rev=1066838&view=auto
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/CodecTestCase.java (added)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/CodecTestCase.java Thu Feb 3 15:52:33 2011
@@ -0,0 +1,66 @@
+package org.apache.lucene.index.codecs;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.util.LuceneTestCase;
+
+ /**
+  * Base class for int codec tests. It generates arrays of random values for each requested bit
+  * width and hands them to the subclass-provided {@link #doTest(int[], int)} once per block size.
+  */
+ public abstract class CodecTestCase extends LuceneTestCase {
+
+   /** Number of random values generated for every bit width. */
+   private static final int LIST_SIZE = 32768;
+
+   /**
+    * The different block sizes to test
+    */
+   protected static final int[] BLOCK_SIZES = {32, 256, 512, 2048};
+
+   /**
+    * Returns a random long between {@code start} and {@code end}; both are inclusive!
+    *
+    * @param start lowest value that may be returned
+    * @param end highest value that may be returned
+    * @return {@code start + r * (end - start + 1)} truncated to a long, with {@code r} drawn
+    *         from {@code random.nextDouble()}
+    */
+   public static long nextLong(long start, long end) {
+     double r = random.nextDouble();
+     return (long) ((r * end) + ((1.0 - r) * start) + r);
+   }
+
+   /**
+    * For every bit width in {@code [minBits, maxBits]}, fills an array of {@link #LIST_SIZE}
+    * random values and runs {@link #doTest(int[], int)} on it once per entry of
+    * {@code blockSizes}.
+    *
+    * @param minBits smallest bit width to test (inclusive)
+    * @param maxBits largest bit width to test (inclusive)
+    * @param blockSizes block sizes passed, one at a time, to {@code doTest}
+    * @throws IOException if {@code doTest} fails with an I/O error
+    */
+   public void doTestIntegerRange(final int minBits, final int maxBits, final int[] blockSizes) throws IOException {
+     final int[] input = new int[LIST_SIZE];
+
+     for (int i = minBits; i <= maxBits; i++) {
+       // Only print progress when verbose output was requested, like the message below.
+       if (VERBOSE) {
+         System.out.println("TEST bits=" + i);
+       }
+
+       // Range of values whose highest set bit is bit i; for i == 1 the range also includes 0.
+       final long min = i == 1 ? 0 : (1L << (i - 1));
+       final long max = ((1L << i) - 1);
+
+       for (int j = 0; j < LIST_SIZE; j++) {
+         // For i == 32 the narrowing cast wraps the value to a negative int.
+         input[j] = (int) nextLong(min, max);
+       }
+
+       for (final int blockSize : blockSizes) {
+         if (VERBOSE) {
+           System.out.println("Perform Integer Range Test: bits = {" + i + "}, block size = {" + blockSize + "}");
+         }
+         this.doTest(input, blockSize);
+       }
+     }
+   }
+
+   /**
+    * Same as {@link #doTestIntegerRange(int, int, int[])}, using the default
+    * {@link #BLOCK_SIZES}.
+    */
+   public void doTestIntegerRange(final int minBits, final int maxBits) throws IOException {
+     this.doTestIntegerRange(minBits, maxBits, BLOCK_SIZES);
+   }
+
+   /**
+    * Codec-specific check implemented by subclasses.
+    *
+    * @param input values to run through the codec under test
+    * @param blockSize block size for this run
+    * @throws IOException on I/O failure
+    */
+   protected abstract void doTest(int[] input, int blockSize) throws IOException;
+
+ }
Added: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/simple64/Simple64Test.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/simple64/Simple64Test.java?rev=1066838&view=auto
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/simple64/Simple64Test.java (added)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/simple64/Simple64Test.java Thu Feb 3 15:52:33 2011
@@ -0,0 +1,127 @@
+package org.apache.lucene.index.codecs.simple64;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.BulkPostingsEnum.BlockReader;
+import org.apache.lucene.index.codecs.CodecTestCase;
+import org.apache.lucene.index.codecs.sep.IntIndexInput;
+import org.apache.lucene.index.codecs.sep.IntIndexOutput;
+import org.apache.lucene.index.codecs.sep.IntStreamFactory;
+import org.apache.lucene.store.RAMDirectory;
+import org.junit.Test;
+
+public class Simple64Test extends CodecTestCase {
+
+ @Test
+ public void testSimple() throws IOException {
+ final int blockSize = 60;
+ final RAMDirectory dir = new RAMDirectory();
+ final String filename = Simple64.class.toString();
+ final IntStreamFactory factory = new Simple64Codec(blockSize).getIntFactory();
+ final IntIndexOutput output = factory.createOutput(dir, filename);
+ final int[] values = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,};
+
+ for (final int element : values) {
+ output.write(element);
+ }
+ output.close();
+
+ final IntIndexInput input = factory.openInput(dir, filename);
+ final BlockReader reader = input.reader();
+ int buffer[] = reader.getBuffer();
+ reader.fill();
+ for (int i = 0; i < values.length; i++) {
+ assertEquals("Error at record " + i, values[i], buffer[i]);
+ }
+ input.close();
+ dir.close();
+ }
+
+ @Test
+ public void testSimple16bits() throws IOException {
+ final int blockSize = 60;
+ final RAMDirectory dir = new RAMDirectory();
+ final String filename = Simple64.class.toString();
+ final IntStreamFactory factory = new Simple64Codec(blockSize).getIntFactory();
+ final IntIndexOutput output = factory.createOutput(dir, filename);
+ final int[] values = { 60149,60149,60149,60149,60149,60149,60149,60149,60149,
+ 60149,60149,60149,60149,60149,60149,60149,60149,60149,
+ 60149,60149,60149,60149,60149,60149,60149,60149,60149,
+ 60149,60149,60149,60149,60149,
+ 60149,60149,60149,60149,60149,60149,60149,60149,60149,
+ 60149,60149,60149,60149,60149,60149,60149,60149,60149,
+ 60149,60149,60149,60149,60149,60149,60149,60149,60149,
+ 60149,};
+
+ for (final int element : values) {
+ output.write(element);
+ }
+ output.close();
+
+ final IntIndexInput input = factory.openInput(dir, filename);
+ final BlockReader reader = input.reader();
+ int buffer[] = reader.getBuffer();
+ reader.fill();
+ for (int i = 0; i < values.length; i++) {
+ assertEquals("Error at record " + i, values[i], buffer[i]);
+ }
+ input.close();
+ dir.close();
+ }
+
+ @Override
+ public void doTest(final int[] values, final int blockSize)
+ throws IOException {
+ final RAMDirectory dir = new RAMDirectory();
+ final String filename = Simple64.class.toString();
+ final IntStreamFactory factory = new Simple64Codec(blockSize).getIntFactory();
+ final IntIndexOutput output = factory.createOutput(dir, filename);
+
+ for (final int element : values) {
+ output.write(element);
+ }
+ output.close();
+
+ final IntIndexInput input = factory.openInput(dir, filename);
+ final BlockReader reader = input.reader();
+ int buffer[] = reader.getBuffer();
+ int pointer = 0;
+ int pointerMax = reader.fill();
+ assertTrue(pointerMax > 0);
+
+ for(int i=0;i<values.length;i++) {
+ if (pointer == pointerMax) {
+ pointerMax = reader.fill();
+ assertTrue(pointerMax > 0);
+ pointer = 0;
+ }
+ assertEquals(values[i], buffer[pointer++]);
+ }
+
+ input.close();
+ dir.close();
+ }
+
+ @Test
+ public void testIntegerRange32() throws IOException {
+ this.doTestIntegerRange(1, 32);
+ }
+}
Added: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/simple64/Simple64VarIntTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/simple64/Simple64VarIntTest.java?rev=1066838&view=auto
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/simple64/Simple64VarIntTest.java (added)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/simple64/Simple64VarIntTest.java Thu Feb 3 15:52:33 2011
@@ -0,0 +1,176 @@
+package org.apache.lucene.index.codecs.simple64;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.index.BulkPostingsEnum.BlockReader;
+import org.apache.lucene.index.codecs.CodecTestCase;
+import org.apache.lucene.index.codecs.sep.IntIndexInput;
+import org.apache.lucene.index.codecs.sep.IntIndexOutput;
+import org.apache.lucene.index.codecs.sep.IntStreamFactory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util._TestUtil;
+import org.junit.Test;
+
+public class Simple64VarIntTest extends CodecTestCase {
+
+ @Test
+ public void testSimple() throws IOException {
+ final int[] values = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,};
+ doTest(values, 0);
+ }
+
+ @Test
+ public void testSimple16bits() throws IOException {
+
+ // 60 values:
+ final int[] values = { 60149,60149,60149,60149,60149,60149,60149,60149,60149,
+ 60149,60149,60149,60149,60149,60149,60149,60149,60149,
+ 60149,60149,60149,60149,60149,60149,60149,60149,60149,
+ 60149,60149,60149,60149,60149,
+ 60149,60149,60149,60149,60149,60149,60149,60149,60149,
+ 60149,60149,60149,60149,60149,60149,60149,60149,60149,
+ 60149,60149,60149,60149,60149,60149,60149,60149,60149,
+ 60149,};
+ doTest(values, 0);
+ }
+
+ // nocommit -- blockSize is unused:
+ @Override
+ public void doTest(final int[] values, int blockSize)
+ throws IOException {
+ final RAMDirectory dir = new RAMDirectory();
+ final String filename = Simple64.class.toString();
+ final IntStreamFactory factory = new Simple64VarIntCodec().getIntFactory();
+ final IntIndexOutput output = factory.createOutput(dir, filename);
+
+ if (VERBOSE) {
+ System.out.println("TEST: " + values.length + " values");
+ }
+
+ final IndexOutput indexOutput = dir.createOutput("index");
+ final List<Integer> indexed = new ArrayList<Integer>();
+
+ final IntIndexOutput.Index index = output.index();
+
+ for (int upto=0;upto<values.length;upto++) {
+ final int element = values[upto];
+ if (VERBOSE) {
+ System.out.println(" add " + element);
+ }
+ if (random.nextInt(20) == 17) {
+ index.mark();
+ index.write(indexOutput, true);
+ indexed.add(upto);
+ }
+ output.write(element);
+ }
+ output.close();
+ indexOutput.close();
+
+ final IntIndexInput input = factory.openInput(dir, filename);
+ final BlockReader reader = input.reader();
+ int buffer[] = reader.getBuffer();
+ int pointer = 0;
+ int pointerMax = reader.fill();
+ assertTrue(pointerMax > 0);
+
+ if (VERBOSE) {
+ System.out.println(" verify...");
+ }
+ for(int i=0;i<values.length;i++) {
+ if (pointer == pointerMax) {
+ pointerMax = reader.fill();
+ assertTrue(pointerMax > 0);
+ pointer = 0;
+ }
+ if (VERBOSE) {
+ System.out.println(" got " + buffer[pointer]);
+ }
+ assertEquals(values[i], buffer[pointer++]);
+ }
+
+ // Now test seeking:
+ if (indexed.size() != 0) {
+ final IndexInput indexInput = dir.openInput("index");
+ List<IntIndexInput.Index> indexes = new ArrayList<IntIndexInput.Index>();
+ for(int spot : indexed) {
+ IntIndexInput.Index index2 = input.index();
+ index2.read(indexInput, true);
+ indexes.add(index2);
+ }
+ indexInput.close();
+
+ for(int iter=0;iter<100;iter++) {
+ final int spot = random.nextInt(indexed.size());
+ if (VERBOSE) {
+ System.out.println("TEST: seek index=" + indexes.get(spot));
+ }
+ indexes.get(spot).seek(reader);
+ pointerMax = reader.end();
+ pointer = reader.offset();
+ int upto = indexed.get(spot);
+ int limit = Math.min(upto+20, values.length);
+ while(upto < limit) {
+ if (pointer == pointerMax) {
+ pointerMax = reader.fill();
+ assertTrue(pointerMax > 0);
+ pointer = 0;
+ }
+ if (VERBOSE) {
+ System.out.println(" got " + buffer[pointer]);
+ }
+ assertEquals(values[upto++], buffer[pointer++]);
+ }
+ }
+ }
+
+ input.close();
+ dir.close();
+ }
+
+ @Test
+ public void testRandom() throws Exception {
+ // nocommit mixup size of int[]
+ // nocommit more iters:
+ for(int iter=0;iter<10*RANDOM_MULTIPLIER;iter++) {
+ int size = _TestUtil.nextInt(random, 10, 1000);
+ int[] values = new int[size];
+ for(int i=0;i<values.length;i++) {
+ if (random.nextInt(20) == 17) {
+ values[i] = random.nextInt() & Integer.MAX_VALUE;
+ } else {
+ // duh -- & 3:
+ values[i] = random.nextInt() & 4;
+ }
+ }
+ doTest(values, 0);
+ }
+ }
+
+ @Test
+ public void testIntegerRange32() throws IOException {
+ this.doTestIntegerRange(1, 32);
+ }
+}