You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/07/11 15:29:12 UTC
svn commit: r1360169 - in
/lucene/dev/branches/pforcodec_3892/lucene/core/src:
java/org/apache/lucene/codecs/pfor/ test/org/apache/lucene/codecs/pfor/
Author: mikemccand
Date: Wed Jul 11 13:29:11 2012
New Revision: 1360169
URL: http://svn.apache.org/viewvc?rev=1360169&view=rev
Log:
LUCENE-3892: docs/cleanup
Modified:
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForFactory.java
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForPostingsFormat.java
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForUtil.java
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PackedIntsDecompress.java
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/gendecompress.py
lucene/dev/branches/pforcodec_3892/lucene/core/src/test/org/apache/lucene/codecs/pfor/TestPForUtil.java
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java?rev=1360169&r1=1360168&r2=1360169&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java Wed Jul 11 13:29:11 2012
@@ -1,5 +1,5 @@
package org.apache.lucene.codecs.pfor;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -32,12 +32,15 @@ import org.apache.lucene.codecs.intblock
import org.apache.lucene.codecs.intblock.FixedIntBlockIndexOutput;
/**
- * Stuff to pass to PostingsReader/WriterBase.
- * Things really make sense are: flushBlock() and readBlock()
+ * Used to plug into PostingsReader/WriterBase.
+ * The encoder and decoder in the lower layers are called by
+ * flushBlock() and readBlock().
*/
public final class ForFactory extends IntStreamFactory {
- private final int blockSize;
+
+ /* number of ints for each block */
+ private final int blockSize;
public ForFactory() {
this.blockSize = ForPostingsFormat.DEFAULT_BLOCK_SIZE;
@@ -53,6 +56,8 @@ public final class ForFactory extends In
return ret;
} finally {
if (!success) {
+ // For some cases (e.g. disk full), the IntIndexOutput may not be
+ // properly created. So we should close those opened files.
IOUtils.closeWhileHandlingException(out);
}
}
@@ -63,7 +68,10 @@ public final class ForFactory extends In
return new ForIndexInput(dir.openInput(fileName, context));
}
- // wrap input and output with buffer support
+ /**
+ * Here we'll hold both input buffer and output buffer for
+ * encoder/decoder.
+ */
private class ForIndexInput extends FixedIntBlockIndexInput {
ForIndexInput(final IndexInput in) throws IOException {
@@ -77,7 +85,10 @@ public final class ForFactory extends In
private final IntBuffer encodedBuffer;
ForBlockReader(final IndexInput in, final int[] buffer) {
- this.encoded = new byte[blockSize*8+4];
+ // upper bound for the encoded size should include:
+ // 1. blockSize normal values when numFrameBits=32 (4 bytes each);
+ // 2. header (4 bytes);
+ this.encoded = new byte[blockSize*4+4];
this.in = in;
this.buffer = buffer;
this.encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer();
@@ -87,7 +98,7 @@ public final class ForFactory extends In
@Override
public void readBlock() throws IOException {
final int numBytes = in.readInt();
- assert numBytes <= blockSize*8+4;
+ assert numBytes <= blockSize*4+4;
in.readBytes(encoded,0,numBytes);
ForUtil.decompress(encodedBuffer,buffer);
}
@@ -105,7 +116,7 @@ public final class ForFactory extends In
ForIndexOutput(IndexOutput out, int blockSize) throws IOException {
super(out,blockSize);
- this.encoded = new byte[blockSize*8+4];
+ this.encoded = new byte[blockSize*4+4];
this.encodedBuffer=ByteBuffer.wrap(encoded).asIntBuffer();
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java?rev=1360169&r1=1360168&r2=1360169&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java Wed Jul 11 13:29:11 2012
@@ -1,5 +1,6 @@
package org.apache.lucene.codecs.pfor;
-/**
+
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -37,10 +38,10 @@ import org.apache.lucene.codecs.Postings
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.sep.SepPostingsReader;
import org.apache.lucene.codecs.sep.SepPostingsWriter;
+
/**
- * This class actually only pass the ForFactory
- * to a PostingsWriter/ReaderBase, and get customized
- * format plugged.
+ * Pass ForFactory to a PostingsWriter/ReaderBase, and get
+ * customized postings format plugged.
*/
public final class ForPostingsFormat extends PostingsFormat {
private final int blockSize;
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java?rev=1360169&r1=1360168&r2=1360169&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java Wed Jul 11 13:29:11 2012
@@ -1,5 +1,5 @@
package org.apache.lucene.codecs.pfor;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -15,12 +15,15 @@ package org.apache.lucene.codecs.pfor;
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-// nocommit: this is only a test verison, change from PForUtil.java
+
import java.nio.IntBuffer;
import java.nio.ByteBuffer;
import java.util.Arrays;
-// Encode all values in normal area, based on the bit size for max value
+/**
+ * Encode all values in normal area with fixed bit width,
+ * which is determined by the max value in this block.
+ */
public class ForUtil {
public static final int HEADER_INT_SIZE=1;
protected static final int[] MASK = { 0x00000000,
@@ -31,26 +34,52 @@ public class ForUtil {
0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, 0x1fffffff, 0x3fffffff,
0x7fffffff, 0xffffffff};
+ /** Compress given int[] into Integer buffer, with For format
+ *
+ * @param data uncompressed data
+ * @param size num of ints to compress
+ * @param intBuffer integer buffer to hold compressed data
+ */
public static int compress(final int[] data, int size, IntBuffer intBuffer) {
int numBits=getNumBits(data,size);
for (int i=0; i<size; ++i) {
encodeNormalValue(intBuffer,i,data[i], numBits);
}
- // encode header
encodeHeader(intBuffer, size, numBits);
return (HEADER_INT_SIZE+(size*numBits+31)/32)*4;
}
-
+
+ /** Decompress given Integer buffer into int array.
+ *
+ * @param intBuffer integer buffer to hold compressed data
+ * @param data int array to hold uncompressed data
+ */
public static int decompress(IntBuffer intBuffer, int[] data) {
+
+ // since this buffer is reused at upper level, rewind first
intBuffer.rewind();
- int header = intBuffer.get();
+ int header = intBuffer.get();
int numInts = (header & MASK[8]) + 1;
int numBits = ((header >> 8) & MASK[5]) + 1;
- // TODO: PackedIntsDecompress is hardewired to size==128 only
+ decompressCore(intBuffer, data, numBits);
+
+ return numInts;
+ }
+
+ /**
+ * IntBuffer will not be rewound in this method, therefore the
+ * caller should ensure that the position is set to the first
+ * encoded int before decoding.
+ */
+ static void decompressCore(IntBuffer intBuffer, int[] data, int numBits) {
+ assert numBits<=32;
+ assert numBits>=1;
+
+ // TODO: PackedIntsDecompress is hardwired to size==128 only
switch(numBits) {
case 1: PackedIntsDecompress.decode1(intBuffer, data); break;
case 2: PackedIntsDecompress.decode2(intBuffer, data); break;
@@ -84,10 +113,7 @@ public class ForUtil {
case 30: PackedIntsDecompress.decode30(intBuffer, data); break;
case 31: PackedIntsDecompress.decode31(intBuffer, data); break;
case 32: PackedIntsDecompress.decode32(intBuffer, data); break;
- default:
- throw new IllegalStateException("Unknown numFrameBits " + numBits);
}
- return numInts;
}
static void encodeHeader(IntBuffer intBuffer, int numInts, int numBits) {
@@ -96,9 +122,9 @@ public class ForUtil {
}
static void encodeNormalValue(IntBuffer intBuffer, int pos, int value, int numBits) {
- final int globalBitPos = numBits*pos; // position in bit stream
- final int localBitPos = globalBitPos & 31; // position inside an int
- int intPos = HEADER_INT_SIZE + globalBitPos/32; // which integer to locate
+ final int globalBitPos = numBits*pos; // position in bit stream
+ final int localBitPos = globalBitPos & 31; // position inside an int
+ int intPos = HEADER_INT_SIZE + globalBitPos/32; // which integer to locate
setBufferIntBits(intBuffer, intPos, localBitPos, numBits, value);
if ((localBitPos + numBits) > 32) { // value does not fit in this int, fill tail
setBufferIntBits(intBuffer, intPos+1, 0,
@@ -115,7 +141,9 @@ public class ForUtil {
| (value << firstBitPos));
}
- // TODO: shall we use 32 NumBits directly if it exceeds 28 bits?
+ /**
+ * Estimate best num of frame bits according to the largest value.
+ */
static int getNumBits(final int[] data, int size) {
int optBits=1;
for (int i=0; i<size; ++i) {
@@ -125,12 +153,12 @@ public class ForUtil {
}
return optBits;
}
- /** The 4 byte header (32 bits) contains (from lsb to msb):
+
+ /**
+ * Generate the 4 byte header, which contains (from lsb to msb):
*
* - 8 bits for uncompressed int num - 1 (use up to 7 bits i.e 128 actually)
- *
* - 5 bits for num of frame bits - 1
- *
* - other bits unused
*
*/
@@ -138,38 +166,4 @@ public class ForUtil {
return (numInts-1)
| ((numBits-1) << 8);
}
-
- static void println(String format, Object... args) {
- System.out.println(String.format(format,args));
- }
- static void print(String format, Object... args) {
- System.out.print(String.format(format,args));
- }
- static void eprintln(String format, Object... args) {
- System.err.println(String.format(format,args));
- }
- public static String getHex( byte [] raw, int sz ) {
- final String HEXES = "0123456789ABCDEF";
- if ( raw == null ) return null;
- final StringBuilder hex = new StringBuilder( 2 * raw.length );
- for ( int i=0; i<sz; i++ ) {
- if (i>0 && (i)%16 == 0)
- hex.append("\n");
- byte b=raw[i];
- hex.append(HEXES.charAt((b & 0xF0) >> 4))
- .append(HEXES.charAt((b & 0x0F)))
- .append(" ");
- }
- return hex.toString();
- }
- public static String getHex( int [] raw, int sz ) {
- if ( raw == null ) return null;
- final StringBuilder hex = new StringBuilder( 4 * raw.length );
- for ( int i=0; i<sz; i++ ) {
- if (i>0 && i%8 == 0)
- hex.append("\n");
- hex.append(String.format("%08x ",raw[i]));
- }
- return hex.toString();
- }
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForFactory.java?rev=1360169&r1=1360168&r2=1360169&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForFactory.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForFactory.java Wed Jul 11 13:29:11 2012
@@ -1,5 +1,5 @@
package org.apache.lucene.codecs.pfor;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -32,11 +32,14 @@ import org.apache.lucene.codecs.intblock
import org.apache.lucene.codecs.intblock.FixedIntBlockIndexOutput;
/**
- * Stuff to pass to PostingsReader/WriterBase.
- * Things really make sense are: flushBlock() and readBlock()
+ * Used to plug into PostingsReader/WriterBase.
+ * The encoder and decoder in the lower layers are called by
+ * flushBlock() and readBlock().
*/
public final class PForFactory extends IntStreamFactory {
+
+ /* number of ints for each block */
private final int blockSize;
public PForFactory() {
@@ -53,6 +56,8 @@ public final class PForFactory extends I
return ret;
} finally {
if (!success) {
+ // For some cases (e.g. disk full), the IntIndexOutput may not be
+ // properly created. So we should close those opened files.
IOUtils.closeWhileHandlingException(out);
}
}
@@ -63,7 +68,10 @@ public final class PForFactory extends I
return new PForIndexInput(dir.openInput(fileName, context));
}
- // wrap input and output with buffer support
+ /**
+ * Here we'll hold both input buffer and output buffer for
+ * encoder/decoder.
+ */
private class PForIndexInput extends FixedIntBlockIndexInput {
PForIndexInput(final IndexInput in) throws IOException {
@@ -77,6 +85,10 @@ public final class PForFactory extends I
private final IntBuffer encodedBuffer;
PForBlockReader(final IndexInput in, final int[] buffer) {
+ // upper bound for the encoded size should include:
+ // 1. blockSize normal values (4 bytes each);
+ // 2. blockSize exception values (4 bytes each);
+ // 3. header (4 bytes);
this.encoded = new byte[blockSize*8+4];
this.in = in;
this.buffer = buffer;
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForPostingsFormat.java?rev=1360169&r1=1360168&r2=1360169&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForPostingsFormat.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForPostingsFormat.java Wed Jul 11 13:29:11 2012
@@ -1,5 +1,6 @@
package org.apache.lucene.codecs.pfor;
-/**
+
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -37,10 +38,10 @@ import org.apache.lucene.codecs.Postings
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.sep.SepPostingsReader;
import org.apache.lucene.codecs.sep.SepPostingsWriter;
+
/**
- * This class actually only pass the PForFactory
- * to a PostingsWriter/ReaderBase, and get customized
- * format plugged.
+ * Pass PForFactory to a PostingsWriter/ReaderBase, and get
+ * customized postings format plugged.
*/
public final class PForPostingsFormat extends PostingsFormat {
private final int blockSize;
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForUtil.java?rev=1360169&r1=1360168&r2=1360169&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForUtil.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForUtil.java Wed Jul 11 13:29:11 2012
@@ -1,5 +1,5 @@
package org.apache.lucene.codecs.pfor;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -20,26 +20,45 @@ import java.nio.IntBuffer;
import java.nio.ByteBuffer;
import java.util.Arrays;
-// Encode all small values and exception pointers in normal area,
-// Encode large values in exception area.
-// Size per exception is variable, possibly: 1byte, 2bytes, or 4bytes
+/**
+ * Encode all small values and exception pointers in normal area;
+ * Encode large values in exception area;
+ * Size per exception is variable, possibly: 1byte, 2bytes, or 4bytes
+ */
public final class PForUtil extends ForUtil {
+
protected static final int[] PER_EXCEPTION_SIZE = {1,2,4};
+ /** Compress given int[] into Integer buffer, with PFor format
+ *
+ * @param data uncompressed data
+ * @param size num of ints to compress
+ * @param intBuffer integer buffer to hold compressed data
+ */
public static int compress(final int[] data, int size, IntBuffer intBuffer) {
+ /** estimate minimum compress size to determine numFrameBits */
int numBits=getNumBits(data,size);
int[] excValues = new int[size];
- int excNum = 0, excLastPos = -1, excFirstPos = -1;
- int excLastNonForcePos = -1;
- int excNumBase = 0; // num of exception until the last non-force exception
- int excBytes = 1; // bytes per exception
- int excByteOffset = 0; // bytes of preceeding codes like header and normal area
- long maxChain = (1<<8) - 2; // header bits limits this to 254
+ int excNum = 0, excLastPos = -1, excFirstPos = -1, excLastNonForcePos = -1;
+
+ // num of exception until the last non-forced exception
+ int excNumBase = 0;
+
+ // bytes per exception
+ int excBytes = 1;
+
+ // bytes before exception area, e.g. header and normal area
+ int excByteOffset = 0;
+
+ // the max value possible for current exception pointer,
+ // value of the first pointer is limited by header as 254
+ long maxChain = (1<<8) - 2;
+
boolean conValue, conForce, conEnd;
int i=0;
- // estimate exceptions
+ /** estimate exceptions */
for (i=0; i<size; ++i) {
conValue = ((data[i] & MASK[numBits]) != data[i]); // value exception
conForce = (i >= maxChain + excLastPos); // force exception
@@ -57,7 +76,7 @@ public final class PForUtil extends ForU
}
}
- // encode normal area, record exception positions
+ /** encode normal area, record exception positions */
i=0;
excNum = 0;
if (excFirstPos < 0) { // no exception
@@ -90,7 +109,7 @@ public final class PForUtil extends ForU
}
}
- // encode exception area
+ /** encode exception area */
i=0;
for (; i<excNum; ++i) {
if (excBytes < 2 && (excValues[i] & ~MASK[8]) != 0) {
@@ -103,60 +122,33 @@ public final class PForUtil extends ForU
excByteOffset = HEADER_INT_SIZE*4 + (size*numBits + 7)/8;
encodeExcValues(intBuffer, excValues, excNum, excBytes, excByteOffset);
- // encode header
+ /** encode header */
encodeHeader(intBuffer, size, numBits, excNum, excFirstPos, excBytes);
return (excByteOffset + excBytes*excNum + 3)/4*4;
}
+ /** Decompress given Integer buffer into int array.
+ *
+ * @param intBuffer integer buffer to hold compressed data
+ * @param data int array to hold uncompressed data
+ */
public static int decompress(IntBuffer intBuffer, int[] data) {
+
+ // since this buffer is reused at upper level, rewind first
intBuffer.rewind();
- int header = intBuffer.get();
+ int header = intBuffer.get();
int numInts = (header & MASK[8]) + 1;
int excNum = ((header >> 8) & MASK[8]) + 1;
int excFirstPos = ((header >> 16) & MASK[8]) - 1;
int excBytes = PER_EXCEPTION_SIZE[(header >> 29) & MASK[2]];
int numBits = ((header >> 24) & MASK[5]) + 1;
- // TODO: PackedIntsDecompress is hardewired to size==128 only
- switch(numBits) {
- case 1: PackedIntsDecompress.decode1(intBuffer, data); break;
- case 2: PackedIntsDecompress.decode2(intBuffer, data); break;
- case 3: PackedIntsDecompress.decode3(intBuffer, data); break;
- case 4: PackedIntsDecompress.decode4(intBuffer, data); break;
- case 5: PackedIntsDecompress.decode5(intBuffer, data); break;
- case 6: PackedIntsDecompress.decode6(intBuffer, data); break;
- case 7: PackedIntsDecompress.decode7(intBuffer, data); break;
- case 8: PackedIntsDecompress.decode8(intBuffer, data); break;
- case 9: PackedIntsDecompress.decode9(intBuffer, data); break;
- case 10: PackedIntsDecompress.decode10(intBuffer, data); break;
- case 11: PackedIntsDecompress.decode11(intBuffer, data); break;
- case 12: PackedIntsDecompress.decode12(intBuffer, data); break;
- case 13: PackedIntsDecompress.decode13(intBuffer, data); break;
- case 14: PackedIntsDecompress.decode14(intBuffer, data); break;
- case 15: PackedIntsDecompress.decode15(intBuffer, data); break;
- case 16: PackedIntsDecompress.decode16(intBuffer, data); break;
- case 17: PackedIntsDecompress.decode17(intBuffer, data); break;
- case 18: PackedIntsDecompress.decode18(intBuffer, data); break;
- case 19: PackedIntsDecompress.decode19(intBuffer, data); break;
- case 20: PackedIntsDecompress.decode20(intBuffer, data); break;
- case 21: PackedIntsDecompress.decode21(intBuffer, data); break;
- case 22: PackedIntsDecompress.decode22(intBuffer, data); break;
- case 23: PackedIntsDecompress.decode23(intBuffer, data); break;
- case 24: PackedIntsDecompress.decode24(intBuffer, data); break;
- case 25: PackedIntsDecompress.decode25(intBuffer, data); break;
- case 26: PackedIntsDecompress.decode26(intBuffer, data); break;
- case 27: PackedIntsDecompress.decode27(intBuffer, data); break;
- case 28: PackedIntsDecompress.decode28(intBuffer, data); break;
- case 29: PackedIntsDecompress.decode29(intBuffer, data); break;
- case 30: PackedIntsDecompress.decode30(intBuffer, data); break;
- case 31: PackedIntsDecompress.decode31(intBuffer, data); break;
- case 32: PackedIntsDecompress.decode32(intBuffer, data); break;
- default:
- throw new IllegalStateException("Unknown numFrameBits " + numBits);
- }
+ decompressCore(intBuffer, data, numBits);
+
patchException(intBuffer,data,excNum,excFirstPos,excBytes);
+
return numInts;
}
@@ -196,9 +188,19 @@ public final class PForUtil extends ForU
}
}
- // TODO: since numInts===128, we don't need to rewind intBuffer.
- // however, tail of normal area may share a same int with head of exception area
- // which means patchException may lose heading exceptions.
+ /**
+ * Decode exception values based on the exception pointers in the normal area
+ * and the values in the exception area.
+ * In the current implementation, numInts is hardwired to 128, so the
+ * tail of the normal area is naturally aligned to 32 bits, and we don't need to
+ * rewind intBuffer here.
+ * However, the normal area may share the same int with the exception area
+ * when numFrameBits * numInts % 32 != 0.
+ * In this case we should first patch the leading exceptions
+ * before calling this method.
+ *
+ * TODO: blockSize is hardwired to size==128 only
+ */
public static void patchException(IntBuffer intBuffer, int[] data, int excNum, int excFirstPos, int excBytes) {
if (excFirstPos == -1) {
return;
@@ -206,7 +208,7 @@ public final class PForUtil extends ForU
int curPos=excFirstPos;
int i,j;
- if (excBytes == 1) {
+ if (excBytes == 1) { // each exception consumes 1 byte
for (i=0; i+3<excNum; i+=4) {
final int curInt = intBuffer.get();
curPos = patch(data, curPos, (curInt) & MASK[8]);
@@ -214,13 +216,13 @@ public final class PForUtil extends ForU
curPos = patch(data, curPos, (curInt >>> 16) & MASK[8]);
curPos = patch(data, curPos, (curInt >>> 24) & MASK[8]);
}
- if (i<excNum) {
+ if (i<excNum) {
final int curInt = intBuffer.get();
for (j=0; j<32 && i<excNum; j+=8,i++) {
curPos = patch(data, curPos, (curInt >>> j) & MASK[8]);
}
}
- } else if (excBytes == 2) {
+ } else if (excBytes == 2) { // each exception consumes 2 bytes
for (i=0; i+1<excNum; i+=2) {
final int curInt = intBuffer.get();
curPos = patch(data, curPos, (curInt) & MASK[16]);
@@ -230,7 +232,7 @@ public final class PForUtil extends ForU
final int curInt = intBuffer.get();
curPos = patch(data, curPos, (curInt) & MASK[16]);
}
- } else if (excBytes == 4) {
+ } else if (excBytes == 4) { // each exception consumes 4 bytes
for (i=0; i<excNum; i++) {
curPos = patch(data, curPos, intBuffer.get());
}
@@ -244,7 +246,10 @@ public final class PForUtil extends ForU
return nextPos;
}
- // TODO: shall we use 32 NumBits directly if it exceeds 28 bits?
+ /**
+ * Estimate best number of frame bits according to minimum compressed size.
+ * It will run 32 times.
+ */
static int getNumBits(final int[] data, int size) {
int optBits=1;
int optSize=estimateCompressedSize(data,size,1);
@@ -258,8 +263,11 @@ public final class PForUtil extends ForU
return optBits;
}
- // loosely estimate int size of each compressed block, based on parameter b
- // ignore force exceptions
+ /**
+ * Iterate the whole block to get maximum exception bits,
+ * and estimate compressed size without forced exception.
+ * TODO: foresee forced exception for better estimation
+ */
static int estimateCompressedSize(final int[] data, int size, int numBits) {
int totalBytes=(numBits*size+7)/8; // always round to byte
int excNum=0;
@@ -285,17 +293,19 @@ public final class PForUtil extends ForU
return totalBytes/4*4+HEADER_INT_SIZE; // round up to ints
}
- /** The 4 byte header (32 bits) contains (from lsb to msb):
+
+ /**
+ * Generate the 4 byte header, which contains (from lsb to msb):
*
- * - 8 bits for uncompressed int num - 1 (use up to 7 bits i.e 128 actually)
+ * 8 bits for uncompressed int num - 1 (use up to 7 bits i.e 128 actually)
*
- * - 8 bits for exception num - 1 (when no exceptions, this is undefined)
+ * 8 bits for exception num - 1 (when no exceptions, this is undefined)
*
- * - 8 bits for the index of the first exception + 1 (when no exception, this is 0)
+ * 8 bits for the index of the first exception + 1 (when no exception, this is 0)
*
- * - 5 bits for num of frame bits - 1
- * - 2 bits for the exception code: 00: byte, 01: short, 10: int
- * - 1 bit unused
+ * 5 bits for num of frame bits - 1
+ * 2 bits for the exception code: 00: byte, 01: short, 10: int
+ * 1 bit unused
*
*/
static int getHeader(int numInts, int numBits, int excNum, int excFirstPos, int excBytes) {
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PackedIntsDecompress.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PackedIntsDecompress.java?rev=1360169&r1=1360168&r2=1360169&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PackedIntsDecompress.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PackedIntsDecompress.java Wed Jul 11 13:29:11 2012
@@ -1,5 +1,5 @@
package org.apache.lucene.codecs.pfor;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -15,7 +15,8 @@ package org.apache.lucene.codecs.pfor;
* See the License for the specific language governing permissions and
* limitations under the License.
*/
- /* This code is generated, do not modify. See gendecompress.py */
+
+/* This code is generated, do not modify. See gendecompress.py */
import java.nio.IntBuffer;
@@ -24,6 +25,7 @@ final class PackedIntsDecompress {
// nocommit: assess perf of this to see if specializing is really needed
// NOTE: hardwired to blockSize == 128
+
public static void decode1(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 1;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -65,8 +67,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode2(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 2;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -109,8 +109,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode3(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 3;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -154,8 +152,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode4(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 4;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -200,8 +196,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode5(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 5;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -247,8 +241,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode6(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 6;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -295,8 +287,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode7(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 7;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -344,8 +334,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode8(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 8;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -394,8 +382,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode9(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 9;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -445,8 +431,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode10(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 10;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -497,8 +481,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode11(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 11;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -550,8 +532,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode12(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 12;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -604,8 +584,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode13(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 13;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -659,8 +637,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode14(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 14;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -715,8 +691,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode15(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 15;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -772,8 +746,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode16(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 16;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -830,8 +802,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode17(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 17;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -889,8 +859,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode18(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 18;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -949,8 +917,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode19(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 19;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -1010,8 +976,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode20(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 20;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -1072,8 +1036,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode21(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 21;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -1135,8 +1097,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode22(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 22;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -1199,8 +1159,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode23(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 23;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -1264,8 +1222,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode24(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 24;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -1330,8 +1286,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode25(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 25;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -1397,8 +1351,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode26(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 26;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -1465,8 +1417,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode27(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 27;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -1534,8 +1484,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode28(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 28;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -1604,8 +1552,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode29(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 29;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -1675,8 +1621,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode30(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 30;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -1747,8 +1691,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode31(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 31;
final int mask = (int) ((1L<<numFrameBits) - 1);
@@ -1820,8 +1762,6 @@ final class PackedIntsDecompress {
outputOffset += 32;
}
}
-
- // NOTE: hardwired to blockSize == 128
public static void decode32(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 32;
final int mask = (int) ((1L<<numFrameBits) - 1);
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/gendecompress.py
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/gendecompress.py?rev=1360169&r1=1360168&r2=1360169&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/gendecompress.py (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/gendecompress.py Wed Jul 11 13:29:11 2012
@@ -17,19 +17,14 @@
"""
"""
-Generate source code for java classes for FOR decompression.
+Generate source code for java classes for For or PFor decompression.
"""
-USE_SCRATCH = False
-
def bitsExpr(i, numFrameBits):
framePos = i * numFrameBits
intValNum = (framePos / 32)
bitPos = framePos % 32
- if USE_SCRATCH:
- bitsInInt = "inputInts[" + str(intValNum) + "]"
- else:
- bitsInInt = "intValue" + str(intValNum)
+ bitsInInt = "intValue" + str(intValNum)
needBrackets = 0
if bitPos > 0:
bitsInInt += " >>> " + str(bitPos)
@@ -37,10 +32,7 @@ def bitsExpr(i, numFrameBits):
if bitPos + numFrameBits > 32:
if needBrackets:
bitsInInt = "(" + bitsInInt + ")"
- if USE_SCRATCH:
- bitsInInt += " | (inputInts[" + str(intValNum+1) + "] << "+ str(32 - bitPos) + ")"
- else:
- bitsInInt += " | (intValue" + str(intValNum+1) + " << "+ str(32 - bitPos) + ")"
+ bitsInInt += " | (intValue" + str(intValNum+1) + " << "+ str(32 - bitPos) + ")"
needBrackets = 1
if bitPos + numFrameBits != 32:
if needBrackets:
@@ -57,7 +49,7 @@ def genDecompress():
w = f.write
try:
w("package org.apache.lucene.codecs.pfor;\n")
- w("""/**
+ w("""/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -75,50 +67,36 @@ def genDecompress():
*/
""")
- w("/* This code is generated, do not modify. See gendecompress.py */\n\n")
+ w("\n/* This code is generated, do not modify. See gendecompress.py */\n\n")
w("import java.nio.IntBuffer;\n\n")
w("final class PackedIntsDecompress {\n")
w('\n // nocommit: assess perf of this to see if specializing is really needed\n')
+ w('\n // NOTE: hardwired to blockSize == 128\n\n')
- # previous version only handle int less(or equal) than 31 bits
- # try to support 32 bits here
for numFrameBits in xrange(1, 33):
-
- w('\n // NOTE: hardwired to blockSize == 128\n')
- if USE_SCRATCH:
- w(' public static void decode%d(final IntBuffer compressedBuffer, final int[] output, final int[] scratch) {\n' % numFrameBits)
- else:
- w(' public static void decode%d(final IntBuffer compressedBuffer, final int[] output) {\n' % numFrameBits)
-
+ w(' public static void decode%d(final IntBuffer compressedBuffer, final int[] output) {\n' % numFrameBits)
w(' final int numFrameBits = %d;\n' % numFrameBits)
w(' final int mask = (int) ((1L<<numFrameBits) - 1);\n')
w(' int outputOffset = 0;\n')
-
w(' for(int step=0;step<4;step++) {\n')
- if USE_SCRATCH:
- w(' compressedBuffer.get(scratch, 0, %d);\n' % numFrameBits)
- else:
- for i in range(numFrameBits): # declare int vars and init from buffer
- w(" int intValue" + str(i) + " = compressedBuffer.get();\n")
+ for i in range(numFrameBits): # declare int vars and init from buffer
+ w(" int intValue" + str(i) + " = compressedBuffer.get();\n")
for i in range(32): # set output from int vars
w(" output[" + str(i) + " + outputOffset] = " + bitsExpr(i, numFrameBits) + ";\n")
+
w(' outputOffset += 32;\n')
w(' }\n')
w(' }\n')
+
w('}\n')
finally:
f.close()
-def genSwitch():
- for numFrameBits in xrange(1, 33):
- print ' case %d: PackedIntsDecompress.decode%d(compressedBuffer, encoded); break;' % (numFrameBits, numFrameBits)
-
if __name__ == "__main__":
genDecompress()
- #genSwitch()
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/test/org/apache/lucene/codecs/pfor/TestPForUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/test/org/apache/lucene/codecs/pfor/TestPForUtil.java?rev=1360169&r1=1360168&r2=1360169&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/test/org/apache/lucene/codecs/pfor/TestPForUtil.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/test/org/apache/lucene/codecs/pfor/TestPForUtil.java Wed Jul 11 13:29:11 2012
@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.pfor;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -17,12 +17,21 @@ package org.apache.lucene.codecs.pfor;
* limitations under the License.
*/
-import java.util.*;
-import java.io.*;
-import java.nio.*;
-import org.apache.lucene.codecs.pfor.*;
+import java.util.Collections;
+import java.util.Arrays;
+import java.util.Random;
+import java.nio.IntBuffer;
+import java.nio.ByteBuffer;
+import org.apache.lucene.codecs.pfor.PForUtil;
+import org.apache.lucene.codecs.pfor.ForPostingsFormat;
import org.apache.lucene.util.LuceneTestCase;
+/**
+ * Tests the core utility for PFor compression and decompression.
+ * We don't provide test cases for the For encoder/decoder, since
+ * PFor is an extended version of For, and most methods will be reused
+ * here.
+ */
public class TestPForUtil extends LuceneTestCase {
static final int[] MASK={ 0x00000000,
0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f,
@@ -32,21 +41,18 @@ public class TestPForUtil extends Lucene
0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, 0x1fffffff, 0x3fffffff,
0x7fffffff, 0xffffffff};
Random gen;
- long seed=System.currentTimeMillis();
- //long seed=1338528171959L;
public void initRandom() {
- // println("Seed: "+seed);
- this.gen = new Random(seed);
- }
- public void testCompress() throws Exception {
- initRandom();
- tryForcedException();
- tryAllDistribution();
+ this.gen = random();
}
- // Test correctness of ignored forced exception
- public void tryForcedException() throws Exception {
- int sz=128;
+ /**
+ * Test correctness of ignored forced exception.
+ * The trailing forced exceptions shouldn't be reverted
+ * since they're not necessary.
+ */
+ public void testForcedException() throws Exception {
+ initRandom();
+ int sz=ForPostingsFormat.DEFAULT_BLOCK_SIZE;
Integer[] buff= new Integer[sz];
int[] data = new int[sz];
int[] copy = new int[sz];
@@ -56,16 +62,14 @@ public class TestPForUtil extends Lucene
buff[i]=gen.nextInt() & 1;
buff[sz-1]=gen.nextInt() & 0xffffffff; // create only one exception
- Collections.shuffle(Arrays.asList(buff),new Random(seed));
+ Collections.shuffle(Arrays.asList(buff),gen);
for (int i=0; i<sz; ++i)
data[i] = buff[i];
int ensz = PForUtil.compress(data,sz,resBuffer);
- if (ensz > sz*8+4) {
- println("Excceed? "+ensz+">"+(sz*8+4));
- ensz=sz*8+4;
- }
+ assert (ensz <= sz*8+4); // must not exceed the loose upperbound
+
resBuffer.rewind();
PForUtil.decompress(resBuffer,copy);
@@ -76,12 +80,17 @@ public class TestPForUtil extends Lucene
assert cmp(data,sz,copy,sz)==true;
}
- // Test correctness of compressing and decompressing
- public void tryAllDistribution() throws Exception {
+ /**
+ * Test correctness of compressing and decompressing.
+ * Here we randomly assign a rate of exceptions (i.e. 1-alpha),
+ * and test different scales of normal/exception values.
+ */
+ public void testAllDistribution() throws Exception {
+ initRandom();
for (int i=0; i<=32; ++i) { // try to test every kinds of distribution
double alpha=gen.nextDouble(); // rate of normal value
for (int j=0; j<=32; ++j) {
- tryDistribution(128,alpha,MASK[i],MASK[j]);
+ tryDistribution(ForPostingsFormat.DEFAULT_BLOCK_SIZE,alpha,MASK[i],MASK[j]);
}
}
}
@@ -95,16 +104,14 @@ public class TestPForUtil extends Lucene
buff[i]=gen.nextInt() & masknorm;
for (; i<sz; ++i)
buff[i]=gen.nextInt() & maskexc;
- Collections.shuffle(Arrays.asList(buff),new Random(seed));
+ Collections.shuffle(Arrays.asList(buff),gen);
for (i=0; i<sz; ++i)
data[i] = buff[i];
int ensz = PForUtil.compress(data,sz,resBuffer);
- if (ensz > sz*8+4) {
- println("Excceed? "+ensz+">"+(sz*8+4));
- ensz=sz*8+4;
- }
+ assert (ensz <= sz*8+4); // must not exceed the loose upperbound
+
int[] copy = new int[sz];
PForUtil.decompress(resBuffer,copy);