You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/07/16 14:55:26 UTC
svn commit: r1362013 - in
/lucene/dev/branches/pforcodec_3892/lucene/core/src:
java/org/apache/lucene/codecs/pfor/ test/org/apache/lucene/codecs/pfor/
Author: mikemccand
Date: Mon Jul 16 12:55:26 2012
New Revision: 1362013
URL: http://svn.apache.org/viewvc?rev=1362013&view=rev
Log:
LUCENE-3892: get numBytes & bit width into single int header
Modified:
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForFactory.java
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForPostingsFormat.java
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForUtil.java
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PackedIntsDecompress.java
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/gendecompress.py
lucene/dev/branches/pforcodec_3892/lucene/core/src/test/org/apache/lucene/codecs/pfor/TestPForUtil.java
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java?rev=1362013&r1=1362012&r2=1362013&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java Mon Jul 16 12:55:26 2012
@@ -39,11 +39,7 @@ import org.apache.lucene.codecs.intblock
public final class ForFactory extends IntStreamFactory {
- /* number of ints for each block */
- private final int blockSize;
-
public ForFactory() {
- this.blockSize = ForPostingsFormat.DEFAULT_BLOCK_SIZE;
}
@Override
@@ -51,7 +47,7 @@ public final class ForFactory extends In
boolean success = false;
IndexOutput out = dir.createOutput(fileName, context);
try {
- IntIndexOutput ret = new ForIndexOutput(out, blockSize);
+ IntIndexOutput ret = new ForIndexOutput(out);
success = true;
return ret;
} finally {
@@ -85,10 +81,9 @@ public final class ForFactory extends In
private final IntBuffer encodedBuffer;
ForBlockReader(final IndexInput in, final int[] buffer) {
- // upperbound for encoded value should include:
- // 1. blockSize of normal value when numFrameBits=32(4x bytes);
- // 2. header (4bytes);
- this.encoded = new byte[blockSize*4+4];
+ // upper bound for the encoded values (the header is not buffered here):
+ // blockSize normal values when numFrameBits=32 (4 bytes each);
+ this.encoded = new byte[ForPostingsFormat.DEFAULT_BLOCK_SIZE*4];
this.in = in;
this.buffer = buffer;
this.encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer();
@@ -97,10 +92,11 @@ public final class ForFactory extends In
// TODO: implement public void skipBlock() {} ?
@Override
public void readBlock() throws IOException {
- final int numBytes = in.readInt();
- assert numBytes <= blockSize*4+4;
+ final int header = in.readInt();
+ final int numBytes = ForUtil.getEncodedSize(header);
+ assert numBytes <= ForPostingsFormat.DEFAULT_BLOCK_SIZE*4;
in.readBytes(encoded,0,numBytes);
- ForUtil.decompress(encodedBuffer,buffer);
+ ForUtil.decompress(encodedBuffer,buffer,header);
}
}
@@ -114,16 +110,17 @@ public final class ForFactory extends In
private final byte[] encoded;
private final IntBuffer encodedBuffer;
- ForIndexOutput(IndexOutput out, int blockSize) throws IOException {
- super(out,blockSize);
- this.encoded = new byte[blockSize*4+4];
+ ForIndexOutput(IndexOutput out) throws IOException {
+ super(out,ForPostingsFormat.DEFAULT_BLOCK_SIZE);
+ this.encoded = new byte[ForPostingsFormat.DEFAULT_BLOCK_SIZE*4];
this.encodedBuffer=ByteBuffer.wrap(encoded).asIntBuffer();
}
@Override
protected void flushBlock() throws IOException {
- final int numBytes = ForUtil.compress(buffer,buffer.length,encodedBuffer);
- out.writeInt(numBytes);
+ final int header = ForUtil.compress(buffer,encodedBuffer);
+ final int numBytes = ForUtil.getEncodedSize(header);
+ out.writeInt(header);
out.writeBytes(encoded, numBytes);
}
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java?rev=1362013&r1=1362012&r2=1362013&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java Mon Jul 16 12:55:26 2012
@@ -17,48 +17,46 @@ package org.apache.lucene.codecs.pfor;
* limitations under the License.
*/
-import java.util.Set;
import java.io.IOException;
+import java.util.Set;
-import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.store.IOContext;
-import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.index.SegmentWriteState;
-import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.codecs.BlockTreeTermsReader;
+import org.apache.lucene.codecs.BlockTreeTermsWriter;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
-import org.apache.lucene.codecs.BlockTreeTermsWriter;
-import org.apache.lucene.codecs.BlockTreeTermsReader;
-import org.apache.lucene.codecs.TermsIndexReaderBase;
-import org.apache.lucene.codecs.TermsIndexWriterBase;
import org.apache.lucene.codecs.FixedGapTermsIndexReader;
import org.apache.lucene.codecs.FixedGapTermsIndexWriter;
import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.PostingsReaderBase;
+import org.apache.lucene.codecs.PostingsWriterBase;
+import org.apache.lucene.codecs.TermsIndexReaderBase;
+import org.apache.lucene.codecs.TermsIndexWriterBase;
import org.apache.lucene.codecs.sep.SepPostingsReader;
import org.apache.lucene.codecs.sep.SepPostingsWriter;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
/**
* Pass ForFactory to a PostingsWriter/ReaderBase, and get
* customized postings format plugged.
*/
public final class ForPostingsFormat extends PostingsFormat {
- private final int blockSize;
private final int minBlockSize;
private final int maxBlockSize;
public final static int DEFAULT_BLOCK_SIZE = 128;
public ForPostingsFormat() {
super("For");
- this.blockSize = DEFAULT_BLOCK_SIZE;
this.minBlockSize = BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE;
this.maxBlockSize = BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE;
}
public ForPostingsFormat(int minBlockSize, int maxBlockSize) {
super("For");
- this.blockSize = DEFAULT_BLOCK_SIZE;
this.minBlockSize = minBlockSize;
assert minBlockSize > 1;
this.maxBlockSize = maxBlockSize;
@@ -67,7 +65,7 @@ public final class ForPostingsFormat ext
@Override
public String toString() {
- return getName() + "(blocksize=" + blockSize + ")";
+ return getName() + "(blocksize=" + DEFAULT_BLOCK_SIZE + ")";
}
@Override
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java?rev=1362013&r1=1362012&r2=1362013&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java Mon Jul 16 12:55:26 2012
@@ -25,7 +25,6 @@ import java.util.Arrays;
* which is determined by the max value in this block.
*/
public class ForUtil {
- public static final int HEADER_INT_SIZE=1;
protected static final int[] MASK = { 0x00000000,
0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f,
0x0000007f, 0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
@@ -39,16 +38,30 @@ public class ForUtil {
* @param data uncompressed data
* @param size num of ints to compress
* @param intBuffer integer buffer to hold compressed data
+ * @return block header, which packs the encoded block size (in ints) and the number of frame bits
*/
- public static int compress(final int[] data, int size, IntBuffer intBuffer) {
- int numBits=getNumBits(data,size);
-
+ public static int compress(final int[] data, IntBuffer intBuffer) {
+ int numBits=getNumBits(data);
+ if (numBits == 0) {
+ return compressDuplicateBlock(data,intBuffer);
+ }
+
+ int size=data.length;
+ int encodedSize = (size*numBits+31)/32;
+
for (int i=0; i<size; ++i) {
encodeNormalValue(intBuffer,i,data[i], numBits);
}
- encodeHeader(intBuffer, size, numBits);
- return (HEADER_INT_SIZE+(size*numBits+31)/32)*4;
+ return getHeader(encodedSize, numBits);
+ }
+
+ /**
+ * Save only one int (the repeated value) when all values in the block are equal
+ */
+ static int compressDuplicateBlock(final int[] data, IntBuffer intBuffer) {
+ intBuffer.put(0,data[0]);
+ return getHeader(1, 0);
}
/** Decompress given Integer buffer into int array.
@@ -56,18 +69,13 @@ public class ForUtil {
* @param intBuffer integer buffer to hold compressed data
* @param data int array to hold uncompressed data
*/
- public static int decompress(IntBuffer intBuffer, int[] data) {
-
+ public static void decompress(IntBuffer intBuffer, int[] data, int header) {
// since this buffer is reused at upper level, rewind first
intBuffer.rewind();
- int header = intBuffer.get();
- int numInts = (header & MASK[8]) + 1;
int numBits = ((header >> 8) & MASK[6]);
decompressCore(intBuffer, data, numBits);
-
- return numInts;
}
/**
@@ -117,15 +125,10 @@ public class ForUtil {
}
}
- static void encodeHeader(IntBuffer intBuffer, int numInts, int numBits) {
- int header = getHeader(numInts,numBits);
- intBuffer.put(0, header);
- }
-
static void encodeNormalValue(IntBuffer intBuffer, int pos, int value, int numBits) {
final int globalBitPos = numBits*pos; // position in bit stream
final int localBitPos = globalBitPos & 31; // position inside an int
- int intPos = HEADER_INT_SIZE + globalBitPos/32; // which integer to locate
+ int intPos = globalBitPos/32; // which integer to locate
setBufferIntBits(intBuffer, intPos, localBitPos, numBits, value);
if ((localBitPos + numBits) > 32) { // value does not fit in this int, fill tail
setBufferIntBits(intBuffer, intPos+1, 0,
@@ -145,8 +148,12 @@ public class ForUtil {
/**
* Estimate best num of frame bits according to the largest value.
*/
- static int getNumBits(final int[] data, int size) {
- int optBits=0;
+ static int getNumBits(final int[] data) {
+ if (isAllEqual(data)) {
+ return 0;
+ }
+ int size=data.length;
+ int optBits=1;
for (int i=0; i<size; ++i) {
while ((data[i] & ~MASK[optBits]) != 0) {
optBits++;
@@ -155,16 +162,37 @@ public class ForUtil {
return optBits;
}
+ protected static boolean isAllEqual(final int[] data) {
+ int len = data.length;
+ int v = data[0];
+ for (int i=1; i<len; i++) {
+ if (data[i] != v) {
+ return false;
+ }
+ }
+ return true;
+ }
+
/**
* Generate the 4 byte header, which contains (from lsb to msb):
*
- * - 8 bits for uncompressed int num - 1 (use up to 7 bits i.e 128 actually)
- * - 6 bits for num of frame bits
- * - other bits unused
+ * 8 bits for encoded block int size (excluding the header, this limits DEFAULT_BLOCK_SIZE <= 2^8)
+ * 6 bits for num of frame bits (when 0, values in this block are all the same)
+ * other bits unused
*
*/
- static int getHeader(int numInts, int numBits) {
- return (numInts-1)
+ static int getHeader(int encodedSize, int numBits) {
+ return (encodedSize)
| ((numBits) << 8);
}
+
+ /**
+ * Expert: get metadata from header.
+ */
+ public static int getEncodedSize(int header) {
+ return ((header & MASK[8]))*4;
+ }
+ public static int getNumBits(int header) {
+ return ((header >> 8) & MASK[6]);
+ }
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForFactory.java?rev=1362013&r1=1362012&r2=1362013&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForFactory.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForFactory.java Mon Jul 16 12:55:26 2012
@@ -39,11 +39,7 @@ import org.apache.lucene.codecs.intblock
public final class PForFactory extends IntStreamFactory {
- /* number of ints for each block */
- private final int blockSize;
-
public PForFactory() {
- this.blockSize=PForPostingsFormat.DEFAULT_BLOCK_SIZE;
}
@Override
@@ -51,7 +47,7 @@ public final class PForFactory extends I
boolean success = false;
IndexOutput out = dir.createOutput(fileName, context);
try {
- IntIndexOutput ret = new PForIndexOutput(out, blockSize);
+ IntIndexOutput ret = new PForIndexOutput(out);
success = true;
return ret;
} finally {
@@ -85,11 +81,10 @@ public final class PForFactory extends I
private final IntBuffer encodedBuffer;
PForBlockReader(final IndexInput in, final int[] buffer) {
- // upperbound for encoded value should include:
+ // upper bound for the encoded values should include (the header is not buffered here):
// 1. blockSize of normal value (4x bytes);
// 2. blockSize of exception value (4x bytes);
- // 3. header (4bytes);
- this.encoded = new byte[blockSize*8+4];
+ this.encoded = new byte[PForPostingsFormat.DEFAULT_BLOCK_SIZE*8];
this.in = in;
this.buffer = buffer;
this.encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer();
@@ -98,10 +93,11 @@ public final class PForFactory extends I
// TODO: implement public void skipBlock() {} ?
@Override
public void readBlock() throws IOException {
- final int numBytes = in.readInt();
- assert numBytes <= blockSize*8+4;
+ final int header = in.readInt();
+ final int numBytes = PForUtil.getEncodedSize(header);
+ assert numBytes <= PForPostingsFormat.DEFAULT_BLOCK_SIZE*8;
in.readBytes(encoded,0,numBytes);
- PForUtil.decompress(encodedBuffer,buffer);
+ PForUtil.decompress(encodedBuffer,buffer,header);
}
}
@@ -115,16 +111,17 @@ public final class PForFactory extends I
private final byte[] encoded;
private final IntBuffer encodedBuffer;
- PForIndexOutput(IndexOutput out, int blockSize) throws IOException {
- super(out,blockSize);
- this.encoded = new byte[blockSize*8+4];
+ PForIndexOutput(IndexOutput out) throws IOException {
+ super(out, PForPostingsFormat.DEFAULT_BLOCK_SIZE);
+ this.encoded = new byte[PForPostingsFormat.DEFAULT_BLOCK_SIZE*8];
this.encodedBuffer=ByteBuffer.wrap(encoded).asIntBuffer();
}
@Override
protected void flushBlock() throws IOException {
- final int numBytes = PForUtil.compress(buffer,buffer.length,encodedBuffer);
- out.writeInt(numBytes);
+ final int header = PForUtil.compress(buffer,encodedBuffer);
+ final int numBytes = PForUtil.getEncodedSize(header);
+ out.writeInt(header);
out.writeBytes(encoded, numBytes);
}
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForPostingsFormat.java?rev=1362013&r1=1362012&r2=1362013&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForPostingsFormat.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForPostingsFormat.java Mon Jul 16 12:55:26 2012
@@ -45,20 +45,17 @@ import org.apache.lucene.util.IOUtils;
* customized postings format plugged.
*/
public final class PForPostingsFormat extends PostingsFormat {
- private final int blockSize;
private final int minBlockSize;
private final int maxBlockSize;
public final static int DEFAULT_BLOCK_SIZE = 128;
public PForPostingsFormat() {
super("PFor");
- this.blockSize = DEFAULT_BLOCK_SIZE;
this.minBlockSize = BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE;
this.maxBlockSize = BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE;
}
public PForPostingsFormat(int minBlockSize, int maxBlockSize) {
super("PFor");
- this.blockSize = DEFAULT_BLOCK_SIZE;
this.minBlockSize = minBlockSize;
assert minBlockSize > 1;
this.maxBlockSize = maxBlockSize;
@@ -67,7 +64,7 @@ public final class PForPostingsFormat ex
@Override
public String toString() {
- return getName() + "(blocksize=" + blockSize + ")";
+ return getName() + "(blocksize=" + DEFAULT_BLOCK_SIZE+ ")";
}
@Override
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForUtil.java?rev=1362013&r1=1362012&r2=1362013&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForUtil.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForUtil.java Mon Jul 16 12:55:26 2012
@@ -32,13 +32,17 @@ public final class PForUtil extends ForU
/** Compress given int[] into Integer buffer, with PFor format
*
* @param data uncompressed data
- * @param size num of ints to compress
* @param intBuffer integer buffer to hold compressed data
+ * @return block header
*/
- public static int compress(final int[] data, int size, IntBuffer intBuffer) {
+ public static int compress(final int[] data, IntBuffer intBuffer) {
/** estimate minimum compress size to determine numFrameBits */
- int numBits=getNumBits(data,size);
-
+ int numBits=getNumBits(data);
+ if (numBits == 0) {
+ return compressDuplicateBlock(data,intBuffer);
+ }
+
+ int size = data.length;
int[] excValues = new int[size];
int excNum = 0, excLastPos = -1, excFirstPos = -1, excLastNonForcePos = -1;
@@ -115,13 +119,13 @@ public final class PForUtil extends ForU
excBytes=4;
}
}
- excByteOffset = HEADER_INT_SIZE*4 + (size*numBits + 7)/8;
+ excByteOffset = (size*numBits + 7)/8;
encodeExcValues(intBuffer, excValues, excNum, excBytes, excByteOffset);
/** encode header */
- encodeHeader(intBuffer, size, numBits, excNum, excFirstPos, excBytes);
+ int encodedSize = (excByteOffset + excBytes*excNum + 3)/4;
- return (excByteOffset + excBytes*excNum + 3)/4*4;
+ return getHeader(encodedSize, numBits, excNum, excFirstPos, excBytes);
}
/** Decompress given Integer buffer into int array.
@@ -129,13 +133,10 @@ public final class PForUtil extends ForU
* @param intBuffer integer buffer to hold compressed data
* @param data int array to hold uncompressed data
*/
- public static int decompress(IntBuffer intBuffer, int[] data) {
-
+ public static void decompress(IntBuffer intBuffer, int[] data, int header) {
// since this buffer is reused at upper level, rewind first
intBuffer.rewind();
- int header = intBuffer.get();
- int numInts = (header & MASK[8]);
int excNum = ((header >> 8) & MASK[8]) + 1;
int excFirstPos = ((header >> 16) & MASK[8]) - 1;
int excBytes = PER_EXCEPTION_SIZE[(header >> 30) & MASK[2]];
@@ -144,13 +145,6 @@ public final class PForUtil extends ForU
decompressCore(intBuffer, data, numBits);
patchException(intBuffer,data,excNum,excFirstPos,excBytes);
-
- return numInts;
- }
-
- static void encodeHeader(IntBuffer intBuffer, int numInts, int numBits, int excNum, int excFirstPos, int excBytes) {
- int header = getHeader(numInts,numBits,excNum,excFirstPos,excBytes);
- intBuffer.put(0, header);
}
/**
@@ -190,6 +184,14 @@ public final class PForUtil extends ForU
}
/**
+ * Save only one int (the repeated value) besides the header when all values in the block are equal
+ */
+ static int compressDuplicateBlock(final int[] data, IntBuffer intBuffer) {
+ intBuffer.put(0,data[0]);
+ return getHeader(1, 0, 0, -1, 0);
+ }
+
+ /**
* Decode exception values base on the exception pointers in normal area,
* and values in exception area.
* As for current implementation, numInts is hardwired as 128, so the
@@ -200,7 +202,6 @@ public final class PForUtil extends ForU
* In this case we should preprocess patch several heading exceptions,
* before calling this method.
*
- * TODO: blockSize is hardewired to size==128 only
*/
public static void patchException(IntBuffer intBuffer, int[] data, int excNum, int excFirstPos, int excBytes) {
if (excFirstPos == -1) {
@@ -251,13 +252,14 @@ public final class PForUtil extends ForU
* Estimate best number of frame bits according to minimum compressed size.
* It will run 32 times.
*/
- static int getNumBits(final int[] data, int size) {
- if (isAllZero(data))
+ static int getNumBits(final int[] data) {
+ if (isAllEqual(data)) {
return 0;
+ }
int optBits=1;
- int optSize=estimateCompressedSize(data,size,optBits);
+ int optSize=estimateCompressedSize(data,optBits);
for (int i=2; i<=32; ++i) {
- int curSize=estimateCompressedSize(data,size,i);
+ int curSize=estimateCompressedSize(data,i);
if (curSize<optSize) {
optSize=curSize;
optBits=i;
@@ -266,22 +268,13 @@ public final class PForUtil extends ForU
return optBits;
}
- static boolean isAllZero(final int[] data) {
- int len=data.length;
- for (int i=0; i<len; i++) {
- if (data[i] != 0) {
- return false;
- }
- }
- return true;
- }
-
/**
* Iterate the whole block to get maximum exception bits,
* and estimate compressed size without forced exception.
* TODO: foresee forced exception for better estimation
*/
- static int estimateCompressedSize(final int[] data, int size, int numBits) {
+ static int estimateCompressedSize(final int[] data, int numBits) {
+ int size=data.length;
int totalBytes=(numBits*size+7)/8; // always round to byte
int excNum=0;
int curExcBytes=1;
@@ -304,29 +297,24 @@ public final class PForUtil extends ForU
}
totalBytes+=excNum*curExcBytes;
- return totalBytes/4*4+HEADER_INT_SIZE; // round up to ints
+ return totalBytes/4*4; // round up to ints
}
/**
- * Generate the 4 byte header, which contains (from lsb to msb):
+ * Generate the 4 byte header which contains (from lsb to msb):
*
- * 8 bits for uncompressed int num - 1 (use up to 7 bits i.e 128 actually)
+ * 8 bits for encoded block int size (excluding header, this limits DEFAULT_BLOCK_SIZE <= 2^(8-1))
*
* 8 bits for exception num - 1 (when no exceptions, this is undefined)
*
* 8 bits for the index of the first exception + 1 (when no exception, this is 0)
*
- * 6 bits for num of frame bits
+ * 6 bits for num of frame bits (when 0, values in this block are all the same)
* 2 bits for the exception code: 00: byte, 01: short, 10: int
*
*/
- // TODO: exception num should never be equal with uncompressed int num!!!
- // first exception ranges from -1 ~ 255
- // the problem is that we don't need first exception to be -1 ...
- // it is ok to range from 0~255, and judge exception for exception num (0~255)
- // uncompressed int num: (1~256)
- static int getHeader(int numInts, int numBits, int excNum, int excFirstPos, int excBytes) {
- return (numInts-1)
+ static int getHeader(int encodedSize, int numBits, int excNum, int excFirstPos, int excBytes) {
+ return (encodedSize)
| (((excNum-1) & MASK[8]) << 8)
| ((excFirstPos+1) << 16)
| ((numBits) << 24)
@@ -337,8 +325,8 @@ public final class PForUtil extends ForU
/**
* Expert: get metadata from header.
*/
- public static int getNumInts(int header) {
- return (header & MASK[8]) + 1;
+ public static int getEncodedSize(int header) {
+ return ((header & MASK[8]))*4;
}
public static int getExcNum(int header) {
return ((header >> 8) & MASK[8]) + 1;
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PackedIntsDecompress.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PackedIntsDecompress.java?rev=1362013&r1=1362012&r2=1362013&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PackedIntsDecompress.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/PackedIntsDecompress.java Mon Jul 16 12:55:26 2012
@@ -28,7 +28,7 @@ final class PackedIntsDecompress {
// NOTE: hardwired to blockSize == 128
public static void decode0(final IntBuffer compressedBuffer, final int[] output) {
- Arrays.fill(output, 0);
+ Arrays.fill(output, compressedBuffer.get());
}
public static void decode1(final IntBuffer compressedBuffer, final int[] output) {
final int numFrameBits = 1;
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/gendecompress.py
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/gendecompress.py?rev=1362013&r1=1362012&r2=1362013&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/gendecompress.py (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pfor/gendecompress.py Mon Jul 16 12:55:26 2012
@@ -78,7 +78,7 @@ def genDecompress():
w('\n // NOTE: hardwired to blockSize == 128\n\n')
w(' public static void decode0(final IntBuffer compressedBuffer, final int[] output) {\n')
- w(' Arrays.fill(output, 0);\n')
+ w(' Arrays.fill(output, compressedBuffer.get());\n')
w(' }\n')
for numFrameBits in xrange(1, 33):
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/test/org/apache/lucene/codecs/pfor/TestPForUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/test/org/apache/lucene/codecs/pfor/TestPForUtil.java?rev=1362013&r1=1362012&r2=1362013&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/test/org/apache/lucene/codecs/pfor/TestPForUtil.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/test/org/apache/lucene/codecs/pfor/TestPForUtil.java Mon Jul 16 12:55:26 2012
@@ -46,39 +46,33 @@ public class TestPForUtil extends Lucene
}
/**
- * Should not encode extra information other than header
+ * Should not encode any extra information other than a single int
*/
- public void testPForAllZeros() throws Exception {
+ public void testAllEqual() throws Exception {
+ initRandom();
int sz=ForPostingsFormat.DEFAULT_BLOCK_SIZE;
- int ensz;
int[] data=new int[sz];
- byte[] res = new byte[4+sz*8];
+ byte[] res = new byte[sz*8];
int[] copy = new int[sz];
IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
+ int ensz;
+ int header;
- Arrays.fill(data,0);
- ensz = ForUtil.compress(data,sz,resBuffer); // test For
- ForUtil.decompress(resBuffer,copy);
+ Arrays.fill(data,gen.nextInt());
+ header = ForUtil.compress(data,resBuffer); // test For
+ ensz = ForUtil.getEncodedSize(header);
assert ensz == 4;
- assert cmp(data,sz,copy,sz)==true;
- Arrays.fill(data,0);
- ensz = PForUtil.compress(data,sz,resBuffer); // test PFor
- PForUtil.decompress(resBuffer,copy);
- assert ensz == 4;
+ ForUtil.decompress(resBuffer,copy,header);
assert cmp(data,sz,copy,sz)==true;
- }
-
- public void testForAllZeros() throws Exception {
- int sz=ForPostingsFormat.DEFAULT_BLOCK_SIZE;
- int ensz;
- int[] data=new int[sz];
- byte[] res = new byte[4+sz*8];
- int[] copy = new int[sz];
- IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
- ensz = ForUtil.compress(data,sz,resBuffer);
+ Arrays.fill(data,gen.nextInt());
+ header = PForUtil.compress(data,resBuffer); // test PFor
+ ensz = PForUtil.getEncodedSize(header);
+ assert ensz == 4;
+ PForUtil.decompress(resBuffer,copy,header);
+ assert cmp(data,sz,copy,sz)==true;
}
/**
@@ -89,7 +83,7 @@ public class TestPForUtil extends Lucene
initRandom();
int sz=ForPostingsFormat.DEFAULT_BLOCK_SIZE;
int[] data=new int[sz];
- byte[] res = new byte[4+sz*8];
+ byte[] res = new byte[sz*8];
int[] copy = new int[sz];
IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
int numBits = gen.nextInt(5)+1;
@@ -107,8 +101,8 @@ public class TestPForUtil extends Lucene
data[i] = (exc & 0xffff0000) == 0 ? exc | 0xffff0000 : exc;
j++;
}
- ensz = PForUtil.compress(data,sz,resBuffer);
- header = resBuffer.get(0);
+ header = PForUtil.compress(data,resBuffer);
+ ensz = PForUtil.getEncodedSize(header);
expect = j;
got = PForUtil.getExcNum(header);
assert expect == got: expect+" expected but got "+got;
@@ -122,8 +116,8 @@ public class TestPForUtil extends Lucene
data[i] = (exc & 0xffff0000) == 0 ? exc | 0xffff0000 : exc;
j++;
}
- ensz = PForUtil.compress(data,sz,resBuffer);
- header = resBuffer.get(0);
+ header = PForUtil.compress(data,resBuffer);
+ ensz = PForUtil.getEncodedSize(header);
expect = 2*(j-1)+1;
got = PForUtil.getExcNum(header);
assert expect == got: expect+" expected but got "+got;
@@ -137,8 +131,8 @@ public class TestPForUtil extends Lucene
data[i] = (exc & 0xffff0000) == 0 ? exc | 0xffff0000 : exc;
j++;
}
- ensz = PForUtil.compress(data,sz,resBuffer);
- header = resBuffer.get(0);
+ header = PForUtil.compress(data,resBuffer);
+ ensz = PForUtil.getEncodedSize(header);
expect = 3*(j-1)+1;
got = PForUtil.getExcNum(header);
assert expect == got: expect+" expected but got "+got;
@@ -156,7 +150,7 @@ public class TestPForUtil extends Lucene
Integer[] buff= new Integer[sz];
int[] data = new int[sz];
int[] copy = new int[sz];
- byte[] res = new byte[4+sz*8];
+ byte[] res = new byte[sz*8];
IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
int excIndex = gen.nextInt(sz/2);
@@ -176,13 +170,13 @@ public class TestPForUtil extends Lucene
for (int i=0; i<sz; ++i)
data[i] = buff[i];
- int ensz = PForUtil.compress(data,sz,resBuffer);
+ int header = PForUtil.compress(data,resBuffer);
+ int ensz = PForUtil.getEncodedSize(header);
- assert (ensz <= sz*8+4): ensz+" > "+sz*8+4; // must not exceed the loose upperbound
- assert (ensz >= 8); // at least we have a header along with an exception, right?
+ assert (ensz <= sz*8): ensz+" > "+sz*8; // must not exceed the loose upperbound
+ assert (ensz >= 4); // at least we have an exception, right?
- resBuffer.rewind();
- PForUtil.decompress(resBuffer,copy);
+ PForUtil.decompress(resBuffer,copy,header);
// println(getHex(data,sz)+"\n");
// println(getHex(res,ensz)+"\n");
@@ -211,7 +205,7 @@ public class TestPForUtil extends Lucene
int[] data = new int[sz];
for (int i=0; i<=32; ++i) { // try to test every kinds of distribution
double alpha=gen.nextDouble(); // rate of normal value
- for (int j=0; j<=32; ++j) {
+ for (int j=i; j<=32; ++j) {
createDistribution(data,sz,alpha,MASK[i],MASK[j]);
tryCompressAndDecompress(data, sz);
}
@@ -229,15 +223,16 @@ public class TestPForUtil extends Lucene
data[i] = buff[i];
}
public void tryCompressAndDecompress(final int[] data, int sz) throws Exception {
- byte[] res = new byte[4+sz*8]; // loosely upperbound
+ byte[] res = new byte[sz*8]; // loosely upperbound
IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
- int ensz = PForUtil.compress(data,sz,resBuffer);
+ int header = PForUtil.compress(data,resBuffer);
+ int ensz = PForUtil.getEncodedSize(header);
- assert (ensz <= sz*8+4); // must not exceed the loose upperbound
+ assert (ensz <= sz*8); // must not exceed the loose upperbound
int[] copy = new int[sz];
- PForUtil.decompress(resBuffer,copy);
+ PForUtil.decompress(resBuffer,copy,header);
// println(getHex(data,sz)+"\n");
// println(getHex(res,ensz)+"\n");