You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2012/08/09 14:52:24 UTC
svn commit: r1371184 - in
/lucene/dev/branches/pforcodec_3892/lucene/core/src:
java/org/apache/lucene/codecs/blockpacked/
test/org/apache/lucene/codecs/blockpacked/
Author: jpountz
Date: Thu Aug 9 12:52:24 2012
New Revision: 1371184
URL: http://svn.apache.org/viewvc?rev=1371184&view=rev
Log:
LUCENE-3892: fix constant names, cache encoded sizes instead of formats and move readVIntBlock to the postings reader.
Modified:
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsReader.java
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsWriter.java
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/ForUtil.java
lucene/dev/branches/pforcodec_3892/lucene/core/src/test/org/apache/lucene/codecs/blockpacked/TestForUtil.java
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsReader.java?rev=1371184&r1=1371183&r2=1371184&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsReader.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsReader.java Thu Aug 9 12:52:24 2012
@@ -18,8 +18,8 @@ package org.apache.lucene.codecs.blockpa
*/
import static org.apache.lucene.codecs.blockpacked.BlockPackedPostingsFormat.BLOCK_SIZE;
-import static org.apache.lucene.codecs.blockpacked.ForUtil.MIN_DATA_SIZE;
-import static org.apache.lucene.codecs.blockpacked.ForUtil.MIN_ENCODED_SIZE;
+import static org.apache.lucene.codecs.blockpacked.ForUtil.MAX_DATA_SIZE;
+import static org.apache.lucene.codecs.blockpacked.ForUtil.MAX_ENCODED_SIZE;
import java.io.IOException;
import java.util.Arrays;
@@ -122,6 +122,28 @@ public final class BlockPackedPostingsRe
}
}
+ /**
+ * Read values that have been written using variable-length encoding instead of bit-packing.
+ */
+ private static void readVIntBlock(IndexInput docIn, int[] docBuffer,
+ int[] freqBuffer, int num, boolean indexHasFreq) throws IOException {
+ if (indexHasFreq) {
+ for(int i=0;i<num;i++) {
+ final int code = docIn.readVInt();
+ docBuffer[i] = code >>> 1;
+ if ((code & 1) != 0) {
+ freqBuffer[i] = 1;
+ } else {
+ freqBuffer[i] = docIn.readVInt();
+ }
+ }
+ } else {
+ for(int i=0;i<num;i++) {
+ docBuffer[i] = docIn.readVInt();
+ }
+ }
+ }
+
// Must keep final because we do non-standard clone
private final static class IntBlockTermState extends BlockTermState {
long docStartFP;
@@ -298,8 +320,8 @@ public final class BlockPackedPostingsRe
final class BlockDocsEnum extends DocsEnum {
private final byte[] encoded;
- private final int[] docDeltaBuffer = new int[MIN_DATA_SIZE];
- private final int[] freqBuffer = new int[MIN_DATA_SIZE];
+ private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE];
+ private final int[] freqBuffer = new int[MAX_DATA_SIZE];
private int docBufferUpto;
@@ -341,7 +363,7 @@ public final class BlockPackedPostingsRe
indexHasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
indexHasPayloads = fieldInfo.hasPayloads();
- encoded = new byte[MIN_ENCODED_SIZE];
+ encoded = new byte[MAX_ENCODED_SIZE];
}
public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
@@ -404,7 +426,7 @@ public final class BlockPackedPostingsRe
if (DEBUG) {
System.out.println(" fill last vInt block from fp=" + docIn.getFilePointer());
}
- ForUtil.readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, indexHasFreq);
+ readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, indexHasFreq);
}
docBufferUpto = 0;
}
@@ -548,9 +570,9 @@ public final class BlockPackedPostingsRe
private final byte[] encoded;
- private final int[] docDeltaBuffer = new int[MIN_DATA_SIZE];
- private final int[] freqBuffer = new int[MIN_DATA_SIZE];
- private final int[] posDeltaBuffer = new int[MIN_DATA_SIZE];
+ private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE];
+ private final int[] freqBuffer = new int[MAX_DATA_SIZE];
+ private final int[] posDeltaBuffer = new int[MAX_DATA_SIZE];
private int docBufferUpto;
private int posBufferUpto;
@@ -609,7 +631,7 @@ public final class BlockPackedPostingsRe
this.startDocIn = BlockPackedPostingsReader.this.docIn;
this.docIn = (IndexInput) startDocIn.clone();
this.posIn = (IndexInput) BlockPackedPostingsReader.this.posIn.clone();
- encoded = new byte[MIN_ENCODED_SIZE];
+ encoded = new byte[MAX_ENCODED_SIZE];
indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
indexHasPayloads = fieldInfo.hasPayloads();
}
@@ -678,7 +700,7 @@ public final class BlockPackedPostingsRe
if (DEBUG) {
System.out.println(" fill last vInt doc block from fp=" + docIn.getFilePointer());
}
- ForUtil.readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true);
+ readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true);
}
docBufferUpto = 0;
}
@@ -952,9 +974,9 @@ public final class BlockPackedPostingsRe
private final byte[] encoded;
- private final int[] docDeltaBuffer = new int[MIN_DATA_SIZE];
- private final int[] freqBuffer = new int[MIN_DATA_SIZE];
- private final int[] posDeltaBuffer = new int[MIN_DATA_SIZE];
+ private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE];
+ private final int[] freqBuffer = new int[MAX_DATA_SIZE];
+ private final int[] posDeltaBuffer = new int[MAX_DATA_SIZE];
private final int[] payloadLengthBuffer;
private final int[] offsetStartDeltaBuffer;
@@ -1032,11 +1054,11 @@ public final class BlockPackedPostingsRe
this.docIn = (IndexInput) startDocIn.clone();
this.posIn = (IndexInput) BlockPackedPostingsReader.this.posIn.clone();
this.payIn = (IndexInput) BlockPackedPostingsReader.this.payIn.clone();
- encoded = new byte[MIN_ENCODED_SIZE];
+ encoded = new byte[MAX_ENCODED_SIZE];
indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
if (indexHasOffsets) {
- offsetStartDeltaBuffer = new int[MIN_DATA_SIZE];
- offsetLengthBuffer = new int[MIN_DATA_SIZE];
+ offsetStartDeltaBuffer = new int[MAX_DATA_SIZE];
+ offsetLengthBuffer = new int[MAX_DATA_SIZE];
} else {
offsetStartDeltaBuffer = null;
offsetLengthBuffer = null;
@@ -1046,7 +1068,7 @@ public final class BlockPackedPostingsRe
indexHasPayloads = fieldInfo.hasPayloads();
if (indexHasPayloads) {
- payloadLengthBuffer = new int[MIN_DATA_SIZE];
+ payloadLengthBuffer = new int[MAX_DATA_SIZE];
payloadBytes = new byte[128];
payload = new BytesRef();
} else {
@@ -1120,7 +1142,7 @@ public final class BlockPackedPostingsRe
if (DEBUG) {
System.out.println(" fill last vInt doc block from fp=" + docIn.getFilePointer());
}
- ForUtil.readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true);
+ readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true);
}
docBufferUpto = 0;
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsWriter.java?rev=1371184&r1=1371183&r2=1371184&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsWriter.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsWriter.java Thu Aug 9 12:52:24 2012
@@ -19,8 +19,8 @@ package org.apache.lucene.codecs.blockpa
import static org.apache.lucene.codecs.blockpacked.BlockPackedPostingsFormat.BLOCK_SIZE;
import static org.apache.lucene.codecs.blockpacked.BlockPackedPostingsReader.DEBUG;
-import static org.apache.lucene.codecs.blockpacked.ForUtil.MIN_DATA_SIZE;
-import static org.apache.lucene.codecs.blockpacked.ForUtil.MIN_ENCODED_SIZE;
+import static org.apache.lucene.codecs.blockpacked.ForUtil.MAX_DATA_SIZE;
+import static org.apache.lucene.codecs.blockpacked.ForUtil.MAX_ENCODED_SIZE;
import java.io.IOException;
import java.util.ArrayList;
@@ -125,22 +125,22 @@ public final class BlockPackedPostingsWr
CodecUtil.writeHeader(docOut, DOC_CODEC, VERSION_CURRENT);
forUtil = new ForUtil(acceptableOverheadRatio, docOut);
if (state.fieldInfos.hasProx()) {
- posDeltaBuffer = new int[MIN_DATA_SIZE];
+ posDeltaBuffer = new int[MAX_DATA_SIZE];
posOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockPackedPostingsFormat.POS_EXTENSION),
state.context);
CodecUtil.writeHeader(posOut, POS_CODEC, VERSION_CURRENT);
if (state.fieldInfos.hasPayloads()) {
payloadBytes = new byte[128];
- payloadLengthBuffer = new int[MIN_DATA_SIZE];
+ payloadLengthBuffer = new int[MAX_DATA_SIZE];
} else {
payloadBytes = null;
payloadLengthBuffer = null;
}
if (state.fieldInfos.hasOffsets()) {
- offsetStartDeltaBuffer = new int[MIN_DATA_SIZE];
- offsetLengthBuffer = new int[MIN_DATA_SIZE];
+ offsetStartDeltaBuffer = new int[MAX_DATA_SIZE];
+ offsetLengthBuffer = new int[MAX_DATA_SIZE];
} else {
offsetStartDeltaBuffer = null;
offsetLengthBuffer = null;
@@ -167,8 +167,8 @@ public final class BlockPackedPostingsWr
}
}
- docDeltaBuffer = new int[MIN_DATA_SIZE];
- freqBuffer = new int[MIN_DATA_SIZE];
+ docDeltaBuffer = new int[MAX_DATA_SIZE];
+ freqBuffer = new int[MAX_DATA_SIZE];
// nocommit should we try skipping every 2/4 blocks...?
skipWriter = new BlockPackedSkipWriter(maxSkipLevels,
@@ -178,7 +178,7 @@ public final class BlockPackedPostingsWr
posOut,
payOut);
- encoded = new byte[MIN_ENCODED_SIZE];
+ encoded = new byte[MAX_ENCODED_SIZE];
}
public BlockPackedPostingsWriter(SegmentWriteState state) throws IOException {
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/ForUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/ForUtil.java?rev=1371184&r1=1371183&r2=1371184&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/ForUtil.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/ForUtil.java Thu Aug 9 12:52:24 2012
@@ -26,6 +26,7 @@ import org.apache.lucene.store.DataOutpu
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.PackedInts.Decoder;
import org.apache.lucene.util.packed.PackedInts.FormatAndBits;
/**
@@ -41,14 +42,18 @@ final class ForUtil {
private static final int PACKED_INTS_VERSION = 0; // nocommit: encode in the stream?
/**
- * Minimum length of the buffer that holds encoded bytes.
+ * Upper limit of the number of bytes that might be required to store
+ * <code>BLOCK_SIZE</code> encoded values.
*/
- static final int MIN_ENCODED_SIZE = BLOCK_SIZE * 4;
+ static final int MAX_ENCODED_SIZE = BLOCK_SIZE * 4;
/**
- * Minimum length of the buffer that holds data.
+ * Upper limit of the number of values that might be decoded in a single call to
+ * {@link #readBlock(IndexInput, byte[], int[])}. Although values after
+ * <code>BLOCK_SIZE</code> are garbage, it is necessary to allocate value buffers
+ * whose size is >= MAX_DATA_SIZE to avoid {@link ArrayIndexOutOfBoundsException}s.
*/
- static final int MIN_DATA_SIZE;
+ static final int MAX_DATA_SIZE;
static {
int minDataSize = 0;
for (PackedInts.Format format : PackedInts.Format.values()) {
@@ -61,14 +66,26 @@ final class ForUtil {
minDataSize = Math.max(minDataSize, iterations * decoder.valueCount());
}
}
- MIN_DATA_SIZE = minDataSize;
+ MAX_DATA_SIZE = minDataSize;
}
+ /**
+ * Compute the number of iterations required to decode <code>BLOCK_SIZE</code>
+ * values with the provided {@link Decoder}.
+ */
private static int computeIterations(PackedInts.Decoder decoder) {
return (int) Math.ceil((float) BLOCK_SIZE / decoder.valueCount());
}
- private final PackedInts.FormatAndBits[] formats;
+ /**
+ * Compute the number of bytes required to encode a block of values that require
+ * <code>bitsPerValue</code> bits per value with format <code>format</code>.
+ */
+ private static int encodedSize(PackedInts.Format format, int bitsPerValue) {
+ return format.nblocks(bitsPerValue, BLOCK_SIZE) << 3;
+ }
+
+ private final int[] encodedSizes;
private final PackedInts.Encoder[] encoders;
private final PackedInts.Decoder[] decoders;
private final int[] iterations;
@@ -77,7 +94,7 @@ final class ForUtil {
* Create a new {@link ForUtil} instance and save state into <code>out</code>.
*/
ForUtil(float acceptableOverheadRatio, DataOutput out) throws IOException {
- formats = new PackedInts.FormatAndBits[33];
+ encodedSizes = new int[33];
encoders = new PackedInts.Encoder[33];
decoders = new PackedInts.Decoder[33];
iterations = new int[33];
@@ -87,7 +104,7 @@ final class ForUtil {
BLOCK_SIZE, bpv, acceptableOverheadRatio);
assert formatAndBits.format.isSupported(formatAndBits.bitsPerValue);
assert formatAndBits.bitsPerValue <= 32;
- formats[bpv] = formatAndBits;
+ encodedSizes[bpv] = encodedSize(formatAndBits.format, formatAndBits.bitsPerValue);
encoders[bpv] = PackedInts.getEncoder(
formatAndBits.format, PACKED_INTS_VERSION, formatAndBits.bitsPerValue);
decoders[bpv] = PackedInts.getDecoder(
@@ -102,7 +119,7 @@ final class ForUtil {
* Restore a {@link ForUtil} from a {@link DataInput}.
*/
ForUtil(DataInput in) throws IOException {
- formats = new PackedInts.FormatAndBits[33];
+ encodedSizes = new int[33];
encoders = new PackedInts.Encoder[33];
decoders = new PackedInts.Decoder[33];
iterations = new int[33];
@@ -114,7 +131,7 @@ final class ForUtil {
final PackedInts.Format format = PackedInts.Format.byId(formatId);
assert format.isSupported(bitsPerValue);
- formats[bpv] = new PackedInts.FormatAndBits(format, bitsPerValue);
+ encodedSizes[bpv] = encodedSize(format, bitsPerValue);
encoders[bpv] = PackedInts.getEncoder(
format, PACKED_INTS_VERSION, bitsPerValue);
decoders[bpv] = PackedInts.getDecoder(
@@ -124,19 +141,6 @@ final class ForUtil {
}
/**
- * Compute the minimum size of the buffer that holds values. This method exists
- * because {@link Decoder}s cannot decode less than a given amount of blocks
- * at a time.
- */
- int getMinRequiredBufferSize() {
- int minSize = 0;
- for (int bpv = 1; bpv <= 32; ++bpv) {
- minSize = Math.max(minSize, iterations[bpv] * decoders[bpv].valueCount());
- }
- return minSize;
- }
-
- /**
* Write a block of data (<code>For</code> format).
*
* @param data the data to write
@@ -156,7 +160,7 @@ final class ForUtil {
final PackedInts.Encoder encoder = encoders[numBits];
final int iters = iterations[numBits];
assert iters * encoder.valueCount() >= BLOCK_SIZE;
- final int encodedSize = encodedSize(numBits);
+ final int encodedSize = encodedSizes[numBits];
assert (iters * encoder.blockCount()) << 3 >= encodedSize;
out.writeVInt(numBits);
@@ -183,7 +187,7 @@ final class ForUtil {
return;
}
- final int encodedSize = encodedSize(numBits);
+ final int encodedSize = encodedSizes[numBits];
in.readBytes(encoded, 0, encodedSize);
final PackedInts.Decoder decoder = decoders[numBits];
@@ -206,32 +210,10 @@ final class ForUtil {
return;
}
assert numBits > 0 && numBits <= 32 : numBits;
- final int encodedSize = encodedSize(numBits);
+ final int encodedSize = encodedSizes[numBits];
in.seek(in.getFilePointer() + encodedSize);
}
- /**
- * Read values that have been written using variable-length encoding instead of bit-packing.
- */
- static void readVIntBlock(IndexInput docIn, int[] docBuffer,
- int[] freqBuffer, int num, boolean indexHasFreq) throws IOException {
- if (indexHasFreq) {
- for(int i=0;i<num;i++) {
- final int code = docIn.readVInt();
- docBuffer[i] = code >>> 1;
- if ((code & 1) != 0) {
- freqBuffer[i] = 1;
- } else {
- freqBuffer[i] = docIn.readVInt();
- }
- }
- } else {
- for(int i=0;i<num;i++) {
- docBuffer[i] = docIn.readVInt();
- }
- }
- }
-
// nocommit: we must have a util function for this, hmm?
private static boolean isAllEqual(final int[] data) {
final long v = data[0];
@@ -255,13 +237,4 @@ final class ForUtil {
return PackedInts.bitsRequired(or);
}
- /**
- * Compute the number of bytes required to encode a block of values that require
- * <code>bitsPerValue</code> bits per value.
- */
- private int encodedSize(int bitsPerValue) {
- final FormatAndBits formatAndBits = formats[bitsPerValue];
- return formatAndBits.format.nblocks(formatAndBits.bitsPerValue, BLOCK_SIZE) << 3;
- }
-
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/test/org/apache/lucene/codecs/blockpacked/TestForUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/test/org/apache/lucene/codecs/blockpacked/TestForUtil.java?rev=1371184&r1=1371183&r2=1371184&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/test/org/apache/lucene/codecs/blockpacked/TestForUtil.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/test/org/apache/lucene/codecs/blockpacked/TestForUtil.java Thu Aug 9 12:52:24 2012
@@ -18,8 +18,8 @@ package org.apache.lucene.codecs.blockpa
*/
import static org.apache.lucene.codecs.blockpacked.BlockPackedPostingsFormat.BLOCK_SIZE;
-import static org.apache.lucene.codecs.blockpacked.ForUtil.MIN_DATA_SIZE;
-import static org.apache.lucene.codecs.blockpacked.ForUtil.MIN_ENCODED_SIZE;
+import static org.apache.lucene.codecs.blockpacked.ForUtil.MAX_DATA_SIZE;
+import static org.apache.lucene.codecs.blockpacked.ForUtil.MAX_ENCODED_SIZE;
import java.io.IOException;
import java.util.Arrays;
@@ -39,7 +39,7 @@ public class TestForUtil extends LuceneT
public void testEncodeDecode() throws IOException {
final int iterations = RandomInts.randomIntBetween(random(), 1, 1000);
final float acceptableOverheadRatio = random().nextFloat();
- final int[] values = new int[(iterations - 1) * BLOCK_SIZE + ForUtil.MIN_DATA_SIZE];
+ final int[] values = new int[(iterations - 1) * BLOCK_SIZE + ForUtil.MAX_DATA_SIZE];
for (int i = 0; i < iterations; ++i) {
final int bpv = random().nextInt(32);
if (bpv == 0) {
@@ -66,7 +66,7 @@ public class TestForUtil extends LuceneT
for (int i = 0; i < iterations; ++i) {
forUtil.writeBlock(
Arrays.copyOfRange(values, i * BLOCK_SIZE, values.length),
- new byte[MIN_ENCODED_SIZE], out);
+ new byte[MAX_ENCODED_SIZE], out);
}
endPointer = out.getFilePointer();
out.close();
@@ -81,8 +81,8 @@ public class TestForUtil extends LuceneT
forUtil.skipBlock(in);
continue;
}
- final int[] restored = new int[MIN_DATA_SIZE];
- forUtil.readBlock(in, new byte[MIN_ENCODED_SIZE], restored);
+ final int[] restored = new int[MAX_DATA_SIZE];
+ forUtil.readBlock(in, new byte[MAX_ENCODED_SIZE], restored);
assertArrayEquals(Arrays.copyOfRange(values, i * BLOCK_SIZE, (i + 1) * BLOCK_SIZE),
Arrays.copyOf(restored, BLOCK_SIZE));
}