You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2012/11/07 15:17:43 UTC
svn commit: r1406651 - in /lucene/dev/trunk/lucene: ./
codecs/src/java/org/apache/lucene/codecs/compressing/
core/src/java/org/apache/lucene/codecs/lucene41/
core/src/java/org/apache/lucene/util/packed/
core/src/test/org/apache/lucene/util/packed/
Author: jpountz
Date: Wed Nov 7 14:17:42 2012
New Revision: 1406651
URL: http://svn.apache.org/viewvc?rev=1406651&view=rev
Log:
LUCENE-4536: Make PackedInts on-disk format byte-aligned.
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/ForUtil.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/BulkOperation.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct16.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct32.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct64.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct8.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/DirectPackedReader.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed16ThreeBlocks.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed64.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed8ThreeBlocks.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedWriter.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/gen_Direct.py
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/gen_PackedThreeBlocks.py
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Wed Nov 7 14:17:42 2012
@@ -142,6 +142,10 @@ Bug Fixes
Optimizations
+* LUCENE-4536: PackedInts on-disk format is now byte-aligned (it used to be
+ long-aligned), saving up to 7 bytes per array of values.
+ (Adrien Grand, Mike McCandless)
+
* LUCENE-4512: Additional memory savings for CompressingStoredFieldsFormat.
(Adrien Grand, Robert Muir)
Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java (original)
+++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java Wed Nov 7 14:17:42 2012
@@ -205,7 +205,7 @@ final class CompressingStoredFieldsReade
}
final int length = (int) lengths.next();
// skip the last values
- fieldsStream.seek(filePointer + (PackedInts.Format.PACKED.nblocks(bitsPerValue, chunkDocs) << 3));
+ fieldsStream.seek(filePointer + PackedInts.Format.PACKED.byteCount(packedIntsVersion, chunkDocs, bitsPerValue));
decompressor.decompress(fieldsStream, offset, length, bytes);
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/ForUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/ForUtil.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/ForUtil.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/ForUtil.java Wed Nov 7 14:17:42 2012
@@ -19,7 +19,6 @@ package org.apache.lucene.codecs.lucene4
import java.io.IOException;
import java.util.Arrays;
-import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
@@ -83,8 +82,10 @@ final class ForUtil {
* Compute the number of bytes required to encode a block of values that require
* <code>bitsPerValue</code> bits per value with format <code>format</code>.
*/
- private static int encodedSize(PackedInts.Format format, int bitsPerValue) {
- return format.nblocks(bitsPerValue, BLOCK_SIZE) << 3;
+ private static int encodedSize(PackedInts.Format format, int packedIntsVersion, int bitsPerValue) {
+ final long byteCount = format.byteCount(packedIntsVersion, BLOCK_SIZE, bitsPerValue);
+ assert byteCount >= 0 && byteCount <= Integer.MAX_VALUE : byteCount;
+ return (int) byteCount;
}
private final int[] encodedSizes;
@@ -107,7 +108,7 @@ final class ForUtil {
BLOCK_SIZE, bpv, acceptableOverheadRatio);
assert formatAndBits.format.isSupported(formatAndBits.bitsPerValue);
assert formatAndBits.bitsPerValue <= 32;
- encodedSizes[bpv] = encodedSize(formatAndBits.format, formatAndBits.bitsPerValue);
+ encodedSizes[bpv] = encodedSize(formatAndBits.format, PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue);
encoders[bpv] = PackedInts.getEncoder(
formatAndBits.format, PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue);
decoders[bpv] = PackedInts.getDecoder(
@@ -123,9 +124,7 @@ final class ForUtil {
*/
ForUtil(DataInput in) throws IOException {
int packedIntsVersion = in.readVInt();
- if (packedIntsVersion != PackedInts.VERSION_START) {
- throw new CorruptIndexException("expected version=" + PackedInts.VERSION_START + " but got version=" + packedIntsVersion);
- }
+ PackedInts.checkVersion(packedIntsVersion);
encodedSizes = new int[33];
encoders = new PackedInts.Encoder[33];
decoders = new PackedInts.Decoder[33];
@@ -138,7 +137,7 @@ final class ForUtil {
final PackedInts.Format format = PackedInts.Format.byId(formatId);
assert format.isSupported(bitsPerValue);
- encodedSizes[bpv] = encodedSize(format, bitsPerValue);
+ encodedSizes[bpv] = encodedSize(format, packedIntsVersion, bitsPerValue);
encoders[bpv] = PackedInts.getEncoder(
format, packedIntsVersion, bitsPerValue);
decoders[bpv] = PackedInts.getDecoder(
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/BulkOperation.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/BulkOperation.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/BulkOperation.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/BulkOperation.java Wed Nov 7 14:17:42 2012
@@ -2,6 +2,7 @@
package org.apache.lucene.util.packed;
+
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -157,14 +158,19 @@ abstract class BulkOperation implements
* - 50 bits per value -> b=25, v=32
* - 63 bits per value -> b=63, v=64
* - ...
- *
+ * <p>
* A bulk read consists in copying <code>iterations*v</code> values that are
* contained in <code>iterations*b</code> blocks into a <code>long[]</code>
* (higher values of <code>iterations</code> are likely to yield a better
* throughput) => this requires n * (b + v) longs in memory.
- *
+ * <p>
* This method computes <code>iterations</code> as
* <code>ramBudget / (8 * (b + v))</code> (since a long is 8 bytes).
+ * <p>
+ * The resulting number of iterations of this method is guaranteed not to
+ * overflow when multiplied by
+ * <tt>8 * {@link PackedInts.Encoder#blockCount()}</tt> or
+ * <tt>8 * {@link PackedInts.Decoder#blockCount()}</tt>.
*/
public final int computeIterations(int valueCount, int ramBudget) {
final int iterations = (ramBudget >>> 3) / (blockCount() + valueCount());
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct16.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct16.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct16.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct16.java Wed Nov 7 14:17:42 2012
@@ -37,16 +37,15 @@ final class Direct16 extends PackedInts.
values = new short[valueCount];
}
- Direct16(DataInput in, int valueCount) throws IOException {
+ Direct16(int packedIntsVersion, DataInput in, int valueCount) throws IOException {
this(valueCount);
for (int i = 0; i < valueCount; ++i) {
values[i] = in.readShort();
}
- final int mod = valueCount % 4;
- if (mod != 0) {
- for (int i = mod; i < 4; ++i) {
- in.readShort();
- }
+ // because packed ints have not always been byte-aligned
+ final int remaining = (int) (PackedInts.Format.PACKED.byteCount(packedIntsVersion, valueCount, 16) - 2L * valueCount);
+ for (int i = 0; i < remaining; ++i) {
+ in.readByte();
}
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct32.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct32.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct32.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct32.java Wed Nov 7 14:17:42 2012
@@ -37,16 +37,15 @@ final class Direct32 extends PackedInts.
values = new int[valueCount];
}
- Direct32(DataInput in, int valueCount) throws IOException {
+ Direct32(int packedIntsVersion, DataInput in, int valueCount) throws IOException {
this(valueCount);
for (int i = 0; i < valueCount; ++i) {
values[i] = in.readInt();
}
- final int mod = valueCount % 2;
- if (mod != 0) {
- for (int i = mod; i < 2; ++i) {
- in.readInt();
- }
+ // because packed ints have not always been byte-aligned
+ final int remaining = (int) (PackedInts.Format.PACKED.byteCount(packedIntsVersion, valueCount, 32) - 4L * valueCount);
+ for (int i = 0; i < remaining; ++i) {
+ in.readByte();
}
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct64.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct64.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct64.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct64.java Wed Nov 7 14:17:42 2012
@@ -37,7 +37,7 @@ final class Direct64 extends PackedInts.
values = new long[valueCount];
}
- Direct64(DataInput in, int valueCount) throws IOException {
+ Direct64(int packedIntsVersion, DataInput in, int valueCount) throws IOException {
this(valueCount);
for (int i = 0; i < valueCount; ++i) {
values[i] = in.readLong();
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct8.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct8.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct8.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct8.java Wed Nov 7 14:17:42 2012
@@ -37,16 +37,13 @@ final class Direct8 extends PackedInts.M
values = new byte[valueCount];
}
- Direct8(DataInput in, int valueCount) throws IOException {
+ Direct8(int packedIntsVersion, DataInput in, int valueCount) throws IOException {
this(valueCount);
- for (int i = 0; i < valueCount; ++i) {
- values[i] = in.readByte();
- }
- final int mod = valueCount % 8;
- if (mod != 0) {
- for (int i = mod; i < 8; ++i) {
- in.readByte();
- }
+ in.readBytes(values, 0, valueCount);
+ // because packed ints have not always been byte-aligned
+ final int remaining = (int) (PackedInts.Format.PACKED.byteCount(packedIntsVersion, valueCount, 8) - 1L * valueCount);
+ for (int i = 0; i < remaining; ++i) {
+ in.readByte();
}
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/DirectPackedReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/DirectPackedReader.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/DirectPackedReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/DirectPackedReader.java Wed Nov 7 14:17:42 2012
@@ -22,13 +22,10 @@ import org.apache.lucene.store.IndexInpu
import java.io.IOException;
/* Reads directly from disk on each get */
-final class DirectPackedReader extends PackedInts.ReaderImpl {
+class DirectPackedReader extends PackedInts.ReaderImpl {
private final IndexInput in;
private final long startPointer;
- private static final int BLOCK_BITS = Packed64.BLOCK_BITS;
- private static final int MOD_MASK = Packed64.MOD_MASK;
-
// masks[n-1] masks for bottom n bits
private final long[] masks;
@@ -49,22 +46,30 @@ final class DirectPackedReader extends P
@Override
public long get(int index) {
final long majorBitPos = (long)index * bitsPerValue;
- final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE
- final int bitPos = (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE);
-
- final long result;
+ final long elementPos = majorBitPos >>> 3;
try {
- in.seek(startPointer + (elementPos << 3));
- final long l1 = in.readLong();
- final int bits1 = 64 - bitPos;
- if (bits1 >= bitsPerValue) { // not split
- result = l1 >> (bits1-bitsPerValue) & masks[bitsPerValue-1];
- } else {
- final int bits2 = bitsPerValue - bits1;
- final long result1 = (l1 & masks[bits1-1]) << bits2;
- final long l2 = in.readLong();
- final long result2 = l2 >> (64 - bits2) & masks[bits2-1];
- result = result1 | result2;
+ in.seek(startPointer + elementPos);
+
+ final byte b0 = in.readByte();
+ final int bitPos = (int) (majorBitPos & 7);
+ if (bitPos + bitsPerValue <= 8) {
+ // special case: all bits are in the first byte
+ return (b0 & ((1L << (8 - bitPos)) - 1)) >>> (8 - bitPos - bitsPerValue);
+ }
+
+ // take bits from the first byte
+ int remainingBits = bitsPerValue - 8 + bitPos;
+ long result = (b0 & ((1L << (8 - bitPos)) - 1)) << remainingBits;
+
+ // add bits from inner bytes
+ while (remainingBits >= 8) {
+ remainingBits -= 8;
+ result |= (in.readByte() & 0xFFL) << remainingBits;
+ }
+
+ // take bits from the last byte
+ if (remainingBits > 0) {
+ result |= (in.readByte() & 0xFFL) >>> (8 - remainingBits);
}
return result;
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed16ThreeBlocks.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed16ThreeBlocks.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed16ThreeBlocks.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed16ThreeBlocks.java Wed Nov 7 14:17:42 2012
@@ -42,16 +42,15 @@ final class Packed16ThreeBlocks extends
blocks = new short[valueCount * 3];
}
- Packed16ThreeBlocks(DataInput in, int valueCount) throws IOException {
+ Packed16ThreeBlocks(int packedIntsVersion, DataInput in, int valueCount) throws IOException {
this(valueCount);
for (int i = 0; i < 3 * valueCount; ++i) {
blocks[i] = in.readShort();
}
- final int mod = blocks.length % 4;
- if (mod != 0) {
- for (int i = mod; i < 4; ++i) {
- in.readShort();
- }
+ // because packed ints have not always been byte-aligned
+ final int remaining = (int) (PackedInts.Format.PACKED.byteCount(packedIntsVersion, valueCount, 48) - 3L * valueCount * 2);
+ for (int i = 0; i < remaining; ++i) {
+ in.readByte();
}
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed64.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed64.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed64.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed64.java Wed Nov 7 14:17:42 2012
@@ -67,26 +67,10 @@ class Packed64 extends PackedInts.Mutabl
* @param bitsPerValue the number of bits available for any given value.
*/
public Packed64(int valueCount, int bitsPerValue) {
- // NOTE: block-size was previously calculated as
- // valueCount * bitsPerValue / BLOCK_SIZE + 1
- // due to memory layout requirements dictated by non-branching code
- this(new long[size(valueCount, bitsPerValue)],
- valueCount, bitsPerValue);
- }
-
- /**
- * Creates an array backed by the given blocks.
- * </p><p>
- * Note: The blocks are used directly, so changes to the given block will
- * affect the Packed64-structure.
- * @param blocks used as the internal backing array. Not that the last
- * element cannot be addressed directly.
- * @param valueCount the number of values.
- * @param bitsPerValue the number of bits available for any given value.
- */
- public Packed64(long[] blocks, int valueCount, int bitsPerValue) {
super(valueCount, bitsPerValue);
- this.blocks = blocks;
+ final PackedInts.Format format = PackedInts.Format.PACKED;
+ final int longCount = format.longCount(PackedInts.VERSION_CURRENT, valueCount, bitsPerValue);
+ this.blocks = new long[longCount];
maskRight = ~0L << (BLOCK_SIZE-bitsPerValue) >>> (BLOCK_SIZE-bitsPerValue);
bpvMinusBlockSize = bitsPerValue - BLOCK_SIZE;
}
@@ -99,23 +83,30 @@ class Packed64 extends PackedInts.Mutabl
* @throws java.io.IOException if the values for the backing array could not
* be retrieved.
*/
- public Packed64(DataInput in, int valueCount, int bitsPerValue)
+ public Packed64(int packedIntsVersion, DataInput in, int valueCount, int bitsPerValue)
throws IOException {
super(valueCount, bitsPerValue);
- int size = size(valueCount, bitsPerValue);
- blocks = new long[size]; // Previously +1 due to non-conditional tricks
- for(int i=0;i<size;i++) {
+ final PackedInts.Format format = PackedInts.Format.PACKED;
+ final long byteCount = format.byteCount(packedIntsVersion, valueCount, bitsPerValue); // to know how much to read
+ final int longCount = format.longCount(PackedInts.VERSION_CURRENT, valueCount, bitsPerValue); // to size the array
+ blocks = new long[longCount];
+ // read as many longs as we can
+ for (int i = 0; i < byteCount / 8; ++i) {
blocks[i] = in.readLong();
}
+ final int remaining = (int) (byteCount % 8);
+ if (remaining != 0) {
+ // read the last bytes
+ long lastLong = 0;
+ for (int i = 0; i < remaining; ++i) {
+ lastLong |= (in.readByte() & 0xFFL) << (56 - i * 8);
+ }
+ blocks[blocks.length - 1] = lastLong;
+ }
maskRight = ~0L << (BLOCK_SIZE-bitsPerValue) >>> (BLOCK_SIZE-bitsPerValue);
bpvMinusBlockSize = bitsPerValue - BLOCK_SIZE;
}
- private static int size(int valueCount, int bitsPerValue) {
- final long totBitCount = (long) valueCount * bitsPerValue;
- return (int)(totBitCount/64 + ((totBitCount % 64 == 0 ) ? 0:1));
- }
-
/**
* @param index the position of the value.
* @return the value at the given index.
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed8ThreeBlocks.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed8ThreeBlocks.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed8ThreeBlocks.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed8ThreeBlocks.java Wed Nov 7 14:17:42 2012
@@ -42,16 +42,13 @@ final class Packed8ThreeBlocks extends P
blocks = new byte[valueCount * 3];
}
- Packed8ThreeBlocks(DataInput in, int valueCount) throws IOException {
+ Packed8ThreeBlocks(int packedIntsVersion, DataInput in, int valueCount) throws IOException {
this(valueCount);
- for (int i = 0; i < 3 * valueCount; ++i) {
- blocks[i] = in.readByte();
- }
- final int mod = blocks.length % 8;
- if (mod != 0) {
- for (int i = mod; i < 8; ++i) {
- in.readByte();
- }
+ in.readBytes(blocks, 0, 3 * valueCount);
+ // because packed ints have not always been byte-aligned
+ final int remaining = (int) (PackedInts.Format.PACKED.byteCount(packedIntsVersion, valueCount, 24) - 3L * valueCount * 1);
+ for (int i = 0; i < remaining; ++i) {
+ in.readByte();
}
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java Wed Nov 7 14:17:42 2012
@@ -62,10 +62,14 @@ public class PackedInts {
public static final int DEFAULT_BUFFER_SIZE = 1024; // 1K
public final static String CODEC_NAME = "PackedInts";
- public final static int VERSION_START = 0;
- public final static int VERSION_CURRENT = VERSION_START;
+ public final static int VERSION_START = 0; // PackedInts were long-aligned
+ public final static int VERSION_BYTE_ALIGNED = 1;
+ public final static int VERSION_CURRENT = VERSION_BYTE_ALIGNED;
- private static void checkVersion(int version) {
+ /**
+ * Check the validity of a version number.
+ */
+ public static void checkVersion(int version) {
if (version < VERSION_START) {
throw new IllegalArgumentException("Version is too old, should be at least " + VERSION_START + " (got " + version + ")");
} else if (version > VERSION_CURRENT) {
@@ -85,8 +89,12 @@ public class PackedInts {
PACKED(0) {
@Override
- public int nblocks(int bitsPerValue, int values) {
- return (int) Math.ceil((double) values * bitsPerValue / 64);
+ public long byteCount(int packedIntsVersion, int valueCount, int bitsPerValue) {
+ if (packedIntsVersion < VERSION_BYTE_ALIGNED) {
+ return 8L * (long) Math.ceil((double) valueCount * bitsPerValue / 64);
+ } else {
+ return (long) Math.ceil((double) valueCount * bitsPerValue / 8);
+ }
}
},
@@ -101,9 +109,9 @@ public class PackedInts {
PACKED_SINGLE_BLOCK(1) {
@Override
- public int nblocks(int bitsPerValue, int values) {
+ public int longCount(int packedIntsVersion, int valueCount, int bitsPerValue) {
final int valuesPerBlock = 64 / bitsPerValue;
- return (int) Math.ceil((double) values / valuesPerBlock);
+ return (int) Math.ceil((double) valueCount / valuesPerBlock);
}
@Override
@@ -147,10 +155,29 @@ public class PackedInts {
}
/**
- * Computes how many blocks are needed to store <code>values</code> values
- * of size <code>bitsPerValue</code>.
+ * Computes how many byte blocks are needed to store <code>values</code>
+ * values of size <code>bitsPerValue</code>.
*/
- public abstract int nblocks(int bitsPerValue, int values);
+ public long byteCount(int packedIntsVersion, int valueCount, int bitsPerValue) {
+ assert bitsPerValue >= 0 && bitsPerValue <= 64 : bitsPerValue;
+ // assume long-aligned
+ return 8L * longCount(packedIntsVersion, valueCount, bitsPerValue);
+ }
+
+ /**
+ * Computes how many long blocks are needed to store <code>values</code>
+ * values of size <code>bitsPerValue</code>.
+ */
+ public int longCount(int packedIntsVersion, int valueCount, int bitsPerValue) {
+ assert bitsPerValue >= 0 && bitsPerValue <= 64 : bitsPerValue;
+ final long byteCount = byteCount(packedIntsVersion, valueCount, bitsPerValue);
+ assert byteCount < 8L * Integer.MAX_VALUE;
+ if ((byteCount % 8) == 0) {
+ return (int) (byteCount / 8);
+ } else {
+ return (int) (byteCount / 8 + 1);
+ }
+ }
/**
* Tests whether the provided number of bits per value is supported by the
@@ -725,25 +752,25 @@ public class PackedInts {
case PACKED:
switch (bitsPerValue) {
case 8:
- return new Direct8(in, valueCount);
+ return new Direct8(version, in, valueCount);
case 16:
- return new Direct16(in, valueCount);
+ return new Direct16(version, in, valueCount);
case 32:
- return new Direct32(in, valueCount);
+ return new Direct32(version, in, valueCount);
case 64:
- return new Direct64(in, valueCount);
+ return new Direct64(version, in, valueCount);
case 24:
if (valueCount <= Packed8ThreeBlocks.MAX_SIZE) {
- return new Packed8ThreeBlocks(in, valueCount);
+ return new Packed8ThreeBlocks(version, in, valueCount);
}
break;
case 48:
if (valueCount <= Packed16ThreeBlocks.MAX_SIZE) {
- return new Packed16ThreeBlocks(in, valueCount);
+ return new Packed16ThreeBlocks(version, in, valueCount);
}
break;
}
- return new Packed64(in, valueCount, bitsPerValue);
+ return new Packed64(version, in, valueCount, bitsPerValue);
default:
throw new AssertionError("Unknown Writer format: " + format);
}
@@ -786,7 +813,7 @@ public class PackedInts {
public static ReaderIterator getReaderIteratorNoHeader(DataInput in, Format format, int version,
int valueCount, int bitsPerValue, int mem) {
checkVersion(version);
- return new PackedReaderIterator(format, valueCount, bitsPerValue, in, mem);
+ return new PackedReaderIterator(format, version, valueCount, bitsPerValue, in, mem);
}
/**
@@ -823,12 +850,37 @@ public class PackedInts {
* @return a direct Reader
* @lucene.internal
*/
- public static Reader getDirectReaderNoHeader(IndexInput in, Format format,
+ public static Reader getDirectReaderNoHeader(final IndexInput in, Format format,
int version, int valueCount, int bitsPerValue) {
checkVersion(version);
switch (format) {
case PACKED:
- return new DirectPackedReader(bitsPerValue, valueCount, in);
+ final long byteCount = format.byteCount(version, valueCount, bitsPerValue);
+ if (byteCount != format.byteCount(VERSION_CURRENT, valueCount, bitsPerValue)) {
+ assert version == VERSION_START;
+ final long endPointer = in.getFilePointer() + byteCount;
+ // Some consumers of direct readers assume that reading the last value
+ // will make the underlying IndexInput go to the end of the packed
+ // stream, but this is not true because packed ints storage used to be
+ // long-aligned and is now byte-aligned, hence this additional
+ // condition when reading the last value
+ return new DirectPackedReader(bitsPerValue, valueCount, in) {
+ @Override
+ public long get(int index) {
+ final long result = super.get(index);
+ if (index == valueCount - 1) {
+ try {
+ in.seek(endPointer);
+ } catch (IOException e) {
+ throw new IllegalStateException("failed", e);
+ }
+ }
+ return result;
+ }
+ };
+ } else {
+ return new DirectPackedReader(bitsPerValue, valueCount, in);
+ }
case PACKED_SINGLE_BLOCK:
return new DirectPacked64SingleBlockReader(bitsPerValue, valueCount, in);
default:
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java Wed Nov 7 14:17:42 2012
@@ -19,29 +19,30 @@ package org.apache.lucene.util.packed;
import java.io.EOFException;
import java.io.IOException;
+import java.util.Arrays;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.LongsRef;
final class PackedReaderIterator extends PackedInts.ReaderIteratorImpl {
+ final int packedIntsVersion;
final PackedInts.Format format;
final BulkOperation bulkOperation;
- final long[] nextBlocks;
+ final byte[] nextBlocks;
final LongsRef nextValues;
final int iterations;
int position;
- PackedReaderIterator(PackedInts.Format format, int valueCount, int bitsPerValue, DataInput in, int mem) {
+ PackedReaderIterator(PackedInts.Format format, int packedIntsVersion, int valueCount, int bitsPerValue, DataInput in, int mem) {
super(valueCount, bitsPerValue, in);
this.format = format;
+ this.packedIntsVersion = packedIntsVersion;
bulkOperation = BulkOperation.of(format, bitsPerValue);
iterations = bulkOperation.computeIterations(valueCount, mem);
assert valueCount == 0 || iterations > 0;
- nextBlocks = new long[iterations * bulkOperation.blockCount()];
+ nextBlocks = new byte[8 * iterations * bulkOperation.blockCount()];
nextValues = new LongsRef(new long[iterations * bulkOperation.valueCount()], 0, 0);
- assert iterations * bulkOperation.valueCount() == nextValues.longs.length;
- assert iterations * bulkOperation.blockCount() == nextBlocks.length;
nextValues.offset = nextValues.longs.length;
position = -1;
}
@@ -61,13 +62,11 @@ final class PackedReaderIterator extends
count = Math.min(remaining, count);
if (nextValues.offset == nextValues.longs.length) {
- final int remainingBlocks = format.nblocks(bitsPerValue, remaining);
- final int blocksToRead = Math.min(remainingBlocks, nextBlocks.length);
- for (int i = 0; i < blocksToRead; ++i) {
- nextBlocks[i] = in.readLong();
- }
- for (int i = blocksToRead; i < nextBlocks.length; ++i) {
- nextBlocks[i] = 0L;
+ final long remainingBlocks = format.byteCount(packedIntsVersion, remaining, bitsPerValue);
+ final int blocksToRead = (int) Math.min(remainingBlocks, nextBlocks.length);
+ in.readBytes(nextBlocks, 0, blocksToRead);
+ if (blocksToRead < nextBlocks.length) {
+ Arrays.fill(nextBlocks, blocksToRead, nextBlocks.length, (byte) 0);
}
bulkOperation.decode(nextBlocks, 0, nextValues.longs, 0, iterations);
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedWriter.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedWriter.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedWriter.java Wed Nov 7 14:17:42 2012
@@ -31,7 +31,7 @@ final class PackedWriter extends PackedI
boolean finished;
final PackedInts.Format format;
final BulkOperation encoder;
- final long[] nextBlocks;
+ final byte[] nextBlocks;
final long[] nextValues;
final int iterations;
int off;
@@ -42,7 +42,7 @@ final class PackedWriter extends PackedI
this.format = format;
encoder = BulkOperation.of(format, bitsPerValue);
iterations = encoder.computeIterations(valueCount, mem);
- nextBlocks = new long[iterations * encoder.blockCount()];
+ nextBlocks = new byte[8 * iterations * encoder.blockCount()];
nextValues = new long[iterations * encoder.valueCount()];
off = 0;
written = 0;
@@ -82,10 +82,8 @@ final class PackedWriter extends PackedI
private void flush() throws IOException {
encoder.encode(nextValues, 0, nextBlocks, 0, iterations);
- final int blocks = format.nblocks(bitsPerValue, off);
- for (int i = 0; i < blocks; ++i) {
- out.writeLong(nextBlocks[i]);
- }
+ final int blockCount = (int) format.byteCount(PackedInts.VERSION_CURRENT, off, bitsPerValue);
+ out.writeBytes(nextBlocks, blockCount);
Arrays.fill(nextValues, 0L);
off = 0;
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/gen_Direct.py
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/gen_Direct.py?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/gen_Direct.py (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/gen_Direct.py Wed Nov 7 14:17:42 2012
@@ -65,17 +65,19 @@ if __name__ == '__main__':
f.write(" values = new %s[valueCount];\n" %TYPES[bpv])
f.write(" }\n\n")
- f.write(" Direct%d(DataInput in, int valueCount) throws IOException {\n" %bpv)
+ f.write(" Direct%d(int packedIntsVersion, DataInput in, int valueCount) throws IOException {\n" %bpv)
f.write(" this(valueCount);\n")
- f.write(" for (int i = 0; i < valueCount; ++i) {\n")
- f.write(" values[i] = in.read%s();\n" %TYPES[bpv].title())
- f.write(" }\n")
+ if bpv == 8:
+ f.write(" in.readBytes(values, 0, valueCount);\n")
+ else:
+ f.write(" for (int i = 0; i < valueCount; ++i) {\n")
+ f.write(" values[i] = in.read%s();\n" %TYPES[bpv].title())
+ f.write(" }\n")
if bpv != 64:
- f.write(" final int mod = valueCount %% %d;\n" %(64 / bpv))
- f.write(" if (mod != 0) {\n")
- f.write(" for (int i = mod; i < %d; ++i) {\n" %(64 / bpv))
- f.write(" in.read%s();\n" %TYPES[bpv].title())
- f.write(" }\n")
+ f.write(" // because packed ints have not always been byte-aligned\n")
+ f.write(" final int remaining = (int) (PackedInts.Format.PACKED.byteCount(packedIntsVersion, valueCount, %d) - %dL * valueCount);\n" %(bpv, bpv / 8))
+ f.write(" for (int i = 0; i < remaining; ++i) {\n")
+ f.write(" in.readByte();\n")
f.write(" }\n")
f.write(" }\n")
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/gen_PackedThreeBlocks.py
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/gen_PackedThreeBlocks.py?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/gen_PackedThreeBlocks.py (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/gen_PackedThreeBlocks.py Wed Nov 7 14:17:42 2012
@@ -70,16 +70,18 @@ if __name__ == '__main__':
f.write(" blocks = new %s[valueCount * 3];\n" %TYPES[bpv])
f.write(" }\n\n")
- f.write(" Packed%dThreeBlocks(DataInput in, int valueCount) throws IOException {\n" %bpv)
+ f.write(" Packed%dThreeBlocks(int packedIntsVersion, DataInput in, int valueCount) throws IOException {\n" %bpv)
f.write(" this(valueCount);\n")
- f.write(" for (int i = 0; i < 3 * valueCount; ++i) {\n")
- f.write(" blocks[i] = in.read%s();\n" %TYPES[bpv].title())
- f.write(" }\n")
- f.write(" final int mod = blocks.length %% %d;\n" %(64 / bpv))
- f.write(" if (mod != 0) {\n")
- f.write(" for (int i = mod; i < %d; ++i) {\n" %(64 / bpv))
- f.write(" in.read%s();\n" %TYPES[bpv].title())
- f.write(" }\n")
+ if bpv == 8:
+ f.write(" in.readBytes(blocks, 0, 3 * valueCount);\n")
+ else:
+ f.write(" for (int i = 0; i < 3 * valueCount; ++i) {\n")
+ f.write(" blocks[i] = in.read%s();\n" %TYPES[bpv].title())
+ f.write(" }\n")
+ f.write(" // because packed ints have not always been byte-aligned\n")
+ f.write(" final int remaining = (int) (PackedInts.Format.PACKED.byteCount(packedIntsVersion, valueCount, %d) - 3L * valueCount * %d);\n" %(3 * bpv, bpv / 8))
+ f.write(" for (int i = 0; i < remaining; ++i) {\n")
+ f.write(" in.readByte();\n")
f.write(" }\n")
f.write(" }\n")
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java Wed Nov 7 14:17:42 2012
@@ -40,8 +40,28 @@ import org.apache.lucene.util.packed.Pac
import org.junit.Ignore;
+import com.carrotsearch.randomizedtesting.generators.RandomInts;
+
@Slow
public class TestPackedInts extends LuceneTestCase {
+
+ public void testByteCount() {
+ final int iters = atLeast(3);
+ for (int i = 0; i < iters; ++i) {
+ final int valueCount = RandomInts.randomIntBetween(random(), 1, Integer.MAX_VALUE);
+ for (PackedInts.Format format : PackedInts.Format.values()) {
+ for (int bpv = 1; bpv <= 64; ++bpv) {
+ final long byteCount = format.byteCount(PackedInts.VERSION_CURRENT, valueCount, bpv);
+ String msg = "format=" + format + ", byteCount=" + byteCount + ", valueCount=" + valueCount + ", bpv=" + bpv;
+ assertTrue(msg, byteCount * 8 >= (long) valueCount * bpv);
+ if (format == PackedInts.Format.PACKED) {
+ assertTrue(msg, (byteCount - 1) * 8 < (long) valueCount * bpv);
+ }
+ }
+ }
+ }
+ }
+
public void testBitsRequired() {
assertEquals(61, PackedInts.bitsRequired((long)Math.pow(2, 61)-1));
assertEquals(61, PackedInts.bitsRequired(0x1FFFFFFFFFFFFFFFL));
@@ -88,21 +108,8 @@ public class TestPackedInts extends Luce
final long fp = out.getFilePointer();
out.close();
- // packed writers should only write longs
- assertEquals(0, (fp - startFp) % 8);
// ensure that finish() added the (valueCount-actualValueCount) missing values
- final long bytes;
- switch (w.getFormat()) {
- case PACKED:
- bytes = (long) Math.ceil((double) valueCount * w.bitsPerValue / 64) << 3;
- break;
- case PACKED_SINGLE_BLOCK:
- final int valuesPerBlock = 64 / w.bitsPerValue;
- bytes = (long) Math.ceil((double) valueCount / valuesPerBlock) << 3;
- break;
- default:
- bytes = -1;
- }
+ final long bytes = w.getFormat().byteCount(PackedInts.VERSION_CURRENT, valueCount, w.bitsPerValue);
assertEquals(bytes, fp - startFp);
{// test header
@@ -170,13 +177,58 @@ public class TestPackedInts extends Luce
long value = intsEnum.get(index);
assertEquals(msg, value, values[index]);
}
+ intsEnum.get(intsEnum.size() - 1);
+ assertEquals(fp, in.getFilePointer());
in.close();
}
d.close();
}
}
}
-
+
+ public void testEndPointer() throws IOException {
+ final Directory dir = newDirectory();
+ final int valueCount = RandomInts.randomIntBetween(random(), 1, 1000);
+ final IndexOutput out = dir.createOutput("tests.bin", newIOContext(random()));
+ for (int i = 0; i < valueCount; ++i) {
+ out.writeLong(0);
+ }
+ out.close();
+ final IndexInput in = dir.openInput("tests.bin", newIOContext(random()));
+ for (int version = PackedInts.VERSION_START; version <= PackedInts.VERSION_CURRENT; ++version) {
+ for (int bpv = 1; bpv <= 64; ++bpv) {
+ for (PackedInts.Format format : PackedInts.Format.values()) {
+ if (!format.isSupported(bpv)) {
+ continue;
+ }
+ final long byteCount = format.byteCount(version, valueCount, bpv);
+ String msg = "format=" + format + ",version=" + version + ",valueCount=" + valueCount + ",bpv=" + bpv;
+
+ // test iterator
+ in.seek(0L);
+ final PackedInts.ReaderIterator it = PackedInts.getReaderIteratorNoHeader(in, format, version, valueCount, bpv, RandomInts.randomIntBetween(random(), 1, 1<<16));
+ for (int i = 0; i < valueCount; ++i) {
+ it.next();
+ }
+ assertEquals(msg, byteCount, in.getFilePointer());
+
+ // test direct reader
+ in.seek(0L);
+ final PackedInts.Reader directReader = PackedInts.getDirectReaderNoHeader(in, format, version, valueCount, bpv);
+ directReader.get(valueCount - 1);
+ assertEquals(msg, byteCount, in.getFilePointer());
+
+ // test reader
+ in.seek(0L);
+ PackedInts.getReaderNoHeader(in, format, version, valueCount, bpv);
+ assertEquals(msg, byteCount, in.getFilePointer());
+ }
+ }
+ }
+ in.close();
+ dir.close();
+ }
+
public void testControlledEquality() {
final int VALUE_COUNT = 255;
final int BITS_PER_VALUE = 8;
@@ -629,6 +681,7 @@ public class TestPackedInts extends Luce
in.close();
directory.deleteFile("packed-ints.bin");
}
+ directory.close();
}
}