You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2013/01/22 00:47:32 UTC
svn commit: r1436692 - in /lucene/dev/branches/lucene4547/lucene/core/src:
java/org/apache/lucene/codecs/compressing/
java/org/apache/lucene/codecs/lucene42/ java/org/apache/lucene/util/packed/
test/org/apache/lucene/util/packed/
Author: jpountz
Date: Mon Jan 21 23:47:31 2013
New Revision: 1436692
URL: http://svn.apache.org/viewvc?rev=1436692&view=rev
Log:
NumericDocValues: Prevent rare extreme values from raising the number of bits per value for everyone.
Added:
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReaderIterator.java (with props)
Modified:
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedWriter.java
lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java?rev=1436692&r1=1436691&r2=1436692&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java Mon Jan 21 23:47:31 2013
@@ -57,7 +57,7 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LongsRef;
-import org.apache.lucene.util.packed.BlockPackedReader;
+import org.apache.lucene.util.packed.BlockPackedReaderIterator;
import org.apache.lucene.util.packed.PackedInts;
@@ -76,7 +76,7 @@ public final class CompressingTermVector
private final int chunkSize;
private final int numDocs;
private boolean closed;
- private final BlockPackedReader reader;
+ private final BlockPackedReaderIterator reader;
// used by clone
private CompressingTermVectorsReader(CompressingTermVectorsReader reader) {
@@ -88,7 +88,7 @@ public final class CompressingTermVector
this.decompressor = reader.decompressor.clone();
this.chunkSize = reader.chunkSize;
this.numDocs = reader.numDocs;
- this.reader = new BlockPackedReader(vectorsStream, packedIntsVersion, BLOCK_SIZE, 0);
+ this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, BLOCK_SIZE, 0);
this.closed = false;
}
@@ -119,7 +119,7 @@ public final class CompressingTermVector
packedIntsVersion = vectorsStream.readVInt();
chunkSize = vectorsStream.readVInt();
decompressor = compressionMode.newDecompressor();
- this.reader = new BlockPackedReader(vectorsStream, packedIntsVersion, BLOCK_SIZE, 0);
+ this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, BLOCK_SIZE, 0);
success = true;
} finally {
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java?rev=1436692&r1=1436691&r2=1436692&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java Mon Jan 21 23:47:31 2013
@@ -36,6 +36,7 @@ import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FST.INPUT_TYPE;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
+import org.apache.lucene.util.packed.BlockPackedWriter;
import org.apache.lucene.util.packed.PackedInts;
/**
@@ -53,7 +54,9 @@ class Lucene42DocValuesConsumer extends
static final byte NUMBER = 0;
static final byte BYTES = 1;
static final byte FST = 2;
-
+
+ static final int BLOCK_SIZE = 4096;
+
final IndexOutput data, meta;
Lucene42DocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
@@ -97,15 +100,10 @@ class Lucene42DocValuesConsumer extends
}
}
- long delta = maxValue - minValue;
- final int bitsPerValue;
- if (delta < 0) {
- bitsPerValue = 64;
- meta.writeByte((byte)0); // delta-compressed
- } else if (uniqueValues != null && PackedInts.bitsRequired(uniqueValues.size()-1) < PackedInts.bitsRequired(delta)) {
+ final long delta = maxValue - minValue;
+ if (uniqueValues != null && (delta < 0 || PackedInts.bitsRequired(uniqueValues.size()-1) < PackedInts.bitsRequired(delta))) {
// smaller to tableize
- bitsPerValue = PackedInts.bitsRequired(uniqueValues.size()-1);
- minValue = 0; // we will write indexes into the table instead of values
+ final int bitsPerValue = PackedInts.bitsRequired(uniqueValues.size()-1);
meta.writeByte((byte)1); // table-compressed
Long[] decode = uniqueValues.toArray(new Long[uniqueValues.size()]);
final HashMap<Long,Integer> encode = new HashMap<Long,Integer>();
@@ -114,39 +112,29 @@ class Lucene42DocValuesConsumer extends
data.writeLong(decode[i]);
encode.put(decode[i], i);
}
- final Iterable<Number> original = values;
- values = new Iterable<Number>() {
- @Override
- public Iterator<Number> iterator() {
- final Iterator<Number> inner = original.iterator();
- return new Iterator<Number>() {
- @Override
- public boolean hasNext() {
- return inner.hasNext();
- }
- @Override
- public Number next() {
- return encode.get(inner.next());
- }
-
- @Override
- public void remove() { throw new UnsupportedOperationException(); }
- };
- }
- };
+ data.writeVInt(PackedInts.VERSION_CURRENT);
+ data.writeVInt(count);
+ data.writeVInt(bitsPerValue);
+
+ final PackedInts.Writer writer = PackedInts.getWriterNoHeader(data, PackedInts.Format.PACKED, count, bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
+ for(Number nv : values) {
+ writer.add(encode.get(nv));
+ }
+ writer.finish();
} else {
- bitsPerValue = PackedInts.bitsRequired(delta);
meta.writeByte((byte)0); // delta-compressed
- }
-
- data.writeLong(minValue);
- final PackedInts.Writer writer = PackedInts.getWriter(data, count, bitsPerValue, PackedInts.COMPACT);
- for(Number nv : values) {
- writer.add(nv.longValue() - minValue);
+ data.writeVInt(PackedInts.VERSION_CURRENT);
+ data.writeVInt(count);
+ data.writeVInt(BLOCK_SIZE);
+
+ final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
+ for (Number nv : values) {
+ writer.add(nv.longValue());
+ }
+ writer.finish();
}
- writer.finish();
}
@Override
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java?rev=1436692&r1=1436691&r2=1436692&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java Mon Jan 21 23:47:31 2013
@@ -42,6 +42,7 @@ import org.apache.lucene.util.fst.FST.Ar
import org.apache.lucene.util.fst.FST.BytesReader;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
+import org.apache.lucene.util.packed.BlockPackedReader;
import org.apache.lucene.util.packed.PackedInts;
class Lucene42DocValuesProducer extends DocValuesProducer {
@@ -139,9 +140,10 @@ class Lucene42DocValuesProducer extends
for (int i = 0; i < decode.length; i++) {
decode[i] = data.readLong();
}
- final long minValue = data.readLong();
- assert minValue == 0;
- final PackedInts.Reader reader = PackedInts.getReader(data);
+ final int packedIntsVersion = data.readVInt();
+ final int count = data.readVInt();
+ final int bitsPerValue = data.readVInt();
+ final PackedInts.Reader reader = PackedInts.getReaderNoHeader(data, PackedInts.Format.PACKED, packedIntsVersion, count, bitsPerValue);
return new NumericDocValues() {
@Override
public long get(int docID) {
@@ -149,12 +151,14 @@ class Lucene42DocValuesProducer extends
}
};
} else {
- final long minValue = data.readLong();
- final PackedInts.Reader reader = PackedInts.getReader(data);
+ final int packedIntsVersion = data.readVInt();
+ final int count = data.readVInt();
+ final int blockSize = data.readVInt();
+ final BlockPackedReader reader = new BlockPackedReader(data, packedIntsVersion, blockSize, count, false);
return new NumericDocValues() {
@Override
public long get(int docID) {
- return minValue + reader.get(docID);
+ return reader.get(docID);
}
};
}
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java?rev=1436692&r1=1436691&r2=1436692&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java Mon Jan 21 23:47:31 2013
@@ -17,225 +17,74 @@ package org.apache.lucene.util.packed;
* limitations under the License.
*/
+import static org.apache.lucene.util.packed.BlockPackedReaderIterator.readVLong;
+import static org.apache.lucene.util.packed.BlockPackedReaderIterator.zigZagDecode;
import static org.apache.lucene.util.packed.BlockPackedWriter.BPV_SHIFT;
import static org.apache.lucene.util.packed.BlockPackedWriter.MIN_VALUE_EQUALS_0;
import static org.apache.lucene.util.packed.BlockPackedWriter.checkBlockSize;
-import java.io.EOFException;
import java.io.IOException;
-import java.util.Arrays;
-import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.util.LongsRef;
/**
- * Reader for sequences of longs written with {@link BlockPackedWriter}.
- * @see BlockPackedWriter
+ * Provides random access to a stream written with {@link BlockPackedWriter}.
* @lucene.internal
*/
public final class BlockPackedReader {
- static long zigZagDecode(long n) {
- return ((n >>> 1) ^ -(n & 1));
- }
-
- // same as DataInput.readVLong but supports negative values
- static long readVLong(DataInput in) throws IOException {
- byte b = in.readByte();
- if (b >= 0) return b;
- long i = b & 0x7FL;
- b = in.readByte();
- i |= (b & 0x7FL) << 7;
- if (b >= 0) return i;
- b = in.readByte();
- i |= (b & 0x7FL) << 14;
- if (b >= 0) return i;
- b = in.readByte();
- i |= (b & 0x7FL) << 21;
- if (b >= 0) return i;
- b = in.readByte();
- i |= (b & 0x7FL) << 28;
- if (b >= 0) return i;
- b = in.readByte();
- i |= (b & 0x7FL) << 35;
- if (b >= 0) return i;
- b = in.readByte();
- i |= (b & 0x7FL) << 42;
- if (b >= 0) return i;
- b = in.readByte();
- i |= (b & 0x7FL) << 49;
- if (b >= 0) return i;
- b = in.readByte();
- i |= (b & 0xFFL) << 56;
- return i;
- }
+ private final int blockShift, blockMask;
+ private final long valueCount;
+ private final long[] minValues;
+ private final PackedInts.Reader[] subReaders;
- DataInput in;
- final int packedIntsVersion;
- long valueCount;
- final int blockSize;
- final long[] values;
- final LongsRef valuesRef;
- byte[] blocks;
- int off;
- long ord;
-
- /** Sole constructor.
- * @param blockSize the number of values of a block, must be equal to the
- * block size of the {@link BlockPackedWriter} which has
- * been used to write the stream
- */
- public BlockPackedReader(DataInput in, int packedIntsVersion, int blockSize, long valueCount) {
+ /** Sole constructor. */
+ public BlockPackedReader(IndexInput in, int packedIntsVersion, int blockSize, long valueCount, boolean direct) throws IOException {
checkBlockSize(blockSize);
- this.packedIntsVersion = packedIntsVersion;
- this.blockSize = blockSize;
- this.values = new long[blockSize];
- this.valuesRef = new LongsRef(this.values, 0, 0);
- reset(in, valueCount);
- }
-
- /** Reset the current reader to wrap a stream of <code>valueCount</code>
- * values contained in <code>in</code>. The block size remains unchanged. */
- public void reset(DataInput in, long valueCount) {
- this.in = in;
- assert valueCount >= 0;
this.valueCount = valueCount;
- off = blockSize;
- ord = 0;
- }
-
- /** Skip exactly <code>count</code> values. */
- public void skip(long count) throws IOException {
- assert count >= 0;
- if (ord + count > valueCount || ord + count < 0) {
- throw new EOFException();
- }
-
- // 1. skip buffered values
- final int skipBuffer = (int) Math.min(count, blockSize - off);
- off += skipBuffer;
- ord += skipBuffer;
- count -= skipBuffer;
- if (count == 0L) {
- return;
- }
-
- // 2. skip as many blocks as necessary
- assert off == blockSize;
- while (count >= blockSize) {
+ blockShift = Long.numberOfTrailingZeros(blockSize);
+ blockMask = blockSize - 1;
+ final int numBlocks = (int) (valueCount / blockSize) + (valueCount % blockSize == 0 ? 0 : 1);
+ if (numBlocks * blockSize < valueCount) {
+ throw new IllegalArgumentException("valueCount is too large for this block size");
+ }
+ long[] minValues = null;
+ subReaders = new PackedInts.Reader[numBlocks];
+ for (int i = 0; i < numBlocks; ++i) {
final int token = in.readByte() & 0xFF;
final int bitsPerValue = token >>> BPV_SHIFT;
if (bitsPerValue > 64) {
throw new IOException("Corrupted");
}
if ((token & MIN_VALUE_EQUALS_0) == 0) {
- readVLong(in);
- }
- final long blockBytes = PackedInts.Format.PACKED.byteCount(packedIntsVersion, blockSize, bitsPerValue);
- skipBytes(blockBytes);
- ord += blockSize;
- count -= blockSize;
- }
- if (count == 0L) {
- return;
- }
-
- // 3. skip last values
- assert count < blockSize;
- refill();
- ord += count;
- off += count;
- }
-
- private void skipBytes(long count) throws IOException {
- if (in instanceof IndexInput) {
- final IndexInput iin = (IndexInput) in;
- iin.seek(iin.getFilePointer() + count);
- } else {
- if (blocks == null) {
- blocks = new byte[blockSize];
- }
- long skipped = 0;
- while (skipped < count) {
- final int toSkip = (int) Math.min(blocks.length, count - skipped);
- in.readBytes(blocks, 0, toSkip);
- skipped += toSkip;
- }
- }
- }
-
- /** Read the next value. */
- public long next() throws IOException {
- if (ord == valueCount) {
- throw new EOFException();
- }
- if (off == blockSize) {
- refill();
- }
- final long value = values[off++];
- ++ord;
- return value;
- }
-
- /** Read between <tt>1</tt> and <code>count</code> values. */
- public LongsRef next(int count) throws IOException {
- assert count > 0;
- if (ord == valueCount) {
- throw new EOFException();
- }
- if (off == blockSize) {
- refill();
- }
-
- count = Math.min(count, blockSize - off);
- count = (int) Math.min(count, valueCount - ord);
-
- valuesRef.offset = off;
- valuesRef.length = count;
- off += count;
- ord += count;
- return valuesRef;
- }
-
- private void refill() throws IOException {
- final int token = in.readByte() & 0xFF;
- final boolean minEquals0 = (token & MIN_VALUE_EQUALS_0) != 0;
- final int bitsPerValue = token >>> BPV_SHIFT;
- if (bitsPerValue > 64) {
- throw new IOException("Corrupted");
- }
- final long minValue = minEquals0 ? 0L : zigZagDecode(1L + readVLong(in));
- assert minEquals0 || minValue != 0;
-
- if (bitsPerValue == 0) {
- Arrays.fill(values, minValue);
- } else {
- final PackedInts.Decoder decoder = PackedInts.getDecoder(PackedInts.Format.PACKED, packedIntsVersion, bitsPerValue);
- final int iterations = blockSize / decoder.valueCount();
- final int blocksSize = iterations * 8 * decoder.blockCount();
- if (blocks == null || blocks.length < blocksSize) {
- blocks = new byte[blocksSize];
+ if (minValues == null) {
+ minValues = new long[numBlocks];
+ }
+ minValues[i] = zigZagDecode(1L + readVLong(in));
}
-
- final int valueCount = (int) Math.min(this.valueCount - ord, blockSize);
- final int blocksCount = (int) PackedInts.Format.PACKED.byteCount(packedIntsVersion, valueCount, bitsPerValue);
- in.readBytes(blocks, 0, blocksCount);
-
- decoder.decode(blocks, 0, values, 0, iterations);
-
- if (minValue != 0) {
- for (int i = 0; i < valueCount; ++i) {
- values[i] += minValue;
+ if (bitsPerValue != 0) {
+ final int size = (int) Math.min(blockSize, valueCount - (long) i * blockSize);
+ if (direct) {
+ final long pointer = in.getFilePointer();
+ subReaders[i] = PackedInts.getDirectReaderNoHeader(in, PackedInts.Format.PACKED, packedIntsVersion, size, bitsPerValue);
+ in.seek(pointer + PackedInts.Format.PACKED.byteCount(packedIntsVersion, size, bitsPerValue));
+ } else {
+ subReaders[i] = PackedInts.getReaderNoHeader(in, PackedInts.Format.PACKED, packedIntsVersion, size, bitsPerValue);
}
}
}
- off = 0;
+ this.minValues = minValues;
}
- /** Return the offset of the next value to read. */
- public long ord() {
- return ord;
+ /** Get value at <code>index</code>. */
+ public long get(long index) {
+ assert index >= 0 && index < valueCount;
+ final int block = (int) (index >>> blockShift);
+ if (subReaders[block] == null) {
+ return minValues == null ? 0 : minValues[block];
+ }
+ final int idx = (int) (index & blockMask);
+ return (minValues == null ? 0 : minValues[block]) + subReaders[block].get(idx);
}
}
Added: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReaderIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReaderIterator.java?rev=1436692&view=auto
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReaderIterator.java (added)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReaderIterator.java Mon Jan 21 23:47:31 2013
@@ -0,0 +1,241 @@
+package org.apache.lucene.util.packed;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import static org.apache.lucene.util.packed.BlockPackedWriter.BPV_SHIFT;
+import static org.apache.lucene.util.packed.BlockPackedWriter.MIN_VALUE_EQUALS_0;
+import static org.apache.lucene.util.packed.BlockPackedWriter.checkBlockSize;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.LongsRef;
+
+/**
+ * Reader for sequences of longs written with {@link BlockPackedWriter}.
+ * @see BlockPackedWriter
+ * @lucene.internal
+ */
+public final class BlockPackedReaderIterator {
+
+ static long zigZagDecode(long n) {
+ return ((n >>> 1) ^ -(n & 1));
+ }
+
+ // same as DataInput.readVLong but supports negative values
+ static long readVLong(DataInput in) throws IOException {
+ byte b = in.readByte();
+ if (b >= 0) return b;
+ long i = b & 0x7FL;
+ b = in.readByte();
+ i |= (b & 0x7FL) << 7;
+ if (b >= 0) return i;
+ b = in.readByte();
+ i |= (b & 0x7FL) << 14;
+ if (b >= 0) return i;
+ b = in.readByte();
+ i |= (b & 0x7FL) << 21;
+ if (b >= 0) return i;
+ b = in.readByte();
+ i |= (b & 0x7FL) << 28;
+ if (b >= 0) return i;
+ b = in.readByte();
+ i |= (b & 0x7FL) << 35;
+ if (b >= 0) return i;
+ b = in.readByte();
+ i |= (b & 0x7FL) << 42;
+ if (b >= 0) return i;
+ b = in.readByte();
+ i |= (b & 0x7FL) << 49;
+ if (b >= 0) return i;
+ b = in.readByte();
+ i |= (b & 0xFFL) << 56;
+ return i;
+ }
+
+ DataInput in;
+ final int packedIntsVersion;
+ long valueCount;
+ final int blockSize;
+ final long[] values;
+ final LongsRef valuesRef;
+ byte[] blocks;
+ int off;
+ long ord;
+
+ /** Sole constructor.
+ * @param blockSize the number of values of a block, must be equal to the
+ * block size of the {@link BlockPackedWriter} which has
+ * been used to write the stream
+ */
+ public BlockPackedReaderIterator(DataInput in, int packedIntsVersion, int blockSize, long valueCount) {
+ checkBlockSize(blockSize);
+ this.packedIntsVersion = packedIntsVersion;
+ this.blockSize = blockSize;
+ this.values = new long[blockSize];
+ this.valuesRef = new LongsRef(this.values, 0, 0);
+ reset(in, valueCount);
+ }
+
+ /** Reset the current reader to wrap a stream of <code>valueCount</code>
+ * values contained in <code>in</code>. The block size remains unchanged. */
+ public void reset(DataInput in, long valueCount) {
+ this.in = in;
+ assert valueCount >= 0;
+ this.valueCount = valueCount;
+ off = blockSize;
+ ord = 0;
+ }
+
+ /** Skip exactly <code>count</code> values. */
+ public void skip(long count) throws IOException {
+ assert count >= 0;
+ if (ord + count > valueCount || ord + count < 0) {
+ throw new EOFException();
+ }
+
+ // 1. skip buffered values
+ final int skipBuffer = (int) Math.min(count, blockSize - off);
+ off += skipBuffer;
+ ord += skipBuffer;
+ count -= skipBuffer;
+ if (count == 0L) {
+ return;
+ }
+
+ // 2. skip as many blocks as necessary
+ assert off == blockSize;
+ while (count >= blockSize) {
+ final int token = in.readByte() & 0xFF;
+ final int bitsPerValue = token >>> BPV_SHIFT;
+ if (bitsPerValue > 64) {
+ throw new IOException("Corrupted");
+ }
+ if ((token & MIN_VALUE_EQUALS_0) == 0) {
+ readVLong(in);
+ }
+ final long blockBytes = PackedInts.Format.PACKED.byteCount(packedIntsVersion, blockSize, bitsPerValue);
+ skipBytes(blockBytes);
+ ord += blockSize;
+ count -= blockSize;
+ }
+ if (count == 0L) {
+ return;
+ }
+
+ // 3. skip last values
+ assert count < blockSize;
+ refill();
+ ord += count;
+ off += count;
+ }
+
+ private void skipBytes(long count) throws IOException {
+ if (in instanceof IndexInput) {
+ final IndexInput iin = (IndexInput) in;
+ iin.seek(iin.getFilePointer() + count);
+ } else {
+ if (blocks == null) {
+ blocks = new byte[blockSize];
+ }
+ long skipped = 0;
+ while (skipped < count) {
+ final int toSkip = (int) Math.min(blocks.length, count - skipped);
+ in.readBytes(blocks, 0, toSkip);
+ skipped += toSkip;
+ }
+ }
+ }
+
+ /** Read the next value. */
+ public long next() throws IOException {
+ if (ord == valueCount) {
+ throw new EOFException();
+ }
+ if (off == blockSize) {
+ refill();
+ }
+ final long value = values[off++];
+ ++ord;
+ return value;
+ }
+
+ /** Read between <tt>1</tt> and <code>count</code> values. */
+ public LongsRef next(int count) throws IOException {
+ assert count > 0;
+ if (ord == valueCount) {
+ throw new EOFException();
+ }
+ if (off == blockSize) {
+ refill();
+ }
+
+ count = Math.min(count, blockSize - off);
+ count = (int) Math.min(count, valueCount - ord);
+
+ valuesRef.offset = off;
+ valuesRef.length = count;
+ off += count;
+ ord += count;
+ return valuesRef;
+ }
+
+ private void refill() throws IOException {
+ final int token = in.readByte() & 0xFF;
+ final boolean minEquals0 = (token & MIN_VALUE_EQUALS_0) != 0;
+ final int bitsPerValue = token >>> BPV_SHIFT;
+ if (bitsPerValue > 64) {
+ throw new IOException("Corrupted");
+ }
+ final long minValue = minEquals0 ? 0L : zigZagDecode(1L + readVLong(in));
+ assert minEquals0 || minValue != 0;
+
+ if (bitsPerValue == 0) {
+ Arrays.fill(values, minValue);
+ } else {
+ final PackedInts.Decoder decoder = PackedInts.getDecoder(PackedInts.Format.PACKED, packedIntsVersion, bitsPerValue);
+ final int iterations = blockSize / decoder.valueCount();
+ final int blocksSize = iterations * 8 * decoder.blockCount();
+ if (blocks == null || blocks.length < blocksSize) {
+ blocks = new byte[blocksSize];
+ }
+
+ final int valueCount = (int) Math.min(this.valueCount - ord, blockSize);
+ final int blocksCount = (int) PackedInts.Format.PACKED.byteCount(packedIntsVersion, valueCount, bitsPerValue);
+ in.readBytes(blocks, 0, blocksCount);
+
+ decoder.decode(blocks, 0, values, 0, iterations);
+
+ if (minValue != 0) {
+ for (int i = 0; i < valueCount; ++i) {
+ values[i] += minValue;
+ }
+ }
+ }
+ off = 0;
+ }
+
+ /** Return the offset of the next value to read. */
+ public long ord() {
+ return ord;
+ }
+
+}
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedWriter.java?rev=1436692&r1=1436691&r2=1436692&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedWriter.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedWriter.java Mon Jan 21 23:47:31 2013
@@ -30,7 +30,7 @@ import org.apache.lucene.store.DataOutpu
* using as few bits as possible. Memory usage of this class is proportional to
* the block size. Each block has an overhead between 1 and 10 bytes to store
* the minimum value and the number of bits per value of the block.
- * @see BlockPackedReader
+ * @see BlockPackedReaderIterator
* @lucene.internal
*/
public final class BlockPackedWriter {
@@ -43,8 +43,11 @@ public final class BlockPackedWriter {
if (blockSize <= 0 || blockSize > MAX_BLOCK_SIZE) {
throw new IllegalArgumentException("blockSize must be > 0 and < " + MAX_BLOCK_SIZE + ", got " + blockSize);
}
- if (blockSize % 64 != 0) {
- throw new IllegalArgumentException("blockSize must be a multiple of 64, got " + blockSize);
+ if (blockSize < 64) {
+ throw new IllegalArgumentException("blockSize must be >= 64, got " + blockSize);
+ }
+ if ((blockSize & (blockSize - 1)) != 0) {
+ throw new IllegalArgumentException("blockSize must be a power of two, got " + blockSize);
}
}
Modified: lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java?rev=1436692&r1=1436691&r2=1436692&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java Mon Jan 21 23:47:31 2013
@@ -877,10 +877,11 @@ public class TestPackedInts extends Luce
in.close();
dir.close();
}
+
public void testBlockPackedReaderWriter() throws IOException {
final int iters = atLeast(2);
for (int iter = 0; iter < iters; ++iter) {
- final int blockSize = 64 * _TestUtil.nextInt(random(), 1, 1 << 12);
+ final int blockSize = 1 << _TestUtil.nextInt(random(), 6, 18);
final int valueCount = random().nextInt(1 << 18);
final long[] values = new long[valueCount];
long minValue = 0;
@@ -912,30 +913,29 @@ public class TestPackedInts extends Luce
final long fp = out.getFilePointer();
out.close();
- DataInput in = dir.openInput("out.bin", IOContext.DEFAULT);
- if (random().nextBoolean()) {
- byte[] buf = new byte[(int) fp];
- in.readBytes(buf, 0, (int) fp);
- ((IndexInput) in).close();
- in = new ByteArrayDataInput(buf);
- }
- final BlockPackedReader reader = new BlockPackedReader(in, PackedInts.VERSION_CURRENT, blockSize, valueCount);
+ IndexInput in1 = dir.openInput("out.bin", IOContext.DEFAULT);
+ byte[] buf = new byte[(int) fp];
+ in1.readBytes(buf, 0, (int) fp);
+ in1.seek(0L);
+ ByteArrayDataInput in2 = new ByteArrayDataInput(buf);
+ final DataInput in = random().nextBoolean() ? in1 : in2;
+ final BlockPackedReaderIterator it = new BlockPackedReaderIterator(in, PackedInts.VERSION_CURRENT, blockSize, valueCount);
for (int i = 0; i < valueCount; ) {
if (random().nextBoolean()) {
- assertEquals("" + i, values[i], reader.next());
+ assertEquals("" + i, values[i], it.next());
++i;
} else {
- final LongsRef nextValues = reader.next(_TestUtil.nextInt(random(), 1, 1024));
+ final LongsRef nextValues = it.next(_TestUtil.nextInt(random(), 1, 1024));
for (int j = 0; j < nextValues.length; ++j) {
assertEquals("" + (i + j), values[i + j], nextValues.longs[nextValues.offset + j]);
}
i += nextValues.length;
}
- assertEquals(i, reader.ord());
+ assertEquals(i, it.ord());
}
assertEquals(fp, in instanceof ByteArrayDataInput ? ((ByteArrayDataInput) in).getPosition() : ((IndexInput) in).getFilePointer());
try {
- reader.next();
+ it.next();
assertTrue(false);
} catch (IOException e) {
// OK
@@ -946,31 +946,35 @@ public class TestPackedInts extends Luce
} else {
((IndexInput) in).seek(0L);
}
- final BlockPackedReader reader2 = new BlockPackedReader(in, PackedInts.VERSION_CURRENT, blockSize, valueCount);
+ final BlockPackedReaderIterator it2 = new BlockPackedReaderIterator(in, PackedInts.VERSION_CURRENT, blockSize, valueCount);
int i = 0;
while (true) {
final int skip = _TestUtil.nextInt(random(), 0, valueCount - i);
- reader2.skip(skip);
+ it2.skip(skip);
i += skip;
- assertEquals(i, reader2.ord());
+ assertEquals(i, it2.ord());
if (i == valueCount) {
break;
} else {
- assertEquals(values[i], reader2.next());
+ assertEquals(values[i], it2.next());
++i;
}
}
assertEquals(fp, in instanceof ByteArrayDataInput ? ((ByteArrayDataInput) in).getPosition() : ((IndexInput) in).getFilePointer());
try {
- reader2.skip(1);
+ it2.skip(1);
assertTrue(false);
} catch (IOException e) {
// OK
}
- if (in instanceof IndexInput) {
- ((IndexInput) in).close();
+ in1.seek(0L);
+ final BlockPackedReader reader = new BlockPackedReader(in1, PackedInts.VERSION_CURRENT, blockSize, valueCount, random().nextBoolean());
+ for (i = 0; i < valueCount; ++i) {
+ assertEquals("i=" + i, values[i], reader.get(i));
}
+ assertEquals(in1.getFilePointer(), in1.length());
+ in1.close();
dir.close();
}
}