You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by iv...@apache.org on 2021/04/19 05:18:50 UTC
[lucene] branch main updated: LUCENE-9907: Remove dependency on PackedInts#getReaderNoHeader in MonotonicBlockPackedReader (#85)
This is an automated email from the ASF dual-hosted git repository.
ivera pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new d152317 LUCENE-9907: Remove dependency on PackedInts#getReaderNoHeader in MonotonicBlockPackedReader (#85)
d152317 is described below
commit d15231709ae631f6dc588a468a9f09dba6f0c293
Author: Ignacio Vera <iv...@apache.org>
AuthorDate: Mon Apr 19 07:18:41 2021 +0200
LUCENE-9907: Remove dependency on PackedInts#getReaderNoHeader in MonotonicBlockPackedReader (#85)
---
.../blockterms/FixedGapTermsIndexReader.java | 6 +-
.../util/packed/MonotonicBlockPackedReader.java | 69 ++++++++++++++--------
.../apache/lucene/util/packed/TestPackedInts.java | 3 +-
3 files changed, 48 insertions(+), 30 deletions(-)
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java
index 39ad17d..ee21250 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java
@@ -278,13 +278,11 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
// records offsets into main terms dict file
termsDictOffsets =
- MonotonicBlockPackedReader.of(
- clone, packedIntsVersion, blocksize, numIndexTerms, false);
+ MonotonicBlockPackedReader.of(clone, packedIntsVersion, blocksize, numIndexTerms);
// records offsets into byte[] term data
termOffsets =
- MonotonicBlockPackedReader.of(
- clone, packedIntsVersion, blocksize, 1 + numIndexTerms, false);
+ MonotonicBlockPackedReader.of(clone, packedIntsVersion, blocksize, 1 + numIndexTerms);
} finally {
clone.close();
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java b/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java
index dd52fbb..6893201 100644
--- a/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java
@@ -38,31 +38,34 @@ public class MonotonicBlockPackedReader extends LongValues implements Accountabl
return origin + (long) (average * (long) index);
}
+ private static final int BLOCK_SIZE = Byte.SIZE; // #bits in a block
+ private static final int BLOCK_BITS = 3; // The #bits representing BLOCK_SIZE
+ private static final int MOD_MASK = BLOCK_SIZE - 1; // x % BLOCK_SIZE
+
final int blockShift, blockMask;
final long valueCount;
final long[] minValues;
final float[] averages;
- final PackedInts.Reader[] subReaders;
+ final LongValues[] subReaders;
final long sumBPV;
+ final long totalByteCount;
/** Sole constructor. */
public static MonotonicBlockPackedReader of(
- IndexInput in, int packedIntsVersion, int blockSize, long valueCount, boolean direct)
- throws IOException {
- return new MonotonicBlockPackedReader(in, packedIntsVersion, blockSize, valueCount, direct);
+ IndexInput in, int packedIntsVersion, int blockSize, long valueCount) throws IOException {
+ return new MonotonicBlockPackedReader(in, packedIntsVersion, blockSize, valueCount);
}
private MonotonicBlockPackedReader(
- IndexInput in, int packedIntsVersion, int blockSize, long valueCount, boolean direct)
- throws IOException {
+ IndexInput in, int packedIntsVersion, int blockSize, long valueCount) throws IOException {
this.valueCount = valueCount;
blockShift = checkBlockSize(blockSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
blockMask = blockSize - 1;
final int numBlocks = numBlocks(valueCount, blockSize);
minValues = new long[numBlocks];
averages = new float[numBlocks];
- subReaders = new PackedInts.Reader[numBlocks];
- long sumBPV = 0;
+ subReaders = new LongValues[numBlocks];
+ long sumBPV = 0, totalByteCount = 0;
for (int i = 0; i < numBlocks; ++i) {
minValues[i] = in.readZLong();
averages[i] = Float.intBitsToFloat(in.readInt());
@@ -72,24 +75,44 @@ public class MonotonicBlockPackedReader extends LongValues implements Accountabl
throw new IOException("Corrupted");
}
if (bitsPerValue == 0) {
- subReaders[i] = new PackedInts.NullReader(blockSize);
+ subReaders[i] = LongValues.ZEROES;
} else {
final int size = (int) Math.min(blockSize, valueCount - (long) i * blockSize);
- if (direct) {
- final long pointer = in.getFilePointer();
- subReaders[i] =
- PackedInts.getDirectReaderNoHeader(
- in, PackedInts.Format.PACKED, packedIntsVersion, size, bitsPerValue);
- in.seek(
- pointer + PackedInts.Format.PACKED.byteCount(packedIntsVersion, size, bitsPerValue));
- } else {
- subReaders[i] =
- PackedInts.getReaderNoHeader(
- in, PackedInts.Format.PACKED, packedIntsVersion, size, bitsPerValue);
- }
+ final int byteCount =
+ Math.toIntExact(
+ PackedInts.Format.PACKED.byteCount(packedIntsVersion, size, bitsPerValue));
+ totalByteCount += byteCount;
+ final byte[] blocks = new byte[byteCount];
+ in.readBytes(blocks, 0, byteCount);
+ final long maskRight = ((1L << bitsPerValue) - 1);
+ final int bpvMinusBlockSize = bitsPerValue - BLOCK_SIZE;
+ subReaders[i] =
+ new LongValues() {
+ @Override
+ public long get(long index) {
+ // The abstract index in a bit stream
+ final long majorBitPos = index * bitsPerValue;
+ // The offset of the first block in the backing byte-array
+ int blockOffset = (int) (majorBitPos >>> BLOCK_BITS);
+ // The number of value-bits after the first byte
+ long endBits = (majorBitPos & MOD_MASK) + bpvMinusBlockSize;
+ if (endBits <= 0) {
+ // Single block
+ return ((blocks[blockOffset] & 0xFFL) >>> -endBits) & maskRight;
+ }
+ // Multiple blocks
+ long value = ((blocks[blockOffset++] & 0xFFL) << endBits) & maskRight;
+ while (endBits > BLOCK_SIZE) {
+ endBits -= BLOCK_SIZE;
+ value |= (blocks[blockOffset++] & 0xFFL) << endBits;
+ }
+ return value | ((blocks[blockOffset] & 0xFFL) >>> (BLOCK_SIZE - endBits));
+ }
+ };
}
}
this.sumBPV = sumBPV;
+ this.totalByteCount = totalByteCount;
}
@Override
@@ -110,9 +133,7 @@ public class MonotonicBlockPackedReader extends LongValues implements Accountabl
long sizeInBytes = 0;
sizeInBytes += RamUsageEstimator.sizeOf(minValues);
sizeInBytes += RamUsageEstimator.sizeOf(averages);
- for (PackedInts.Reader reader : subReaders) {
- sizeInBytes += reader.ramBytesUsed();
- }
+ sizeInBytes += totalByteCount;
return sizeInBytes;
}
diff --git a/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java b/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
index 2eb03a4..32b5ef2 100644
--- a/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
+++ b/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
@@ -1371,8 +1371,7 @@ public class TestPackedInts extends LuceneTestCase {
final IndexInput in = dir.openInput("out.bin", IOContext.DEFAULT);
final MonotonicBlockPackedReader reader =
- MonotonicBlockPackedReader.of(
- in, PackedInts.VERSION_CURRENT, blockSize, valueCount, random().nextBoolean());
+ MonotonicBlockPackedReader.of(in, PackedInts.VERSION_CURRENT, blockSize, valueCount);
assertEquals(fp, in.getFilePointer());
for (int i = 0; i < valueCount; ++i) {
assertEquals("i=" + i, values[i], reader.get(i));