You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by iv...@apache.org on 2021/04/19 05:18:50 UTC

[lucene] branch main updated: LUCENE-9907: Remove dependency on PackedInts#getReaderNoHeader in MonotonicBlockPackedReader (#85)

This is an automated email from the ASF dual-hosted git repository.

ivera pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new d152317  LUCENE-9907: Remove dependency on PackedInts#getReaderNoHeader in MonotonicBlockPackedReader (#85)
d152317 is described below

commit d15231709ae631f6dc588a468a9f09dba6f0c293
Author: Ignacio Vera <iv...@apache.org>
AuthorDate: Mon Apr 19 07:18:41 2021 +0200

    LUCENE-9907: Remove dependency on PackedInts#getReaderNoHeader in MonotonicBlockPackedReader (#85)
---
 .../blockterms/FixedGapTermsIndexReader.java       |  6 +-
 .../util/packed/MonotonicBlockPackedReader.java    | 69 ++++++++++++++--------
 .../apache/lucene/util/packed/TestPackedInts.java  |  3 +-
 3 files changed, 48 insertions(+), 30 deletions(-)

diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java
index 39ad17d..ee21250 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java
@@ -278,13 +278,11 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
 
         // records offsets into main terms dict file
         termsDictOffsets =
-            MonotonicBlockPackedReader.of(
-                clone, packedIntsVersion, blocksize, numIndexTerms, false);
+            MonotonicBlockPackedReader.of(clone, packedIntsVersion, blocksize, numIndexTerms);
 
         // records offsets into byte[] term data
         termOffsets =
-            MonotonicBlockPackedReader.of(
-                clone, packedIntsVersion, blocksize, 1 + numIndexTerms, false);
+            MonotonicBlockPackedReader.of(clone, packedIntsVersion, blocksize, 1 + numIndexTerms);
       } finally {
         clone.close();
       }
diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java b/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java
index dd52fbb..6893201 100644
--- a/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java
@@ -38,31 +38,34 @@ public class MonotonicBlockPackedReader extends LongValues implements Accountabl
     return origin + (long) (average * (long) index);
   }
 
+  private static final int BLOCK_SIZE = Byte.SIZE; // #bits in a block
+  private static final int BLOCK_BITS = 3; // log2(BLOCK_SIZE): bitPos >>> BLOCK_BITS is the byte index
+  private static final int MOD_MASK = BLOCK_SIZE - 1; // (x & MOD_MASK) == x % BLOCK_SIZE
+
   final int blockShift, blockMask;
   final long valueCount;
   final long[] minValues;
   final float[] averages;
-  final PackedInts.Reader[] subReaders;
+  final LongValues[] subReaders;
   final long sumBPV;
+  final long totalByteCount;
 
   /** Sole constructor. */
   public static MonotonicBlockPackedReader of(
-      IndexInput in, int packedIntsVersion, int blockSize, long valueCount, boolean direct)
-      throws IOException {
-    return new MonotonicBlockPackedReader(in, packedIntsVersion, blockSize, valueCount, direct);
+      IndexInput in, int packedIntsVersion, int blockSize, long valueCount) throws IOException {
+    return new MonotonicBlockPackedReader(in, packedIntsVersion, blockSize, valueCount);
   }
 
   private MonotonicBlockPackedReader(
-      IndexInput in, int packedIntsVersion, int blockSize, long valueCount, boolean direct)
-      throws IOException {
+      IndexInput in, int packedIntsVersion, int blockSize, long valueCount) throws IOException {
     this.valueCount = valueCount;
     blockShift = checkBlockSize(blockSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
     blockMask = blockSize - 1;
     final int numBlocks = numBlocks(valueCount, blockSize);
     minValues = new long[numBlocks];
     averages = new float[numBlocks];
-    subReaders = new PackedInts.Reader[numBlocks];
-    long sumBPV = 0;
+    subReaders = new LongValues[numBlocks];
+    long sumBPV = 0, totalByteCount = 0;
     for (int i = 0; i < numBlocks; ++i) {
       minValues[i] = in.readZLong();
       averages[i] = Float.intBitsToFloat(in.readInt());
@@ -72,24 +75,44 @@ public class MonotonicBlockPackedReader extends LongValues implements Accountabl
         throw new IOException("Corrupted");
       }
       if (bitsPerValue == 0) {
-        subReaders[i] = new PackedInts.NullReader(blockSize);
+        subReaders[i] = LongValues.ZEROES;
       } else {
         final int size = (int) Math.min(blockSize, valueCount - (long) i * blockSize);
-        if (direct) {
-          final long pointer = in.getFilePointer();
-          subReaders[i] =
-              PackedInts.getDirectReaderNoHeader(
-                  in, PackedInts.Format.PACKED, packedIntsVersion, size, bitsPerValue);
-          in.seek(
-              pointer + PackedInts.Format.PACKED.byteCount(packedIntsVersion, size, bitsPerValue));
-        } else {
-          subReaders[i] =
-              PackedInts.getReaderNoHeader(
-                  in, PackedInts.Format.PACKED, packedIntsVersion, size, bitsPerValue);
-        }
+        final int byteCount =
+            Math.toIntExact(
+                PackedInts.Format.PACKED.byteCount(packedIntsVersion, size, bitsPerValue));
+        totalByteCount += byteCount;
+        final byte[] blocks = new byte[byteCount];
+        in.readBytes(blocks, 0, byteCount);
+        final long maskRight = ((1L << bitsPerValue) - 1);
+        final int bpvMinusBlockSize = bitsPerValue - BLOCK_SIZE;
+        subReaders[i] =
+            new LongValues() {
+              @Override
+              public long get(long index) {
+                // The abstract index in a bit stream
+                final long majorBitPos = index * bitsPerValue;
+                // The offset of the first block in the backing byte-array
+                int blockOffset = (int) (majorBitPos >>> BLOCK_BITS);
+                // The number of value-bits after the first byte
+                long endBits = (majorBitPos & MOD_MASK) + bpvMinusBlockSize;
+                if (endBits <= 0) {
+                  // Single block
+                  return ((blocks[blockOffset] & 0xFFL) >>> -endBits) & maskRight;
+                }
+                // Multiple blocks
+                long value = ((blocks[blockOffset++] & 0xFFL) << endBits) & maskRight;
+                while (endBits > BLOCK_SIZE) {
+                  endBits -= BLOCK_SIZE;
+                  value |= (blocks[blockOffset++] & 0xFFL) << endBits;
+                }
+                return value | ((blocks[blockOffset] & 0xFFL) >>> (BLOCK_SIZE - endBits));
+              }
+            };
       }
     }
     this.sumBPV = sumBPV;
+    this.totalByteCount = totalByteCount;
   }
 
   @Override
@@ -110,9 +133,7 @@ public class MonotonicBlockPackedReader extends LongValues implements Accountabl
     long sizeInBytes = 0;
     sizeInBytes += RamUsageEstimator.sizeOf(minValues);
     sizeInBytes += RamUsageEstimator.sizeOf(averages);
-    for (PackedInts.Reader reader : subReaders) {
-      sizeInBytes += reader.ramBytesUsed();
-    }
+    sizeInBytes += totalByteCount;
     return sizeInBytes;
   }
 
diff --git a/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java b/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
index 2eb03a4..32b5ef2 100644
--- a/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
+++ b/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
@@ -1371,8 +1371,7 @@ public class TestPackedInts extends LuceneTestCase {
 
       final IndexInput in = dir.openInput("out.bin", IOContext.DEFAULT);
       final MonotonicBlockPackedReader reader =
-          MonotonicBlockPackedReader.of(
-              in, PackedInts.VERSION_CURRENT, blockSize, valueCount, random().nextBoolean());
+          MonotonicBlockPackedReader.of(in, PackedInts.VERSION_CURRENT, blockSize, valueCount);
       assertEquals(fp, in.getFilePointer());
       for (int i = 0; i < valueCount; ++i) {
         assertEquals("i=" + i, values[i], reader.get(i));