You are viewing a plain text version of this content. (The canonical link for it was not preserved in this plain-text copy.)
Posted to commits@lucene.apache.org by iv...@apache.org on 2021/04/15 14:04:22 UTC

[lucene] branch main updated: LUCENE-9907: Remove packedInts#getReaderNoHeader dependency on TermsVectorFieldsFormat (#72)

This is an automated email from the ASF dual-hosted git repository.

ivera pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new 873ac5f  LUCENE-9907: Remove packedInts#getReaderNoHeader dependency on TermsVectorFieldsFormat (#72)
873ac5f is described below

commit 873ac5f162a9b5e138319b468e91d7dd28b22397
Author: Ignacio Vera <iv...@apache.org>
AuthorDate: Thu Apr 15 16:04:13 2021 +0200

    LUCENE-9907: Remove packedInts#getReaderNoHeader dependency on TermsVectorFieldsFormat (#72)
---
 .../Lucene90CompressingTermVectorsReader.java      | 65 ++++++++++------------
 .../Lucene90CompressingTermVectorsWriter.java      | 47 ++++++++--------
 2 files changed, 55 insertions(+), 57 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java
index 6e5e0d3..ec8823d 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java
@@ -30,6 +30,8 @@ import static org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingT
 import static org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingTermVectorsWriter.VERSION_START;
 
 import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Collections;
 import java.util.Iterator;
 import java.util.NoSuchElementException;
 import org.apache.lucene.codecs.CodecUtil;
@@ -50,15 +52,21 @@ import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ByteBuffersDataInput;
+import org.apache.lucene.store.ByteBuffersDataOutput;
 import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.RandomAccessInput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LongValues;
 import org.apache.lucene.util.LongsRef;
 import org.apache.lucene.util.packed.BlockPackedReaderIterator;
+import org.apache.lucene.util.packed.DirectReader;
+import org.apache.lucene.util.packed.DirectWriter;
 import org.apache.lucene.util.packed.PackedInts;
 
 /**
@@ -295,6 +303,13 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
     return new Lucene90CompressingTermVectorsReader(this);
   }
 
+  private static RandomAccessInput slice(IndexInput in) throws IOException {
+    final int length = in.readVInt();
+    final byte[] bytes = new byte[length];
+    in.readBytes(bytes, 0, length);
+    return new ByteBuffersDataInput(Collections.singletonList(ByteBuffer.wrap(bytes)));
+  }
+
   @Override
   public Fields get(int doc) throws IOException {
     ensureOpen();
@@ -368,38 +383,25 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
 
     // read field numbers and flags
     final int[] fieldNumOffs = new int[numFields];
-    final PackedInts.Reader flags;
+    final LongValues flags;
     {
-      final int bitsPerOff = PackedInts.bitsRequired(fieldNums.length - 1);
-      final PackedInts.Reader allFieldNumOffs =
-          PackedInts.getReaderNoHeader(
-              vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalFields, bitsPerOff);
+      final int bitsPerOff = DirectWriter.bitsRequired(fieldNums.length - 1);
+      final LongValues allFieldNumOffs = DirectReader.getInstance(slice(vectorsStream), bitsPerOff);
       switch (vectorsStream.readVInt()) {
         case 0:
-          final PackedInts.Reader fieldFlags =
-              PackedInts.getReaderNoHeader(
-                  vectorsStream,
-                  PackedInts.Format.PACKED,
-                  packedIntsVersion,
-                  fieldNums.length,
-                  FLAGS_BITS);
-          PackedInts.Mutable f = PackedInts.getMutable(totalFields, FLAGS_BITS, PackedInts.COMPACT);
+          final LongValues fieldFlags = DirectReader.getInstance(slice(vectorsStream), FLAGS_BITS);
+          final ByteBuffersDataOutput out = new ByteBuffersDataOutput();
+          final DirectWriter writer = DirectWriter.getInstance(out, totalFields, FLAGS_BITS);
           for (int i = 0; i < totalFields; ++i) {
             final int fieldNumOff = (int) allFieldNumOffs.get(i);
             assert fieldNumOff >= 0 && fieldNumOff < fieldNums.length;
-            final int fgs = (int) fieldFlags.get(fieldNumOff);
-            f.set(i, fgs);
+            writer.add(fieldFlags.get(fieldNumOff));
           }
-          flags = f;
+          writer.finish();
+          flags = DirectReader.getInstance(out.toDataInput(), FLAGS_BITS);
           break;
         case 1:
-          flags =
-              PackedInts.getReaderNoHeader(
-                  vectorsStream,
-                  PackedInts.Format.PACKED,
-                  packedIntsVersion,
-                  totalFields,
-                  FLAGS_BITS);
+          flags = DirectReader.getInstance(slice(vectorsStream), FLAGS_BITS);
           break;
         default:
           throw new AssertionError();
@@ -410,17 +412,11 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
     }
 
     // number of terms per field for all fields
-    final PackedInts.Reader numTerms;
+    final LongValues numTerms;
     final int totalTerms;
     {
       final int bitsRequired = vectorsStream.readVInt();
-      numTerms =
-          PackedInts.getReaderNoHeader(
-              vectorsStream,
-              PackedInts.Format.PACKED,
-              packedIntsVersion,
-              totalFields,
-              bitsRequired);
+      numTerms = DirectReader.getInstance(slice(vectorsStream), bitsRequired);
       int sum = 0;
       for (int i = 0; i < totalFields; ++i) {
         sum += numTerms.get(i);
@@ -711,8 +707,7 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
   }
 
   // field -> term index -> position index
-  private int[][] positionIndex(
-      int skip, int numFields, PackedInts.Reader numTerms, int[] termFreqs) {
+  private int[][] positionIndex(int skip, int numFields, LongValues numTerms, int[] termFreqs) {
     final int[][] positionIndex = new int[numFields][];
     int termIndex = 0;
     for (int i = 0; i < skip; ++i) {
@@ -734,8 +729,8 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
   private int[][] readPositions(
       int skip,
       int numFields,
-      PackedInts.Reader flags,
-      PackedInts.Reader numTerms,
+      LongValues flags,
+      LongValues numTerms,
       int[] termFreqs,
       int flag,
       final int totalPositions,
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsWriter.java
index 65b3442..ed54ce7 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsWriter.java
@@ -51,6 +51,7 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.packed.BlockPackedWriter;
+import org.apache.lucene.util.packed.DirectWriter;
 import org.apache.lucene.util.packed.PackedInts;
 
 /**
@@ -74,7 +75,7 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
   static final int POSITIONS = 0x01;
   static final int OFFSETS = 0x02;
   static final int PAYLOADS = 0x04;
-  static final int FLAGS_BITS = PackedInts.bitsRequired(POSITIONS | OFFSETS | PAYLOADS);
+  static final int FLAGS_BITS = DirectWriter.bitsRequired(POSITIONS | OFFSETS | PAYLOADS);
 
   private final String segment;
   private FieldsIndexWriter indexWriter;
@@ -223,6 +224,7 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
   private final ByteBuffersDataOutput payloadBytes; // buffered term payloads
   private final BlockPackedWriter writer;
   private final int maxDocsPerChunk; // hard limit on number of docs per chunk
+  private final ByteBuffersDataOutput scratchBuffer = ByteBuffersDataOutput.newResettableInstance();
 
   /** Sole constructor. */
   Lucene90CompressingTermVectorsWriter(
@@ -478,13 +480,10 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
   }
 
   private void flushFields(int totalFields, int[] fieldNums) throws IOException {
-    final PackedInts.Writer writer =
-        PackedInts.getWriterNoHeader(
-            vectorsStream,
-            PackedInts.Format.PACKED,
-            totalFields,
-            PackedInts.bitsRequired(fieldNums.length - 1),
-            1);
+    scratchBuffer.reset();
+    final DirectWriter writer =
+        DirectWriter.getInstance(
+            scratchBuffer, totalFields, DirectWriter.bitsRequired(fieldNums.length - 1));
     for (DocData dd : pendingDocs) {
       for (FieldData fd : dd.fields) {
         final int fieldNumIndex = Arrays.binarySearch(fieldNums, fd.fieldNum);
@@ -493,6 +492,8 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
       }
     }
     writer.finish();
+    vectorsStream.writeVLong(scratchBuffer.size());
+    scratchBuffer.copyTo(vectorsStream);
   }
 
   private void flushFlags(int totalFields, int[] fieldNums) throws IOException {
@@ -517,28 +518,29 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
     if (nonChangingFlags) {
       // write one flag per field num
       vectorsStream.writeVInt(0);
-      final PackedInts.Writer writer =
-          PackedInts.getWriterNoHeader(
-              vectorsStream, PackedInts.Format.PACKED, fieldFlags.length, FLAGS_BITS, 1);
+      scratchBuffer.reset();
+      final DirectWriter writer =
+          DirectWriter.getInstance(scratchBuffer, fieldFlags.length, FLAGS_BITS);
       for (int flags : fieldFlags) {
         assert flags >= 0;
         writer.add(flags);
       }
-      assert writer.ord() == fieldFlags.length - 1;
       writer.finish();
+      vectorsStream.writeVInt(Math.toIntExact(scratchBuffer.size()));
+      scratchBuffer.copyTo(vectorsStream);
     } else {
       // write one flag for every field instance
       vectorsStream.writeVInt(1);
-      final PackedInts.Writer writer =
-          PackedInts.getWriterNoHeader(
-              vectorsStream, PackedInts.Format.PACKED, totalFields, FLAGS_BITS, 1);
+      scratchBuffer.reset();
+      final DirectWriter writer = DirectWriter.getInstance(scratchBuffer, totalFields, FLAGS_BITS);
       for (DocData dd : pendingDocs) {
         for (FieldData fd : dd.fields) {
           writer.add(fd.flags);
         }
       }
-      assert writer.ord() == totalFields - 1;
       writer.finish();
+      vectorsStream.writeVInt(Math.toIntExact(scratchBuffer.size()));
+      scratchBuffer.copyTo(vectorsStream);
     }
   }
 
@@ -549,18 +551,18 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
         maxNumTerms |= fd.numTerms;
       }
     }
-    final int bitsRequired = PackedInts.bitsRequired(maxNumTerms);
+    final int bitsRequired = DirectWriter.bitsRequired(maxNumTerms);
     vectorsStream.writeVInt(bitsRequired);
-    final PackedInts.Writer writer =
-        PackedInts.getWriterNoHeader(
-            vectorsStream, PackedInts.Format.PACKED, totalFields, bitsRequired, 1);
+    scratchBuffer.reset();
+    final DirectWriter writer = DirectWriter.getInstance(scratchBuffer, totalFields, bitsRequired);
     for (DocData dd : pendingDocs) {
       for (FieldData fd : dd.fields) {
         writer.add(fd.numTerms);
       }
     }
-    assert writer.ord() == totalFields - 1;
     writer.finish();
+    vectorsStream.writeVInt(Math.toIntExact(scratchBuffer.size()));
+    scratchBuffer.copyTo(vectorsStream);
   }
 
   private void flushTermLengths() throws IOException {
@@ -954,7 +956,8 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
         + payloadLengthsBuf.length
         + termSuffixes.ramBytesUsed()
         + payloadBytes.ramBytesUsed()
-        + lastTerm.bytes.length;
+        + lastTerm.bytes.length
+        + scratchBuffer.ramBytesUsed();
   }
 
   @Override