You are viewing a plain text version of this content. The canonical link was a hyperlink in the original HTML message and is not available in this plain-text rendering.
Posted to commits@lucene.apache.org by iv...@apache.org on 2021/04/15 14:04:22 UTC
[lucene] branch main updated: LUCENE-9907: Remove
packedInts#getReaderNoHeader dependency on TermsVectorFieldsFormat (#72)
This is an automated email from the ASF dual-hosted git repository.
ivera pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new 873ac5f LUCENE-9907: Remove packedInts#getReaderNoHeader dependency on TermsVectorFieldsFormat (#72)
873ac5f is described below
commit 873ac5f162a9b5e138319b468e91d7dd28b22397
Author: Ignacio Vera <iv...@apache.org>
AuthorDate: Thu Apr 15 16:04:13 2021 +0200
LUCENE-9907: Remove packedInts#getReaderNoHeader dependency on TermsVectorFieldsFormat (#72)
---
.../Lucene90CompressingTermVectorsReader.java | 65 ++++++++++------------
.../Lucene90CompressingTermVectorsWriter.java | 47 ++++++++--------
2 files changed, 55 insertions(+), 57 deletions(-)
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java
index 6e5e0d3..ec8823d 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java
@@ -30,6 +30,8 @@ import static org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingT
import static org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingTermVectorsWriter.VERSION_START;
import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Collections;
import java.util.Iterator;
import java.util.NoSuchElementException;
import org.apache.lucene.codecs.CodecUtil;
@@ -50,15 +52,21 @@ import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ByteBuffersDataInput;
+import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.RandomAccessInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.packed.BlockPackedReaderIterator;
+import org.apache.lucene.util.packed.DirectReader;
+import org.apache.lucene.util.packed.DirectWriter;
import org.apache.lucene.util.packed.PackedInts;
/**
@@ -295,6 +303,13 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
return new Lucene90CompressingTermVectorsReader(this);
}
+ private static RandomAccessInput slice(IndexInput in) throws IOException {
+ final int length = in.readVInt();
+ final byte[] bytes = new byte[length];
+ in.readBytes(bytes, 0, length);
+ return new ByteBuffersDataInput(Collections.singletonList(ByteBuffer.wrap(bytes)));
+ }
+
@Override
public Fields get(int doc) throws IOException {
ensureOpen();
@@ -368,38 +383,25 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
// read field numbers and flags
final int[] fieldNumOffs = new int[numFields];
- final PackedInts.Reader flags;
+ final LongValues flags;
{
- final int bitsPerOff = PackedInts.bitsRequired(fieldNums.length - 1);
- final PackedInts.Reader allFieldNumOffs =
- PackedInts.getReaderNoHeader(
- vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalFields, bitsPerOff);
+ final int bitsPerOff = DirectWriter.bitsRequired(fieldNums.length - 1);
+ final LongValues allFieldNumOffs = DirectReader.getInstance(slice(vectorsStream), bitsPerOff);
switch (vectorsStream.readVInt()) {
case 0:
- final PackedInts.Reader fieldFlags =
- PackedInts.getReaderNoHeader(
- vectorsStream,
- PackedInts.Format.PACKED,
- packedIntsVersion,
- fieldNums.length,
- FLAGS_BITS);
- PackedInts.Mutable f = PackedInts.getMutable(totalFields, FLAGS_BITS, PackedInts.COMPACT);
+ final LongValues fieldFlags = DirectReader.getInstance(slice(vectorsStream), FLAGS_BITS);
+ final ByteBuffersDataOutput out = new ByteBuffersDataOutput();
+ final DirectWriter writer = DirectWriter.getInstance(out, totalFields, FLAGS_BITS);
for (int i = 0; i < totalFields; ++i) {
final int fieldNumOff = (int) allFieldNumOffs.get(i);
assert fieldNumOff >= 0 && fieldNumOff < fieldNums.length;
- final int fgs = (int) fieldFlags.get(fieldNumOff);
- f.set(i, fgs);
+ writer.add(fieldFlags.get(fieldNumOff));
}
- flags = f;
+ writer.finish();
+ flags = DirectReader.getInstance(out.toDataInput(), FLAGS_BITS);
break;
case 1:
- flags =
- PackedInts.getReaderNoHeader(
- vectorsStream,
- PackedInts.Format.PACKED,
- packedIntsVersion,
- totalFields,
- FLAGS_BITS);
+ flags = DirectReader.getInstance(slice(vectorsStream), FLAGS_BITS);
break;
default:
throw new AssertionError();
@@ -410,17 +412,11 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
}
// number of terms per field for all fields
- final PackedInts.Reader numTerms;
+ final LongValues numTerms;
final int totalTerms;
{
final int bitsRequired = vectorsStream.readVInt();
- numTerms =
- PackedInts.getReaderNoHeader(
- vectorsStream,
- PackedInts.Format.PACKED,
- packedIntsVersion,
- totalFields,
- bitsRequired);
+ numTerms = DirectReader.getInstance(slice(vectorsStream), bitsRequired);
int sum = 0;
for (int i = 0; i < totalFields; ++i) {
sum += numTerms.get(i);
@@ -711,8 +707,7 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
}
// field -> term index -> position index
- private int[][] positionIndex(
- int skip, int numFields, PackedInts.Reader numTerms, int[] termFreqs) {
+ private int[][] positionIndex(int skip, int numFields, LongValues numTerms, int[] termFreqs) {
final int[][] positionIndex = new int[numFields][];
int termIndex = 0;
for (int i = 0; i < skip; ++i) {
@@ -734,8 +729,8 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
private int[][] readPositions(
int skip,
int numFields,
- PackedInts.Reader flags,
- PackedInts.Reader numTerms,
+ LongValues flags,
+ LongValues numTerms,
int[] termFreqs,
int flag,
final int totalPositions,
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsWriter.java
index 65b3442..ed54ce7 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsWriter.java
@@ -51,6 +51,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.packed.BlockPackedWriter;
+import org.apache.lucene.util.packed.DirectWriter;
import org.apache.lucene.util.packed.PackedInts;
/**
@@ -74,7 +75,7 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
static final int POSITIONS = 0x01;
static final int OFFSETS = 0x02;
static final int PAYLOADS = 0x04;
- static final int FLAGS_BITS = PackedInts.bitsRequired(POSITIONS | OFFSETS | PAYLOADS);
+ static final int FLAGS_BITS = DirectWriter.bitsRequired(POSITIONS | OFFSETS | PAYLOADS);
private final String segment;
private FieldsIndexWriter indexWriter;
@@ -223,6 +224,7 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
private final ByteBuffersDataOutput payloadBytes; // buffered term payloads
private final BlockPackedWriter writer;
private final int maxDocsPerChunk; // hard limit on number of docs per chunk
+ private final ByteBuffersDataOutput scratchBuffer = ByteBuffersDataOutput.newResettableInstance();
/** Sole constructor. */
Lucene90CompressingTermVectorsWriter(
@@ -478,13 +480,10 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
}
private void flushFields(int totalFields, int[] fieldNums) throws IOException {
- final PackedInts.Writer writer =
- PackedInts.getWriterNoHeader(
- vectorsStream,
- PackedInts.Format.PACKED,
- totalFields,
- PackedInts.bitsRequired(fieldNums.length - 1),
- 1);
+ scratchBuffer.reset();
+ final DirectWriter writer =
+ DirectWriter.getInstance(
+ scratchBuffer, totalFields, DirectWriter.bitsRequired(fieldNums.length - 1));
for (DocData dd : pendingDocs) {
for (FieldData fd : dd.fields) {
final int fieldNumIndex = Arrays.binarySearch(fieldNums, fd.fieldNum);
@@ -493,6 +492,8 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
}
}
writer.finish();
+ vectorsStream.writeVLong(scratchBuffer.size());
+ scratchBuffer.copyTo(vectorsStream);
}
private void flushFlags(int totalFields, int[] fieldNums) throws IOException {
@@ -517,28 +518,29 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
if (nonChangingFlags) {
// write one flag per field num
vectorsStream.writeVInt(0);
- final PackedInts.Writer writer =
- PackedInts.getWriterNoHeader(
- vectorsStream, PackedInts.Format.PACKED, fieldFlags.length, FLAGS_BITS, 1);
+ scratchBuffer.reset();
+ final DirectWriter writer =
+ DirectWriter.getInstance(scratchBuffer, fieldFlags.length, FLAGS_BITS);
for (int flags : fieldFlags) {
assert flags >= 0;
writer.add(flags);
}
- assert writer.ord() == fieldFlags.length - 1;
writer.finish();
+ vectorsStream.writeVInt(Math.toIntExact(scratchBuffer.size()));
+ scratchBuffer.copyTo(vectorsStream);
} else {
// write one flag for every field instance
vectorsStream.writeVInt(1);
- final PackedInts.Writer writer =
- PackedInts.getWriterNoHeader(
- vectorsStream, PackedInts.Format.PACKED, totalFields, FLAGS_BITS, 1);
+ scratchBuffer.reset();
+ final DirectWriter writer = DirectWriter.getInstance(scratchBuffer, totalFields, FLAGS_BITS);
for (DocData dd : pendingDocs) {
for (FieldData fd : dd.fields) {
writer.add(fd.flags);
}
}
- assert writer.ord() == totalFields - 1;
writer.finish();
+ vectorsStream.writeVInt(Math.toIntExact(scratchBuffer.size()));
+ scratchBuffer.copyTo(vectorsStream);
}
}
@@ -549,18 +551,18 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
maxNumTerms |= fd.numTerms;
}
}
- final int bitsRequired = PackedInts.bitsRequired(maxNumTerms);
+ final int bitsRequired = DirectWriter.bitsRequired(maxNumTerms);
vectorsStream.writeVInt(bitsRequired);
- final PackedInts.Writer writer =
- PackedInts.getWriterNoHeader(
- vectorsStream, PackedInts.Format.PACKED, totalFields, bitsRequired, 1);
+ scratchBuffer.reset();
+ final DirectWriter writer = DirectWriter.getInstance(scratchBuffer, totalFields, bitsRequired);
for (DocData dd : pendingDocs) {
for (FieldData fd : dd.fields) {
writer.add(fd.numTerms);
}
}
- assert writer.ord() == totalFields - 1;
writer.finish();
+ vectorsStream.writeVInt(Math.toIntExact(scratchBuffer.size()));
+ scratchBuffer.copyTo(vectorsStream);
}
private void flushTermLengths() throws IOException {
@@ -954,7 +956,8 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
+ payloadLengthsBuf.length
+ termSuffixes.ramBytesUsed()
+ payloadBytes.ramBytesUsed()
- + lastTerm.bytes.length;
+ + lastTerm.bytes.length
+ + scratchBuffer.ramBytesUsed();
}
@Override