You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by br...@apache.org on 2019/12/31 10:46:39 UTC
[lucene-solr] branch branch_8x updated: LUCENE-9106: UniformSplit
postings format allows extension of block/line serializers.
This is an automated email from the ASF dual-hosted git repository.
broustant pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/branch_8x by this push:
new a97271f LUCENE-9106: UniformSplit postings format allows extension of block/line serializers.
a97271f is described below
commit a97271fc521672bebfe629c7e09cd7fd2aca52d5
Author: Bruno Roustant <br...@salesforce.com>
AuthorDate: Tue Dec 31 11:27:53 2019 +0100
LUCENE-9106: UniformSplit postings format allows extension of block/line serializers.
---
lucene/CHANGES.txt | 2 +
.../lucene/codecs/uniformsplit/BlockHeader.java | 70 +++++-----
.../lucene/codecs/uniformsplit/BlockLine.java | 10 +-
.../lucene/codecs/uniformsplit/BlockReader.java | 20 ++-
.../lucene/codecs/uniformsplit/BlockWriter.java | 24 +++-
.../lucene/codecs/uniformsplit/FieldMetadata.java | 151 +++++++++++----------
.../uniformsplit/UniformSplitTermsReader.java | 16 ++-
.../uniformsplit/UniformSplitTermsWriter.java | 8 +-
.../uniformsplit/sharedterms/STBlockLine.java | 8 +-
.../uniformsplit/sharedterms/STBlockReader.java | 7 +-
.../uniformsplit/sharedterms/STBlockWriter.java | 9 +-
.../sharedterms/STIntersectBlockReader.java | 7 +-
.../sharedterms/STMergingBlockReader.java | 2 +-
.../sharedterms/STUniformSplitTermsReader.java | 12 +-
.../sharedterms/STUniformSplitTermsWriter.java | 9 +-
15 files changed, 213 insertions(+), 142 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 34236c3..1a4fbab 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -28,6 +28,8 @@ Improvements
* LUCENE-9105: UniformSplit postings format detects corrupted index and better handles IO exceptions. (Bruno Roustant)
+* LUCENE-9106: UniformSplit postings format allows extension of block/line serializers. (Bruno Roustant)
+
Optimizations
---------------------
(No changes)
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockHeader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockHeader.java
index d512fdc..257412f 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockHeader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockHeader.java
@@ -134,43 +134,49 @@ public class BlockHeader implements Accountable {
return basePayloadsFP;
}
- public void write(DataOutput output) throws IOException {
- assert linesCount > 0 : "block header does not seem to be initialized";
- output.writeVInt(linesCount);
-
- output.writeVLong(baseDocsFP);
- output.writeVLong(basePositionsFP);
- output.writeVLong(basePayloadsFP);
-
- output.writeVInt(termStatesBaseOffset);
- output.writeVInt(middleLineOffset);
+ @Override
+ public long ramBytesUsed() {
+ return RAM_USAGE;
}
- public static BlockHeader read(DataInput input, BlockHeader reuse) throws IOException {
- int linesCount = input.readVInt();
- if (linesCount <= 0 || linesCount > UniformSplitTermsWriter.MAX_NUM_BLOCK_LINES) {
- throw new CorruptIndexException("Illegal number of lines in a block: " + linesCount, input);
- }
+ /**
+ * Reads/writes block header.
+ */
+ public static class Serializer {
- long baseDocsFP = input.readVLong();
- long basePositionsFP = input.readVLong();
- long basePayloadsFP = input.readVLong();
+ public void write(DataOutput output, BlockHeader blockHeader) throws IOException {
+ assert blockHeader.linesCount > 0 : "Block header is not initialized";
+ output.writeVInt(blockHeader.linesCount);
- int termStatesBaseOffset = input.readVInt();
- if (termStatesBaseOffset < 0) {
- throw new CorruptIndexException("Illegal termStatesBaseOffset= " + termStatesBaseOffset, input);
- }
- int middleTermOffset = input.readVInt();
- if (middleTermOffset < 0) {
- throw new CorruptIndexException("Illegal middleTermOffset= " + middleTermOffset, input);
- }
+ output.writeVLong(blockHeader.baseDocsFP);
+ output.writeVLong(blockHeader.basePositionsFP);
+ output.writeVLong(blockHeader.basePayloadsFP);
- BlockHeader blockHeader = reuse == null ? new BlockHeader() : reuse;
- return blockHeader.reset(linesCount, baseDocsFP, basePositionsFP, basePayloadsFP, termStatesBaseOffset, middleTermOffset);
- }
+ output.writeVInt(blockHeader.termStatesBaseOffset);
+ output.writeVInt(blockHeader.middleLineOffset);
+ }
- @Override
- public long ramBytesUsed() {
- return RAM_USAGE;
+ public BlockHeader read(DataInput input, BlockHeader reuse) throws IOException {
+ int linesCount = input.readVInt();
+ if (linesCount <= 0 || linesCount > UniformSplitTermsWriter.MAX_NUM_BLOCK_LINES) {
+ throw new CorruptIndexException("Illegal number of lines in a block: " + linesCount, input);
+ }
+
+ long baseDocsFP = input.readVLong();
+ long basePositionsFP = input.readVLong();
+ long basePayloadsFP = input.readVLong();
+
+ int termStatesBaseOffset = input.readVInt();
+ if (termStatesBaseOffset < 0) {
+ throw new CorruptIndexException("Illegal termStatesBaseOffset= " + termStatesBaseOffset, input);
+ }
+ int middleTermOffset = input.readVInt();
+ if (middleTermOffset < 0) {
+ throw new CorruptIndexException("Illegal middleTermOffset= " + middleTermOffset, input);
+ }
+
+ BlockHeader blockHeader = reuse == null ? new BlockHeader() : reuse;
+ return blockHeader.reset(linesCount, baseDocsFP, basePositionsFP, basePayloadsFP, termStatesBaseOffset, middleTermOffset);
+ }
}
}
\ No newline at end of file
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockLine.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockLine.java
index cea8e93..e39cfc6 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockLine.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockLine.java
@@ -107,7 +107,7 @@ public class BlockLine implements Accountable {
}
/**
- * Reads block lines with terms encoded incrementally inside a block.
+ * Reads/writes block lines with terms encoded incrementally inside a block.
* This class keeps a state of the previous term read to decode the next term.
*/
public static class Serializer implements Accountable {
@@ -149,7 +149,7 @@ public class BlockLine implements Accountable {
* the incremental encoding. {@code true} for the first
* and middle term, {@code false} for other terms.
*/
- public static void writeLine(DataOutput blockOutput, BlockLine line, BlockLine previousLine,
+ public void writeLine(DataOutput blockOutput, BlockLine line, BlockLine previousLine,
int termStateRelativeOffset, boolean isIncrementalEncodingSeed) throws IOException {
blockOutput.writeVInt(termStateRelativeOffset);
writeIncrementallyEncodedTerm(line.getTermBytes(), previousLine == null ? null : previousLine.getTermBytes(),
@@ -161,13 +161,13 @@ public class BlockLine implements Accountable {
*
* @param termStatesOutput The output pointing to the details region.
*/
- protected static void writeLineTermState(DataOutput termStatesOutput, BlockLine line,
+ protected void writeLineTermState(DataOutput termStatesOutput, BlockLine line,
FieldInfo fieldInfo, DeltaBaseTermStateSerializer encoder) throws IOException {
assert line.termState != null;
encoder.writeTermState(termStatesOutput, fieldInfo, line.termState);
}
- protected static void writeIncrementallyEncodedTerm(TermBytes termBytes, TermBytes previousTermBytes,
+ protected void writeIncrementallyEncodedTerm(TermBytes termBytes, TermBytes previousTermBytes,
boolean isIncrementalEncodingSeed, DataOutput blockOutput) throws IOException {
BytesRef term = termBytes.getTerm();
assert term.offset == 0;
@@ -240,7 +240,7 @@ public class BlockLine implements Accountable {
* Reads {@code length} bytes from the given {@link DataInput} and stores
* them at {@code offset} in {@code bytes.bytes}.
*/
- protected static void readBytes(DataInput input, BytesRef bytes, int offset, int length) throws IOException {
+ protected void readBytes(DataInput input, BytesRef bytes, int offset, int length) throws IOException {
assert bytes.offset == 0;
bytes.length = offset + length;
bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length);
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockReader.java
index 5c9c840b..8d4bfc0 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockReader.java
@@ -60,6 +60,7 @@ public class BlockReader extends BaseTermsEnum implements Accountable {
protected final FieldMetadata fieldMetadata;
protected final BlockDecoder blockDecoder;
+ protected BlockHeader.Serializer blockHeaderReader;
protected BlockLine.Serializer blockLineReader;
/**
* In-memory read buffer for the current block.
@@ -406,14 +407,27 @@ public class BlockReader extends BaseTermsEnum implements Accountable {
protected void initializeBlockReadLazily() throws IOException {
if (blockStartFP == -1) {
blockInput = blockInput.clone();
- blockLineReader = new BlockLine.Serializer();
+ blockHeaderReader = createBlockHeaderSerializer();
+ blockLineReader = createBlockLineSerializer();
blockReadBuffer = new ByteArrayDataInput();
termStatesReadBuffer = new ByteArrayDataInput();
- termStateSerializer = new DeltaBaseTermStateSerializer();
+ termStateSerializer = createDeltaBaseTermStateSerializer();
scratchBlockBytes = new BytesRef();
}
}
+ protected BlockHeader.Serializer createBlockHeaderSerializer() {
+ return new BlockHeader.Serializer();
+ }
+
+ protected BlockLine.Serializer createBlockLineSerializer() {
+ return new BlockLine.Serializer();
+ }
+
+ protected DeltaBaseTermStateSerializer createDeltaBaseTermStateSerializer() {
+ return new DeltaBaseTermStateSerializer();
+ }
+
/**
* Reads the block header.
* Sets {@link #blockHeader}.
@@ -428,7 +442,7 @@ public class BlockReader extends BaseTermsEnum implements Accountable {
BytesRef blockBytesRef = decodeBlockBytesIfNeeded(numBlockBytes);
blockReadBuffer.reset(blockBytesRef.bytes, blockBytesRef.offset, blockBytesRef.length);
termStatesReadBuffer.reset(blockBytesRef.bytes, blockBytesRef.offset, blockBytesRef.length);
- return blockHeader = BlockHeader.read(blockReadBuffer, blockHeader);
+ return blockHeader = blockHeaderReader.read(blockReadBuffer, blockHeader);
}
protected BytesRef decodeBlockBytesIfNeeded(int numBlockBytes) throws IOException {
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockWriter.java
index acc397d..a1c3d70 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockWriter.java
@@ -60,6 +60,8 @@ public class BlockWriter {
protected final ByteBuffersDataOutput blockLinesWriteBuffer;
protected final ByteBuffersDataOutput termStatesWriteBuffer;
+ protected final BlockHeader.Serializer blockHeaderWriter;
+ protected final BlockLine.Serializer blockLineWriter;
protected final DeltaBaseTermStateSerializer termStateSerializer;
protected final BlockEncoder blockEncoder;
protected final ByteBuffersDataOutput blockWriteBuffer;
@@ -81,7 +83,9 @@ public class BlockWriter {
this.blockEncoder = blockEncoder;
this.blockLines = new ArrayList<>(targetNumBlockLines);
- this.termStateSerializer = new DeltaBaseTermStateSerializer();
+ this.blockHeaderWriter = createBlockHeaderSerializer();
+ this.blockLineWriter = createBlockLineSerializer();
+ this.termStateSerializer = createDeltaBaseTermStateSerializer();
this.blockLinesWriteBuffer = ByteBuffersDataOutput.newResettableInstance();
this.termStatesWriteBuffer = ByteBuffersDataOutput.newResettableInstance();
@@ -91,6 +95,18 @@ public class BlockWriter {
this.scratchBytesRef = new BytesRef();
}
+ protected BlockHeader.Serializer createBlockHeaderSerializer() {
+ return new BlockHeader.Serializer();
+ }
+
+ protected BlockLine.Serializer createBlockLineSerializer() {
+ return new BlockLine.Serializer();
+ }
+
+ protected DeltaBaseTermStateSerializer createDeltaBaseTermStateSerializer() {
+ return new DeltaBaseTermStateSerializer();
+ }
+
/**
* Adds a new {@link BlockLine} term for the current field.
* <p>
@@ -196,7 +212,7 @@ public class BlockWriter {
reusableBlockHeader.reset(blockLines.size(), termStateSerializer.getBaseDocStartFP(), termStateSerializer.getBasePosStartFP(),
termStateSerializer.getBasePayStartFP(), Math.toIntExact(blockLinesWriteBuffer.size()), middleOffset);
- reusableBlockHeader.write(blockWriteBuffer);
+ blockHeaderWriter.write(blockWriteBuffer, reusableBlockHeader);
blockLinesWriteBuffer.copyTo(blockWriteBuffer);
termStatesWriteBuffer.copyTo(blockWriteBuffer);
@@ -236,8 +252,8 @@ public class BlockWriter {
protected void writeBlockLine(boolean isIncrementalEncodingSeed, BlockLine line, BlockLine previousLine) throws IOException {
assert fieldMetadata != null;
- BlockLine.Serializer.writeLine(blockLinesWriteBuffer, line, previousLine, Math.toIntExact(termStatesWriteBuffer.size()), isIncrementalEncodingSeed);
- BlockLine.Serializer.writeLineTermState(termStatesWriteBuffer, line, fieldMetadata.getFieldInfo(), termStateSerializer);
+ blockLineWriter.writeLine(blockLinesWriteBuffer, line, previousLine, Math.toIntExact(termStatesWriteBuffer.size()), isIncrementalEncodingSeed);
+ blockLineWriter.writeLineTermState(termStatesWriteBuffer, line, fieldMetadata.getFieldInfo(), termStateSerializer);
}
/**
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/FieldMetadata.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/FieldMetadata.java
index f4cede0..8e2fc84 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/FieldMetadata.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/FieldMetadata.java
@@ -194,88 +194,99 @@ public class FieldMetadata implements Accountable {
+ (docsSeen == null ? 0 : docsSeen.ramBytesUsed());
}
- public static FieldMetadata read(DataInput input, FieldInfos fieldInfos, int maxNumDocs) throws IOException {
- int fieldId = input.readVInt();
- FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldId);
- if (fieldInfo == null) {
- throw new CorruptIndexException("Illegal field id= " + fieldId, input);
- }
- FieldMetadata fieldMetadata = new FieldMetadata(fieldInfo, 0, false);
+ /**
+ * Reads/writes field metadata.
+ */
+ public static class Serializer {
- fieldMetadata.numTerms = input.readVInt();
- if (fieldMetadata.numTerms <= 0) {
- throw new CorruptIndexException("Illegal number of terms= " + fieldMetadata.numTerms + " for field= " + fieldId, input);
- }
+ /**
+ * Stateless singleton.
+ */
+ public static final Serializer INSTANCE = new Serializer();
- fieldMetadata.sumDocFreq = input.readVInt();
- fieldMetadata.sumTotalTermFreq = fieldMetadata.sumDocFreq;
- if (fieldMetadata.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0) {
- fieldMetadata.sumTotalTermFreq += input.readVInt();
- if (fieldMetadata.sumTotalTermFreq < fieldMetadata.sumDocFreq) {
- // #positions must be >= #postings.
- throw new CorruptIndexException("Illegal sumTotalTermFreq= " + fieldMetadata.sumTotalTermFreq
- + " sumDocFreq= " + fieldMetadata.sumDocFreq + " for field= " + fieldId, input);
- }
- }
+ public void write(DataOutput output, FieldMetadata fieldMetadata) throws IOException {
+ assert fieldMetadata.dictionaryStartFP >= 0;
+ assert fieldMetadata.firstBlockStartFP >= 0;
+ assert fieldMetadata.lastBlockStartFP >= 0;
+ assert fieldMetadata.numTerms > 0 : "There should be at least one term for field " + fieldMetadata.fieldInfo.name + ": " + fieldMetadata.numTerms;
+ assert fieldMetadata.firstBlockStartFP <= fieldMetadata.lastBlockStartFP : "start: " + fieldMetadata.firstBlockStartFP + " end: " + fieldMetadata.lastBlockStartFP;
+ assert fieldMetadata.lastTerm != null : "you must set the last term";
- fieldMetadata.docCount = input.readVInt();
- if (fieldMetadata.docCount < 0 || fieldMetadata.docCount > maxNumDocs) {
- // #docs with field must be <= #docs.
- throw new CorruptIndexException("Illegal number of docs= " + fieldMetadata.docCount
- + " maxNumDocs= " + maxNumDocs + " for field=" + fieldId, input);
- }
- if (fieldMetadata.sumDocFreq < fieldMetadata.docCount) {
- // #postings must be >= #docs with field.
- throw new CorruptIndexException("Illegal sumDocFreq= " + fieldMetadata.sumDocFreq
- + " docCount= " + fieldMetadata.docCount + " for field= " + fieldId, input);
- }
+ output.writeVInt(fieldMetadata.fieldInfo.number);
- fieldMetadata.dictionaryStartFP = input.readVLong();
- fieldMetadata.firstBlockStartFP = input.readVLong();
- fieldMetadata.lastBlockStartFP = input.readVLong();
-
- int lastTermLength = input.readVInt();
- BytesRef lastTerm = new BytesRef(lastTermLength);
- if (lastTermLength > 0) {
- input.readBytes(lastTerm.bytes, 0, lastTermLength);
- lastTerm.length = lastTermLength;
- } else if (lastTermLength < 0) {
- throw new CorruptIndexException("Illegal last term length= " + lastTermLength + " for field= " + fieldId, input);
- }
- fieldMetadata.setLastTerm(lastTerm);
-
- return fieldMetadata;
- }
+ output.writeVInt(fieldMetadata.numTerms);
+ output.writeVInt(fieldMetadata.sumDocFreq);
- public void write(DataOutput output) throws IOException {
- assert dictionaryStartFP >= 0;
- assert firstBlockStartFP >= 0;
- assert lastBlockStartFP >= 0;
- assert numTerms > 0 : "There should be at least one term for field " + fieldInfo.name + ": " + numTerms;
- assert firstBlockStartFP <= lastBlockStartFP : "start: " + firstBlockStartFP + " end: " + lastBlockStartFP;
- assert lastTerm != null : "you must set the last term";
+ if (fieldMetadata.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0) {
+ assert fieldMetadata.sumTotalTermFreq >= fieldMetadata.sumDocFreq : "sumTotalFQ: " + fieldMetadata.sumTotalTermFreq + " sumDocFQ: " + fieldMetadata.sumDocFreq;
+ output.writeVInt(fieldMetadata.sumTotalTermFreq - fieldMetadata.sumDocFreq);
+ }
- output.writeVInt(fieldInfo.number);
+ output.writeVInt(fieldMetadata.getDocCount());
- output.writeVInt(numTerms);
- output.writeVInt(sumDocFreq);
+ output.writeVLong(fieldMetadata.dictionaryStartFP);
+ output.writeVLong(fieldMetadata.firstBlockStartFP);
+ output.writeVLong(fieldMetadata.lastBlockStartFP);
- if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0) {
- assert sumTotalTermFreq >= sumDocFreq : "sumTotalFQ: " + sumTotalTermFreq + " sumDocFQ: " + sumDocFreq;
- output.writeVInt(sumTotalTermFreq - sumDocFreq);
+ if (fieldMetadata.lastTerm.length > 0) {
+ output.writeVInt(fieldMetadata.lastTerm.length);
+ output.writeBytes(fieldMetadata.lastTerm.bytes, fieldMetadata.lastTerm.offset, fieldMetadata.lastTerm.length);
+ } else {
+ output.writeVInt(0);
+ }
}
- output.writeVInt(getDocCount());
+ public FieldMetadata read(DataInput input, FieldInfos fieldInfos, int maxNumDocs) throws IOException {
+ int fieldId = input.readVInt();
+ FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldId);
+ if (fieldInfo == null) {
+ throw new CorruptIndexException("Illegal field id= " + fieldId, input);
+ }
+ FieldMetadata fieldMetadata = new FieldMetadata(fieldInfo, 0, false);
+
+ fieldMetadata.numTerms = input.readVInt();
+ if (fieldMetadata.numTerms <= 0) {
+ throw new CorruptIndexException("Illegal number of terms= " + fieldMetadata.numTerms + " for field= " + fieldId, input);
+ }
- output.writeVLong(dictionaryStartFP);
- output.writeVLong(firstBlockStartFP);
- output.writeVLong(lastBlockStartFP);
+ fieldMetadata.sumDocFreq = input.readVInt();
+ fieldMetadata.sumTotalTermFreq = fieldMetadata.sumDocFreq;
+ if (fieldMetadata.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0) {
+ fieldMetadata.sumTotalTermFreq += input.readVInt();
+ if (fieldMetadata.sumTotalTermFreq < fieldMetadata.sumDocFreq) {
+ // #positions must be >= #postings.
+ throw new CorruptIndexException("Illegal sumTotalTermFreq= " + fieldMetadata.sumTotalTermFreq
+ + " sumDocFreq= " + fieldMetadata.sumDocFreq + " for field= " + fieldId, input);
+ }
+ }
+
+ fieldMetadata.docCount = input.readVInt();
+ if (fieldMetadata.docCount < 0 || fieldMetadata.docCount > maxNumDocs) {
+ // #docs with field must be <= #docs.
+ throw new CorruptIndexException("Illegal number of docs= " + fieldMetadata.docCount
+ + " maxNumDocs= " + maxNumDocs + " for field=" + fieldId, input);
+ }
+ if (fieldMetadata.sumDocFreq < fieldMetadata.docCount) {
+ // #postings must be >= #docs with field.
+ throw new CorruptIndexException("Illegal sumDocFreq= " + fieldMetadata.sumDocFreq
+ + " docCount= " + fieldMetadata.docCount + " for field= " + fieldId, input);
+ }
+
+ fieldMetadata.dictionaryStartFP = input.readVLong();
+ fieldMetadata.firstBlockStartFP = input.readVLong();
+ fieldMetadata.lastBlockStartFP = input.readVLong();
+
+ int lastTermLength = input.readVInt();
+ BytesRef lastTerm = new BytesRef(lastTermLength);
+ if (lastTermLength > 0) {
+ input.readBytes(lastTerm.bytes, 0, lastTermLength);
+ lastTerm.length = lastTermLength;
+ } else if (lastTermLength < 0) {
+ throw new CorruptIndexException("Illegal last term length= " + lastTermLength + " for field= " + fieldId, input);
+ }
+ fieldMetadata.setLastTerm(lastTerm);
- if (lastTerm.length > 0) {
- output.writeVInt(lastTerm.length);
- output.writeBytes(lastTerm.bytes, lastTerm.offset, lastTerm.length);
- } else {
- output.writeVInt(0);
+ return fieldMetadata;
}
}
}
\ No newline at end of file
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/UniformSplitTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/UniformSplitTermsReader.java
index f116eee..bda0406 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/UniformSplitTermsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/UniformSplitTermsReader.java
@@ -69,7 +69,7 @@ public class UniformSplitTermsReader extends FieldsProducer {
* It can be used for decompression or decryption.
*/
public UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder) throws IOException {
- this(postingsReader, state, blockDecoder, NAME, VERSION_START, VERSION_CURRENT,
+ this(postingsReader, state, blockDecoder, FieldMetadata.Serializer.INSTANCE, NAME, VERSION_START, VERSION_CURRENT,
TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
}
@@ -77,8 +77,10 @@ public class UniformSplitTermsReader extends FieldsProducer {
* @param blockDecoder Optional block decoder, may be null if none.
* It can be used for decompression or decryption.
*/
- protected UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
- String codecName, int versionStart, int versionCurrent, String termsBlocksExtension, String dictionaryExtension) throws IOException {
+ protected UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state,
+ BlockDecoder blockDecoder, FieldMetadata.Serializer fieldMetadataReader,
+ String codecName, int versionStart, int versionCurrent,
+ String termsBlocksExtension, String dictionaryExtension) throws IOException {
IndexInput dictionaryInput = null;
IndexInput blockInput = null;
boolean success = false;
@@ -100,7 +102,7 @@ public class UniformSplitTermsReader extends FieldsProducer {
CodecUtil.retrieveChecksum(blockInput);
seekFieldsMetadata(blockInput);
- Collection<FieldMetadata> fieldMetadataCollection = parseFieldsMetadata(blockInput, state.fieldInfos, state.segmentInfo.maxDoc());
+ Collection<FieldMetadata> fieldMetadataCollection = parseFieldsMetadata(blockInput, state.fieldInfos, fieldMetadataReader, state.segmentInfo.maxDoc());
fieldToTermsMap = new HashMap<>();
this.blockInput = blockInput;
@@ -133,19 +135,19 @@ public class UniformSplitTermsReader extends FieldsProducer {
* @param indexInput {@link IndexInput} must be positioned to the fields metadata
* details by calling {@link #seekFieldsMetadata(IndexInput)} before this call.
*/
- protected static Collection<FieldMetadata> parseFieldsMetadata(IndexInput indexInput, FieldInfos fieldInfos, int maxNumDocs) throws IOException {
+ protected static Collection<FieldMetadata> parseFieldsMetadata(IndexInput indexInput, FieldInfos fieldInfos,
+ FieldMetadata.Serializer fieldMetadataReader, int maxNumDocs) throws IOException {
int numFields = indexInput.readVInt();
if (numFields < 0) {
throw new CorruptIndexException("Illegal number of fields= " + numFields, indexInput);
}
Collection<FieldMetadata> fieldMetadataCollection = new ArrayList<>(numFields);
for (int i = 0; i < numFields; i++) {
- fieldMetadataCollection.add(FieldMetadata.read(indexInput, fieldInfos, maxNumDocs));
+ fieldMetadataCollection.add(fieldMetadataReader.read(indexInput, fieldInfos, maxNumDocs));
}
return fieldMetadataCollection;
}
-
@Override
public void close() throws IOException {
try {
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/UniformSplitTermsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/UniformSplitTermsWriter.java
index 5adf74d..101b6b5 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/UniformSplitTermsWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/UniformSplitTermsWriter.java
@@ -128,6 +128,7 @@ public class UniformSplitTermsWriter extends FieldsConsumer {
protected final int deltaNumLines;
protected final BlockEncoder blockEncoder;
+ protected final FieldMetadata.Serializer fieldMetadataWriter;
protected final IndexOutput blockOutput;
protected final IndexOutput dictionaryOutput;
@@ -146,7 +147,7 @@ public class UniformSplitTermsWriter extends FieldsConsumer {
*/
public UniformSplitTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState state,
int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder) throws IOException {
- this(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder,
+ this(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder, FieldMetadata.Serializer.INSTANCE,
NAME, VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
}
@@ -164,7 +165,7 @@ public class UniformSplitTermsWriter extends FieldsConsumer {
* It can be used for compression or encryption.
*/
protected UniformSplitTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState state,
- int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder,
+ int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, FieldMetadata.Serializer fieldMetadataWriter,
String codecName, int versionCurrent, String termsBlocksExtension, String dictionaryExtension) throws IOException {
validateSettings(targetNumBlockLines, deltaNumLines);
IndexOutput blockOutput = null;
@@ -177,6 +178,7 @@ public class UniformSplitTermsWriter extends FieldsConsumer {
this.targetNumBlockLines = targetNumBlockLines;
this.deltaNumLines = deltaNumLines;
this.blockEncoder = blockEncoder;
+ this.fieldMetadataWriter = fieldMetadataWriter;
String termsName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, termsBlocksExtension);
blockOutput = state.directory.createOutput(termsName, state.context);
@@ -278,7 +280,7 @@ public class UniformSplitTermsWriter extends FieldsConsumer {
if (fieldMetadata.getNumTerms() > 0) {
fieldMetadata.setLastTerm(lastTerm);
- fieldMetadata.write(fieldsOutput);
+ fieldMetadataWriter.write(fieldsOutput, fieldMetadata);
writeDictionary(dictionaryBuilder);
return 1;
}
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockLine.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockLine.java
index 46f6ab7..7dfe312 100755
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockLine.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockLine.java
@@ -75,7 +75,7 @@ public class STBlockLine extends BlockLine {
/**
* Writes all the {@link BlockTermState} of the provided {@link STBlockLine} to the given output.
*/
- public static void writeLineTermStates(DataOutput termStatesOutput, STBlockLine line,
+ public void writeLineTermStates(DataOutput termStatesOutput, STBlockLine line,
DeltaBaseTermStateSerializer encoder) throws IOException {
FieldMetadataTermState fieldMetadataTermState;
@@ -111,7 +111,7 @@ public class STBlockLine extends BlockLine {
* @return The {@link BlockTermState} corresponding to the provided field id; or null if the field
* does not occur in the line.
*/
- public static BlockTermState readTermStateForField(int fieldId, DataInput termStatesInput,
+ public BlockTermState readTermStateForField(int fieldId, DataInput termStatesInput,
DeltaBaseTermStateSerializer termStateSerializer,
BlockHeader blockHeader, FieldInfos fieldInfos,
BlockTermState reuse) throws IOException {
@@ -161,7 +161,7 @@ public class STBlockLine extends BlockLine {
* @param fieldTermStatesMap Map filled with the term states for each field. It is cleared first.
* @see #readTermStateForField
*/
- public static void readFieldTermStatesMap(DataInput termStatesInput,
+ public void readFieldTermStatesMap(DataInput termStatesInput,
DeltaBaseTermStateSerializer termStateSerializer,
BlockHeader blockHeader,
FieldInfos fieldInfos,
@@ -183,7 +183,7 @@ public class STBlockLine extends BlockLine {
/**
* Reads all the field ids in the current block line of the provided input.
*/
- public static int[] readFieldIds(DataInput termStatesInput, int numFields) throws IOException {
+ public int[] readFieldIds(DataInput termStatesInput, int numFields) throws IOException {
int[] fieldIds = new int[numFields];
for (int i = 0; i < numFields; i++) {
fieldIds[i] = termStatesInput.readVInt();
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockReader.java
index 3f04d92..6d7c79d 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockReader.java
@@ -117,6 +117,11 @@ public class STBlockReader extends BlockReader {
return blockStartFP > fieldMetadata.getLastBlockStartFP() || super.isBeyondLastTerm(searchedTerm, blockStartFP);
}
+ @Override
+ protected STBlockLine.Serializer createBlockLineSerializer() {
+ return new STBlockLine.Serializer();
+ }
+
/**
* Reads the {@link BlockTermState} on the current line for this reader's field.
*
@@ -125,7 +130,7 @@ public class STBlockReader extends BlockReader {
@Override
protected BlockTermState readTermState() throws IOException {
termStatesReadBuffer.setPosition(blockFirstLineStart + blockHeader.getTermStatesBaseOffset() + blockLine.getTermStateRelativeOffset());
- return termState = STBlockLine.Serializer.readTermStateForField(
+ return termState = ((STBlockLine.Serializer) blockLineReader).readTermStateForField(
fieldMetadata.getFieldInfo().number,
termStatesReadBuffer,
termStateSerializer,
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockWriter.java
index 4c37a4e..15e8d54 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockWriter.java
@@ -85,9 +85,14 @@ public class STBlockWriter extends BlockWriter {
}
@Override
+ protected BlockLine.Serializer createBlockLineSerializer() {
+ return new STBlockLine.Serializer();
+ }
+
+ @Override
protected void writeBlockLine(boolean isIncrementalEncodingSeed, BlockLine line, BlockLine previousLine) throws IOException {
- STBlockLine.Serializer.writeLine(blockLinesWriteBuffer, line, previousLine, Math.toIntExact(termStatesWriteBuffer.size()), isIncrementalEncodingSeed);
- STBlockLine.Serializer.writeLineTermStates(termStatesWriteBuffer, (STBlockLine) line, termStateSerializer);
+ blockLineWriter.writeLine(blockLinesWriteBuffer, line, previousLine, Math.toIntExact(termStatesWriteBuffer.size()), isIncrementalEncodingSeed);
+ ((STBlockLine.Serializer) blockLineWriter).writeLineTermStates(termStatesWriteBuffer, (STBlockLine) line, termStateSerializer);
((STBlockLine) line).collectFields(fieldsInBlock);
}
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STIntersectBlockReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STIntersectBlockReader.java
index 577aae1..099b6c3 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STIntersectBlockReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STIntersectBlockReader.java
@@ -91,6 +91,11 @@ public class STIntersectBlockReader extends IntersectBlockReader {
return super.nextBlockMatchingPrefix() && blockHeader != null;
}
+ @Override
+ protected STBlockLine.Serializer createBlockLineSerializer() {
+ return new STBlockLine.Serializer();
+ }
+
/**
* Reads the {@link BlockTermState} on the current line for the specific field
* corresponding to this reader.
@@ -100,7 +105,7 @@ public class STIntersectBlockReader extends IntersectBlockReader {
@Override
protected BlockTermState readTermState() throws IOException {
termStatesReadBuffer.setPosition(blockFirstLineStart + blockHeader.getTermStatesBaseOffset() + blockLine.getTermStateRelativeOffset());
- return STBlockLine.Serializer.readTermStateForField(
+ return ((STBlockLine.Serializer) blockLineReader).readTermStateForField(
fieldMetadata.getFieldInfo().number,
termStatesReadBuffer,
termStateSerializer,
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STMergingBlockReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STMergingBlockReader.java
index 5f135bb..fe6bdbe 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STMergingBlockReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STMergingBlockReader.java
@@ -98,7 +98,7 @@ public class STMergingBlockReader extends STBlockReader {
public void readFieldTermStatesMap(Map<String, BlockTermState> fieldTermStatesMap) throws IOException {
if (term() != null) {
termStatesReadBuffer.setPosition(blockFirstLineStart + blockHeader.getTermStatesBaseOffset() + blockLine.getTermStateRelativeOffset());
- STBlockLine.Serializer.readFieldTermStatesMap(
+ ((STBlockLine.Serializer) blockLineReader).readFieldTermStatesMap(
termStatesReadBuffer,
termStateSerializer,
blockHeader,
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STUniformSplitTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STUniformSplitTermsReader.java
index 84360b9..50a17bc 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STUniformSplitTermsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STUniformSplitTermsReader.java
@@ -47,13 +47,15 @@ import static org.apache.lucene.codecs.uniformsplit.sharedterms.STUniformSplitPo
public class STUniformSplitTermsReader extends UniformSplitTermsReader {
public STUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder) throws IOException {
- super(postingsReader, state, blockDecoder, NAME, VERSION_START,
- VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
+ this(postingsReader, state, blockDecoder, FieldMetadata.Serializer.INSTANCE,
+ NAME, VERSION_START, VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
}
- protected STUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
- String codecName, int versionStart, int versionCurrent, String termsBlocksExtension, String dictionaryExtension) throws IOException {
- super(postingsReader, state, blockDecoder, codecName, versionStart, versionCurrent, termsBlocksExtension, dictionaryExtension);
+ protected STUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state,
+ BlockDecoder blockDecoder, FieldMetadata.Serializer fieldMetadataReader,
+ String codecName, int versionStart, int versionCurrent,
+ String termsBlocksExtension, String dictionaryExtension) throws IOException {
+ super(postingsReader, state, blockDecoder, fieldMetadataReader, codecName, versionStart, versionCurrent, termsBlocksExtension, dictionaryExtension);
}
@Override
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STUniformSplitTermsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STUniformSplitTermsWriter.java
index d121301..ca15d6a 100755
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STUniformSplitTermsWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STUniformSplitTermsWriter.java
@@ -88,13 +88,14 @@ public class STUniformSplitTermsWriter extends UniformSplitTermsWriter {
public STUniformSplitTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState state,
int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder) throws IOException {
- this(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder, NAME, VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
+ this(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder, FieldMetadata.Serializer.INSTANCE,
+ NAME, VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
}
protected STUniformSplitTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState state,
- int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder,
+ int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, FieldMetadata.Serializer fieldMetadataWriter,
String codecName, int versionCurrent, String termsBlocksExtension, String dictionaryExtension) throws IOException {
- super(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder, codecName, versionCurrent, termsBlocksExtension, dictionaryExtension);
+ super(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder, fieldMetadataWriter, codecName, versionCurrent, termsBlocksExtension, dictionaryExtension);
}
@Override
@@ -200,7 +201,7 @@ public class STUniformSplitTermsWriter extends UniformSplitTermsWriter {
int fieldsNumber = 0;
for (FieldMetadata fieldMetadata : fieldMetadataList) {
if (fieldMetadata.getNumTerms() > 0) {
- fieldMetadata.write(fieldsOutput);
+ fieldMetadataWriter.write(fieldsOutput, fieldMetadata);
fieldsNumber++;
}
}