You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ha...@apache.org on 2013/08/17 12:59:25 UTC
svn commit: r1514978 - in /lucene/dev/branches/lucene3069/lucene:
codecs/src/java/org/apache/lucene/codecs/temp/
core/src/java/org/apache/lucene/codecs/
core/src/java/org/apache/lucene/codecs/temp/
Author: han
Date: Sat Aug 17 10:59:25 2013
New Revision: 1514978
URL: http://svn.apache.org/r1514978
Log:
LUCENE-5179: refactoring on PostingsWriterBase for delta-encoding
Modified:
lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempBlockTermsWriter.java
lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/TempPostingsWriterBase.java
lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTreeTermsWriter.java
lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsWriter.java
lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTTermsWriter.java
lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempPostingsReader.java
lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempPostingsWriter.java
Modified: lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempBlockTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempBlockTermsWriter.java?rev=1514978&r1=1514977&r2=1514978&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempBlockTermsWriter.java (original)
+++ lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempBlockTermsWriter.java Sat Aug 17 10:59:25 2013
@@ -28,6 +28,7 @@ import org.apache.lucene.codecs.FieldsCo
import org.apache.lucene.codecs.PostingsConsumer;
import org.apache.lucene.codecs.TempPostingsWriterBase;
import org.apache.lucene.codecs.TermStats;
+import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
@@ -113,7 +114,7 @@ public class TempBlockTermsWriter extend
//System.out.println("BTW.init seg=" + state.segmentName);
- postingsWriter.start(out); // have consumer write its format/header
+ postingsWriter.init(out); // have consumer write its format/header
success = true;
} finally {
if (!success) {
@@ -166,9 +167,7 @@ public class TempBlockTermsWriter extend
private static class TermEntry {
public final BytesRef term = new BytesRef();
- public TermStats stats;
- public long[] longs;
- public byte[] bytes;
+ public BlockTermState state;
}
class TermsWriter extends TermsConsumer {
@@ -245,12 +244,10 @@ public class TempBlockTermsWriter extend
}
final TermEntry te = pendingTerms[pendingCount];
te.term.copyBytes(text);
- te.stats = stats;
- te.longs = new long[longsSize];
- postingsWriter.finishTerm(te.longs, bytesWriter, stats);
- te.bytes = new byte[(int) bytesWriter.getFilePointer()];
- bytesWriter.writeTo(te.bytes, 0);
- bytesWriter.reset();
+ te.state = postingsWriter.newTermState();
+ te.state.docFreq = stats.docFreq;
+ te.state.totalTermFreq = stats.totalTermFreq;
+ postingsWriter.finishTerm(te.state);
pendingCount++;
numTerms++;
@@ -297,6 +294,7 @@ public class TempBlockTermsWriter extend
}
private final RAMOutputStream bytesWriter = new RAMOutputStream();
+ private final RAMOutputStream bufferWriter = new RAMOutputStream();
private void flushBlock() throws IOException {
//System.out.println("BTW.flushBlock seg=" + segment + " pendingCount=" + pendingCount + " fp=" + out.getFilePointer());
@@ -330,11 +328,11 @@ public class TempBlockTermsWriter extend
// TODO: cutover to better intblock codec. simple64?
// write prefix, suffix first:
for(int termCount=0;termCount<pendingCount;termCount++) {
- final TermStats stats = pendingTerms[termCount].stats;
- assert stats != null;
- bytesWriter.writeVInt(stats.docFreq);
+ final BlockTermState state = pendingTerms[termCount].state;
+ assert state != null;
+ bytesWriter.writeVInt(state.docFreq);
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
- bytesWriter.writeVLong(stats.totalTermFreq-stats.docFreq);
+ bytesWriter.writeVLong(state.totalTermFreq-state.docFreq);
}
}
out.writeVInt((int) bytesWriter.getFilePointer());
@@ -342,16 +340,17 @@ public class TempBlockTermsWriter extend
bytesWriter.reset();
// 4th pass: write the metadata
- long[] lastLongs = new long[longsSize];
- Arrays.fill(lastLongs, 0);
+ long[] longs = new long[longsSize];
+ boolean absolute = true;
for(int termCount=0;termCount<pendingCount;termCount++) {
- final long[] longs = pendingTerms[termCount].longs;
- final byte[] bytes = pendingTerms[termCount].bytes;
+ final BlockTermState state = pendingTerms[termCount].state;
+ postingsWriter.encodeTerm(longs, bufferWriter, fieldInfo, state, absolute);
for (int i = 0; i < longsSize; i++) {
- bytesWriter.writeVLong(longs[i] - lastLongs[i]);
+ bytesWriter.writeVLong(longs[i]);
}
- lastLongs = longs;
- bytesWriter.writeBytes(bytes, 0, bytes.length);
+ bufferWriter.writeTo(bytesWriter);
+ bufferWriter.reset();
+ absolute = false;
}
out.writeVInt((int) bytesWriter.getFilePointer());
bytesWriter.writeTo(out);
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/TempPostingsWriterBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/TempPostingsWriterBase.java?rev=1514978&r1=1514977&r2=1514978&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/TempPostingsWriterBase.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/TempPostingsWriterBase.java Sat Aug 17 10:59:25 2013
@@ -49,7 +49,10 @@ public abstract class TempPostingsWriter
/** Called once after startup, before any terms have been
* added. Implementations typically write a header to
* the provided {@code termsOut}. */
- public abstract void start(IndexOutput termsOut) throws IOException;
+ public abstract void init(IndexOutput termsOut) throws IOException;
+
+ /** Return a newly created empty TermState */
+ public abstract BlockTermState newTermState() throws IOException;
/** Start a new term. Note that a matching call to {@link
* #finishTerm(long[], DataOutput, TermStats)} is done, only if the term has at least one
@@ -57,8 +60,15 @@ public abstract class TempPostingsWriter
public abstract void startTerm() throws IOException;
/** Finishes the current term. The provided {@link
- * TermStats} contains the term's summary statistics. */
- public abstract void finishTerm(long[] longs, DataOutput out, TermStats stats) throws IOException;
+ * BlockTermState} contains the term's summary statistics,
+ * and will hold metadata from PBF when returned */
+ public abstract void finishTerm(BlockTermState state) throws IOException;
+
+ /**
+ * Encode metadata as long[] and byte[]. When {@code absolute} is false, the
+ * current term is delta-encoded against the most recently encoded term.
+ */
+ public abstract void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState state, boolean absolute) throws IOException;
/**
* Return the fixed length of longs,
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTreeTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTreeTermsWriter.java?rev=1514978&r1=1514977&r2=1514978&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTreeTermsWriter.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTreeTermsWriter.java Sat Aug 17 10:59:25 2013
@@ -42,6 +42,7 @@ import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.NoOutputs;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.TempPostingsWriterBase;
import org.apache.lucene.codecs.PostingsConsumer;
import org.apache.lucene.codecs.FieldsConsumer;
@@ -310,7 +311,7 @@ public class TempBlockTreeTermsWriter ex
// System.out.println("BTW.init seg=" + state.segmentName);
- postingsWriter.start(out); // have consumer write its format/header
+ postingsWriter.init(out); // have consumer write its format/header
success = true;
} finally {
if (!success) {
@@ -364,18 +365,13 @@ public class TempBlockTreeTermsWriter ex
private static final class PendingTerm extends PendingEntry {
public final BytesRef term;
- // stats
- public final TermStats stats;
- // metadata
- public long[] longs;
- public byte[] bytes;
+ // stats + metadata
+ public final BlockTermState state;
- public PendingTerm(BytesRef term, TermStats stats, long[] longs, byte[] bytes) {
+ public PendingTerm(BytesRef term, BlockTermState state) {
super(true);
this.term = term;
- this.stats = stats;
- this.longs = longs;
- this.bytes = bytes;
+ this.state = state;
}
@Override
@@ -857,14 +853,15 @@ public class TempBlockTreeTermsWriter ex
int termCount;
- long[] lastLongs = new long[longsSize];
- Arrays.fill(lastLongs, 0);
+ long[] longs = new long[longsSize];
+ boolean absolute = true;
if (isLeafBlock) {
subIndices = null;
for (PendingEntry ent : slice) {
assert ent.isTerm;
PendingTerm term = (PendingTerm) ent;
+ BlockTermState state = term.state;
final int suffix = term.term.length - prefixLength;
// if (DEBUG) {
// BytesRef suffixBytes = new BytesRef(suffix);
@@ -877,19 +874,21 @@ public class TempBlockTreeTermsWriter ex
suffixWriter.writeBytes(term.term.bytes, prefixLength, suffix);
// Write term stats, to separate byte[] blob:
- statsWriter.writeVInt(term.stats.docFreq);
+ statsWriter.writeVInt(state.docFreq);
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
- assert term.stats.totalTermFreq >= term.stats.docFreq: term.stats.totalTermFreq + " vs " + term.stats.docFreq;
- statsWriter.writeVLong(term.stats.totalTermFreq - term.stats.docFreq);
+ assert state.totalTermFreq >= state.docFreq: state.totalTermFreq + " vs " + state.docFreq;
+ statsWriter.writeVLong(state.totalTermFreq - state.docFreq);
}
// Write term meta data
+ postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute);
for (int pos = 0; pos < longsSize; pos++) {
- assert term.longs[pos] >= 0;
- metaWriter.writeVLong(term.longs[pos] - lastLongs[pos]);
+ assert longs[pos] >= 0;
+ metaWriter.writeVLong(longs[pos]);
}
- lastLongs = term.longs;
- metaWriter.writeBytes(term.bytes, 0, term.bytes.length);
+ bytesWriter.writeTo(metaWriter);
+ bytesWriter.reset();
+ absolute = false;
}
termCount = length;
} else {
@@ -898,6 +897,7 @@ public class TempBlockTreeTermsWriter ex
for (PendingEntry ent : slice) {
if (ent.isTerm) {
PendingTerm term = (PendingTerm) ent;
+ BlockTermState state = term.state;
final int suffix = term.term.length - prefixLength;
// if (DEBUG) {
// BytesRef suffixBytes = new BytesRef(suffix);
@@ -911,10 +911,10 @@ public class TempBlockTreeTermsWriter ex
suffixWriter.writeBytes(term.term.bytes, prefixLength, suffix);
// Write term stats, to separate byte[] blob:
- statsWriter.writeVInt(term.stats.docFreq);
+ statsWriter.writeVInt(state.docFreq);
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
- assert term.stats.totalTermFreq >= term.stats.docFreq;
- statsWriter.writeVLong(term.stats.totalTermFreq - term.stats.docFreq);
+ assert state.totalTermFreq >= state.docFreq;
+ statsWriter.writeVLong(state.totalTermFreq - state.docFreq);
}
// TODO: now that terms dict "sees" these longs,
@@ -926,12 +926,14 @@ public class TempBlockTreeTermsWriter ex
// separate anymore:
// Write term meta data
+ postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute);
for (int pos = 0; pos < longsSize; pos++) {
- assert term.longs[pos] >= 0;
- metaWriter.writeVLong(term.longs[pos] - lastLongs[pos]);
+ assert longs[pos] >= 0;
+ metaWriter.writeVLong(longs[pos]);
}
- lastLongs = term.longs;
- metaWriter.writeBytes(term.bytes, 0, term.bytes.length);
+ bytesWriter.writeTo(metaWriter);
+ bytesWriter.reset();
+ absolute = false;
termCount++;
} else {
@@ -1045,14 +1047,12 @@ public class TempBlockTreeTermsWriter ex
//if (DEBUG) System.out.println("BTTW.finishTerm term=" + fieldInfo.name + ":" + toString(text) + " seg=" + segment + " df=" + stats.docFreq);
blockBuilder.add(Util.toIntsRef(text, scratchIntsRef), noOutputs.getNoOutput());
+ BlockTermState state = postingsWriter.newTermState();
+ state.docFreq = stats.docFreq;
+ state.totalTermFreq = stats.totalTermFreq;
+ postingsWriter.finishTerm(state);
- long[] longs = new long[longsSize];
- postingsWriter.finishTerm(longs, metaWriter, stats);
- byte[] bytes = new byte[(int)metaWriter.getFilePointer()];
- metaWriter.writeTo(bytes, 0);
- metaWriter.reset();
-
- PendingTerm term = new PendingTerm(BytesRef.deepCopyOf(text), stats, longs, bytes);
+ PendingTerm term = new PendingTerm(BytesRef.deepCopyOf(text), state);
pending.add(term);
numTerms++;
}
@@ -1104,6 +1104,7 @@ public class TempBlockTreeTermsWriter ex
private final RAMOutputStream suffixWriter = new RAMOutputStream();
private final RAMOutputStream statsWriter = new RAMOutputStream();
private final RAMOutputStream metaWriter = new RAMOutputStream();
+ private final RAMOutputStream bytesWriter = new RAMOutputStream();
}
@Override
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsWriter.java?rev=1514978&r1=1514977&r2=1514978&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsWriter.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsWriter.java Sat Aug 17 10:59:25 2013
@@ -37,6 +37,7 @@ import org.apache.lucene.util.fst.Builde
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
+import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.TempPostingsWriterBase;
import org.apache.lucene.codecs.PostingsConsumer;
import org.apache.lucene.codecs.FieldsConsumer;
@@ -75,7 +76,7 @@ public class TempFSTOrdTermsWriter exten
this.blockOut = state.directory.createOutput(termsBlockFileName, state.context);
writeHeader(indexOut);
writeHeader(blockOut);
- this.postingsWriter.start(blockOut);
+ this.postingsWriter.init(blockOut);
success = true;
} finally {
if (!success) {
@@ -218,9 +219,14 @@ public class TempFSTOrdTermsWriter exten
} else {
statsOut.writeVInt(stats.docFreq);
}
- postingsWriter.finishTerm(longs, metaBytesOut, stats);
+ BlockTermState state = postingsWriter.newTermState();
+ state.docFreq = stats.docFreq;
+ state.totalTermFreq = stats.totalTermFreq;
+ postingsWriter.finishTerm(state);
+ postingsWriter.encodeTerm(longs, metaBytesOut, fieldInfo, state, false);
for (int i = 0; i < longsSize; i++) {
- metaLongsOut.writeVLong(longs[i] - lastLongs[i]);
+ metaLongsOut.writeVLong(longs[i]);
+ lastLongs[i] += longs[i];
}
metaLongsOut.writeVLong(metaBytesOut.getFilePointer() - lastMetaBytesFP);
@@ -228,7 +234,6 @@ public class TempFSTOrdTermsWriter exten
numTerms++;
lastMetaBytesFP = metaBytesOut.getFilePointer();
- lastLongs = longs;
}
@Override
@@ -260,7 +265,7 @@ public class TempFSTOrdTermsWriter exten
lastBlockStatsFP = statsOut.getFilePointer();
lastBlockMetaLongsFP = metaLongsOut.getFilePointer();
lastBlockMetaBytesFP = metaBytesOut.getFilePointer();
- lastBlockLongs = lastLongs;
+ System.arraycopy(lastLongs, 0, lastBlockLongs, 0, longsSize);
}
}
}
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTTermsWriter.java?rev=1514978&r1=1514977&r2=1514978&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTTermsWriter.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTTermsWriter.java Sat Aug 17 10:59:25 2013
@@ -36,6 +36,7 @@ import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;
+import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.TempPostingsWriterBase;
import org.apache.lucene.codecs.PostingsConsumer;
import org.apache.lucene.codecs.FieldsConsumer;
@@ -67,7 +68,7 @@ public class TempFSTTermsWriter extends
boolean success = false;
try {
writeHeader(out);
- this.postingsWriter.start(out);
+ this.postingsWriter.init(out);
success = true;
} finally {
if (!success) {
@@ -167,12 +168,14 @@ public class TempFSTTermsWriter extends
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
// write term meta data into fst
+ final BlockTermState state = postingsWriter.newTermState();
final TempTermOutputs.TempMetaData meta = new TempTermOutputs.TempMetaData();
meta.longs = new long[longsSize];
meta.bytes = null;
- meta.docFreq = stats.docFreq;
- meta.totalTermFreq = stats.totalTermFreq;
- postingsWriter.finishTerm(meta.longs, metaWriter, stats);
+ meta.docFreq = state.docFreq = stats.docFreq;
+ meta.totalTermFreq = state.totalTermFreq = stats.totalTermFreq;
+ postingsWriter.finishTerm(state);
+ postingsWriter.encodeTerm(meta.longs, metaWriter, fieldInfo, state, true);
final int bytesSize = (int)metaWriter.getFilePointer();
if (bytesSize > 0) {
meta.bytes = new byte[bytesSize];
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempPostingsReader.java?rev=1514978&r1=1514977&r2=1514978&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempPostingsReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempPostingsReader.java Sat Aug 17 10:59:25 2013
@@ -154,11 +154,6 @@ public final class TempPostingsReader ex
// freq is always implicitly totalTermFreq in this case.
int singletonDocID;
- // Only used by the "primary" TermState -- clones don't
- // copy this (basically they are "transient"):
- ByteArrayDataInput bytesReader; // TODO: should this NOT be in the TermState...?
- byte[] bytes;
-
@Override
public IntBlockTermState clone() {
IntBlockTermState other = new IntBlockTermState();
@@ -176,11 +171,6 @@ public final class TempPostingsReader ex
lastPosBlockOffset = other.lastPosBlockOffset;
skipOffset = other.skipOffset;
singletonDocID = other.singletonDocID;
-
- // Do not copy bytes, bytesReader (else TermState is
- // very heavy, ie drags around the entire block's
- // byte[]). On seek back, if next() is in fact used
- // (rare!), they will be re-read from disk.
}
@Override
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempPostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempPostingsWriter.java?rev=1514978&r1=1514977&r2=1514978&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempPostingsWriter.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempPostingsWriter.java Sat Aug 17 10:59:25 2013
@@ -25,6 +25,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.TempPostingsWriterBase;
import org.apache.lucene.codecs.TermStats;
@@ -74,13 +75,16 @@ public final class TempPostingsWriter ex
final IndexOutput posOut;
final IndexOutput payOut;
+ final static IntBlockTermState emptyState = new IntBlockTermState();
+ IntBlockTermState lastState;
+
// How current field indexes postings:
private boolean fieldHasFreqs;
private boolean fieldHasPositions;
private boolean fieldHasOffsets;
private boolean fieldHasPayloads;
- // Holds starting file pointers for each term:
+ // Holds starting file pointers for current term:
private long docTermStartFP;
private long posTermStartFP;
private long payTermStartFP;
@@ -189,8 +193,22 @@ public final class TempPostingsWriter ex
this(state, PackedInts.COMPACT);
}
+ private final static class IntBlockTermState extends BlockTermState {
+ long docTermStartFP = 0;
+ long posTermStartFP = 0;
+ long payTermStartFP = 0;
+ long skipOffset = -1;
+ long lastPosBlockOffset = -1;
+ int singletonDocID = -1;
+ }
+
+ @Override
+ public IntBlockTermState newTermState() {
+ return new IntBlockTermState();
+ }
+
@Override
- public void start(IndexOutput termsOut) throws IOException {
+ public void init(IndexOutput termsOut) throws IOException {
CodecUtil.writeHeader(termsOut, TERMS_CODEC, VERSION_CURRENT);
termsOut.writeVInt(BLOCK_SIZE);
}
@@ -205,6 +223,7 @@ public final class TempPostingsWriter ex
fieldHasOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
fieldHasPayloads = fieldInfo.hasPayloads();
skipWriter.setField(fieldHasPositions, fieldHasOffsets, fieldHasPayloads);
+ lastState = emptyState;
if (fieldHasPositions) {
if (fieldHasPayloads || fieldHasOffsets) {
return 3; // doc + pos + pay FP
@@ -359,19 +378,18 @@ public final class TempPostingsWriter ex
}
}
- // nocommit explain about the "don't care" values
-
/** Called when we are done adding docs to this term */
@Override
- public void finishTerm(long[] longs, DataOutput out, TermStats stats) throws IOException {
- assert stats.docFreq > 0;
+ public void finishTerm(BlockTermState _state) throws IOException {
+ IntBlockTermState state = (IntBlockTermState) _state;
+ assert state.docFreq > 0;
// TODO: wasteful we are counting this (counting # docs
// for this term) in two places?
- assert stats.docFreq == docCount: stats.docFreq + " vs " + docCount;
+ assert state.docFreq == docCount: state.docFreq + " vs " + docCount;
// if (DEBUG) {
- // System.out.println("FPW.finishTerm docFreq=" + stats.docFreq);
+ // System.out.println("FPW.finishTerm docFreq=" + state.docFreq);
// }
// if (DEBUG) {
@@ -382,7 +400,7 @@ public final class TempPostingsWriter ex
// docFreq == 1, don't write the single docid/freq to a separate file along with a pointer to it.
final int singletonDocID;
- if (stats.docFreq == 1) {
+ if (state.docFreq == 1) {
// pulse the singleton docid into the term dictionary, freq is implicitly totalTermFreq
singletonDocID = docDeltaBuffer[0];
} else {
@@ -413,8 +431,8 @@ public final class TempPostingsWriter ex
// totalTermFreq is just total number of positions(or payloads, or offsets)
// associated with current term.
- assert stats.totalTermFreq != -1;
- if (stats.totalTermFreq > BLOCK_SIZE) {
+ assert state.totalTermFreq != -1;
+ if (state.totalTermFreq > BLOCK_SIZE) {
// record file offset for last pos in last block
lastPosBlockOffset = posOut.getFilePointer() - posTermStartFP;
} else {
@@ -479,7 +497,7 @@ public final class TempPostingsWriter ex
}
}
// if (DEBUG) {
- // System.out.println(" totalTermFreq=" + stats.totalTermFreq + " lastPosBlockOffset=" + lastPosBlockOffset);
+ // System.out.println(" totalTermFreq=" + state.totalTermFreq + " lastPosBlockOffset=" + lastPosBlockOffset);
// }
} else {
lastPosBlockOffset = -1;
@@ -498,35 +516,48 @@ public final class TempPostingsWriter ex
// System.out.println(" no skip: docCount=" + docCount);
// }
}
-
// if (DEBUG) {
// System.out.println(" payStartFP=" + payStartFP);
// }
+ state.docTermStartFP = docTermStartFP;
+ state.posTermStartFP = posTermStartFP;
+ state.payTermStartFP = payTermStartFP;
+ state.singletonDocID = singletonDocID;
+ state.skipOffset = skipOffset;
+ state.lastPosBlockOffset = lastPosBlockOffset;
+ docBufferUpto = 0;
+ posBufferUpto = 0;
+ lastDocID = 0;
+ docCount = 0;
+ }
+
+ // nocommit explain about the "don't care" values
- // write metadata
- longs[0] = docTermStartFP;
+ @Override
+ public void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
+ IntBlockTermState state = (IntBlockTermState)_state;
+ if (absolute) {
+ lastState = emptyState;
+ }
+ longs[0] = state.docTermStartFP - lastState.docTermStartFP;
if (fieldHasPositions) {
- longs[1] = posTermStartFP;
+ longs[1] = state.posTermStartFP - lastState.posTermStartFP;
if (fieldHasPayloads || fieldHasOffsets) {
- longs[2] = payTermStartFP;
+ longs[2] = state.payTermStartFP - lastState.payTermStartFP;
}
}
- if (singletonDocID != -1) {
- out.writeVInt(singletonDocID);
+ if (state.singletonDocID != -1) {
+ out.writeVInt(state.singletonDocID);
}
if (fieldHasPositions) {
- if (lastPosBlockOffset != -1) {
- out.writeVLong(lastPosBlockOffset);
+ if (state.lastPosBlockOffset != -1) {
+ out.writeVLong(state.lastPosBlockOffset);
}
}
- if (skipOffset != -1) {
- out.writeVLong(skipOffset);
+ if (state.skipOffset != -1) {
+ out.writeVLong(state.skipOffset);
}
-
- docBufferUpto = 0;
- posBufferUpto = 0;
- lastDocID = 0;
- docCount = 0;
+ lastState = state;
}
@Override