Posted to commits@lucene.apache.org by ha...@apache.org on 2013/09/09 18:07:57 UTC
svn commit: r1521173 [1/2] - in /lucene/dev/trunk: ./ dev-tools/ lucene/
lucene/analysis/ lucene/analysis/common/ lucene/benchmark/
lucene/classification/ lucene/codecs/
lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/
lucene/codecs/src/java...
Author: han
Date: Mon Sep 9 16:07:56 2013
New Revision: 1521173
URL: http://svn.apache.org/r1521173
Log:
LUCENE-3069: Lucene should have an entirely memory resident term dictionary
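Note (not part of the commit message): the diffs below replace the old per-block protocol on PostingsWriterBase/PostingsReaderBase (start()/flushTermsBlock()/finishTerm(TermStats) on the write side, readTermsBlock()/nextTerm() on the read side) with a per-term one: setField() now returns how many metadata longs the postings format needs, finishTerm() fills a BlockTermState, and encodeTerm()/decodeTerm() exchange that metadata as a long[] plus an optional byte blob, delta-coded within a block under control of the 'absolute' flag. Below is a minimal sketch of that contract, assuming a single hypothetical file pointer (docStartFP) as the only per-term metadata; everything except the method signatures and the meaning of 'absolute' is illustrative, not code from this commit.

// Sketch only: the new encodeTerm/decodeTerm contract.
// 'docStartFP' and the surrounding class are hypothetical.
import java.io.IOException;

import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;

class TermMetadataContractSketch {

  static class ExampleTermState extends BlockTermState {
    long docStartFP;                  // hypothetical: file pointer into a postings file
  }

  private long lastDocStartFP;        // writer-side delta base, reset at each block start

  // Writer side: fill longs[] (size = value returned by setField()) and optionally
  // append extra bytes to 'out'. 'absolute' is true for the first term of a block.
  void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo,
                  BlockTermState _state, boolean absolute) throws IOException {
    ExampleTermState state = (ExampleTermState) _state;
    if (absolute) {
      lastDocStartFP = 0;             // block start: emit an absolute value
    }
    longs[0] = state.docStartFP - lastDocStartFP;
    lastDocStartFP = state.docStartFP;
  }

  // Reader side: the terms dictionary has already read longs[] for this term and
  // passes any remaining metadata bytes in 'in'.
  void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo,
                  BlockTermState _state, boolean absolute) throws IOException {
    ExampleTermState state = (ExampleTermState) _state;
    if (absolute) {
      state.docStartFP = longs[0];
    } else {
      state.docStartFP += longs[0];
    }
  }
}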
Added:
lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java
- copied unchanged from r1520740, lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java
lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java
- copied unchanged from r1520740, lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java
lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermOutputs.java
- copied unchanged from r1520740, lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermOutputs.java
lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
- copied unchanged from r1520740, lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java
- copied unchanged from r1520740, lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java
lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/
- copied from r1520740, lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/
Modified:
lucene/dev/trunk/ (props changed)
lucene/dev/trunk/dev-tools/ (props changed)
lucene/dev/trunk/lucene/ (props changed)
lucene/dev/trunk/lucene/CHANGES.txt (props changed)
lucene/dev/trunk/lucene/analysis/ (props changed)
lucene/dev/trunk/lucene/analysis/common/ (props changed)
lucene/dev/trunk/lucene/benchmark/ (props changed)
lucene/dev/trunk/lucene/build.xml (props changed)
lucene/dev/trunk/lucene/classification/ (props changed)
lucene/dev/trunk/lucene/codecs/ (props changed)
lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java
lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java
lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexOutput.java
lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexOutput.java
lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/package.html
lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java
lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java
lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java
lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/sep/IntIndexOutput.java
lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java
lucene/dev/trunk/lucene/common-build.xml (props changed)
lucene/dev/trunk/lucene/core/ (props changed)
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/Outputs.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
lucene/dev/trunk/lucene/demo/ (props changed)
lucene/dev/trunk/lucene/facet/ (props changed)
lucene/dev/trunk/lucene/grouping/ (props changed)
lucene/dev/trunk/lucene/highlighter/ (props changed)
lucene/dev/trunk/lucene/join/ (props changed)
lucene/dev/trunk/lucene/licenses/ (props changed)
lucene/dev/trunk/lucene/memory/ (props changed)
lucene/dev/trunk/lucene/misc/ (props changed)
lucene/dev/trunk/lucene/module-build.xml (props changed)
lucene/dev/trunk/lucene/queries/ (props changed)
lucene/dev/trunk/lucene/queryparser/ (props changed)
lucene/dev/trunk/lucene/replicator/ (props changed)
lucene/dev/trunk/lucene/sandbox/ (props changed)
lucene/dev/trunk/lucene/site/ (props changed)
lucene/dev/trunk/lucene/spatial/ (props changed)
lucene/dev/trunk/lucene/suggest/ (props changed)
lucene/dev/trunk/lucene/test-framework/ (props changed)
lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntIndexOutput.java
lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java
lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java
lucene/dev/trunk/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
lucene/dev/trunk/lucene/tools/ (props changed)
lucene/dev/trunk/solr/ (props changed)
lucene/dev/trunk/solr/CHANGES.txt (props changed)
lucene/dev/trunk/solr/NOTICE.txt (props changed)
lucene/dev/trunk/solr/build.xml (props changed)
lucene/dev/trunk/solr/common-build.xml (props changed)
lucene/dev/trunk/solr/contrib/ (props changed)
lucene/dev/trunk/solr/core/ (props changed)
lucene/dev/trunk/solr/example/ (props changed)
lucene/dev/trunk/solr/licenses/ (props changed)
lucene/dev/trunk/solr/site/ (props changed)
lucene/dev/trunk/solr/solrj/ (props changed)
lucene/dev/trunk/solr/test-framework/ (props changed)
lucene/dev/trunk/solr/webapp/ (props changed)
Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java?rev=1521173&r1=1521172&r2=1521173&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java (original)
+++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java Mon Sep 9 16:07:56 2013
@@ -142,6 +142,7 @@ public class BlockTermsReader extends Fi
final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
final long sumDocFreq = in.readVLong();
final int docCount = in.readVInt();
+ final int longsSize = version >= BlockTermsWriter.VERSION_META_ARRAY ? in.readVInt() : 0;
if (docCount < 0 || docCount > info.getDocCount()) { // #docs with field must be <= #docs
throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
}
@@ -151,7 +152,7 @@ public class BlockTermsReader extends Fi
if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + in + ")");
}
- FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount));
+ FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount, longsSize));
if (previous != null) {
throw new CorruptIndexException("duplicate fields: " + fieldInfo.name + " (resource=" + in + ")");
}
@@ -230,8 +231,9 @@ public class BlockTermsReader extends Fi
final long sumTotalTermFreq;
final long sumDocFreq;
final int docCount;
+ final int longsSize;
- FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount) {
+ FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
this.numTerms = numTerms;
@@ -239,6 +241,7 @@ public class BlockTermsReader extends Fi
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
+ this.longsSize = longsSize;
}
@Override
@@ -326,6 +329,10 @@ public class BlockTermsReader extends Fi
private final ByteArrayDataInput freqReader = new ByteArrayDataInput();
private int metaDataUpto;
+ private long[] longs;
+ private byte[] bytes;
+ private ByteArrayDataInput bytesReader;
+
public SegmentTermsEnum() throws IOException {
in = BlockTermsReader.this.in.clone();
in.seek(termsStartPointer);
@@ -339,6 +346,7 @@ public class BlockTermsReader extends Fi
termSuffixes = new byte[128];
docFreqBytes = new byte[64];
//System.out.println("BTR.enum init this=" + this + " postingsReader=" + postingsReader);
+ longs = new long[longsSize];
}
@Override
@@ -415,7 +423,7 @@ public class BlockTermsReader extends Fi
assert result;
indexIsCurrent = true;
- didIndexNext = false;
+ didIndexNext = false;
if (doOrd) {
state.ord = indexEnum.ord()-1;
@@ -789,11 +797,20 @@ public class BlockTermsReader extends Fi
//System.out.println(" freq bytes len=" + len);
in.readBytes(docFreqBytes, 0, len);
freqReader.reset(docFreqBytes, 0, len);
- metaDataUpto = 0;
- state.termBlockOrd = 0;
+ // metadata
+ len = in.readVInt();
+ if (bytes == null) {
+ bytes = new byte[ArrayUtil.oversize(len, 1)];
+ bytesReader = new ByteArrayDataInput();
+ } else if (bytes.length < len) {
+ bytes = new byte[ArrayUtil.oversize(len, 1)];
+ }
+ in.readBytes(bytes, 0, len);
+ bytesReader.reset(bytes, 0, len);
- postingsReader.readTermsBlock(in, fieldInfo, state);
+ metaDataUpto = 0;
+ state.termBlockOrd = 0;
indexIsCurrent = false;
//System.out.println(" indexIsCurrent=" + indexIsCurrent);
@@ -811,9 +828,7 @@ public class BlockTermsReader extends Fi
// lazily catch up on metadata decode:
final int limit = state.termBlockOrd;
- // We must set/incr state.termCount because
- // postings impl can look at this
- state.termBlockOrd = metaDataUpto;
+ boolean absolute = metaDataUpto == 0;
// TODO: better API would be "jump straight to term=N"???
while (metaDataUpto < limit) {
//System.out.println(" decode mdUpto=" + metaDataUpto);
@@ -825,16 +840,21 @@ public class BlockTermsReader extends Fi
// TODO: if docFreq were bulk decoded we could
// just skipN here:
+
+ // docFreq, totalTermFreq
state.docFreq = freqReader.readVInt();
//System.out.println(" dF=" + state.docFreq);
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
state.totalTermFreq = state.docFreq + freqReader.readVLong();
//System.out.println(" totTF=" + state.totalTermFreq);
}
-
- postingsReader.nextTerm(fieldInfo, state);
+ // metadata
+ for (int i = 0; i < longs.length; i++) {
+ longs[i] = bytesReader.readVLong();
+ }
+ postingsReader.decodeTerm(longs, bytesReader, fieldInfo, state, absolute);
metaDataUpto++;
- state.termBlockOrd++;
+ absolute = false;
}
} else {
//System.out.println(" skip! seekPending");
Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java?rev=1521173&r1=1521172&r2=1521173&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java (original)
+++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java Mon Sep 9 16:07:56 2013
@@ -27,6 +27,7 @@ import org.apache.lucene.codecs.FieldsCo
import org.apache.lucene.codecs.PostingsConsumer;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.TermStats;
+import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
@@ -59,7 +60,8 @@ public class BlockTermsWriter extends Fi
// Initial format
public static final int VERSION_START = 0;
public static final int VERSION_APPEND_ONLY = 1;
- public static final int VERSION_CURRENT = VERSION_APPEND_ONLY;
+ public static final int VERSION_META_ARRAY = 2;
+ public static final int VERSION_CURRENT = VERSION_META_ARRAY;
/** Extension of terms file */
static final String TERMS_EXTENSION = "tib";
@@ -77,8 +79,9 @@ public class BlockTermsWriter extends Fi
public final long sumTotalTermFreq;
public final long sumDocFreq;
public final int docCount;
+ public final int longsSize;
- public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount) {
+ public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
this.termsStartPointer = termsStartPointer;
@@ -86,6 +89,7 @@ public class BlockTermsWriter extends Fi
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
+ this.longsSize = longsSize;
}
}
@@ -109,7 +113,7 @@ public class BlockTermsWriter extends Fi
//System.out.println("BTW.init seg=" + state.segmentName);
- postingsWriter.start(out); // have consumer write its format/header
+ postingsWriter.init(out); // have consumer write its format/header
success = true;
} finally {
if (!success) {
@@ -133,9 +137,7 @@ public class BlockTermsWriter extends Fi
@Override
public void close() throws IOException {
-
try {
-
final long dirStart = out.getFilePointer();
out.writeVInt(fields.size());
@@ -148,6 +150,9 @@ public class BlockTermsWriter extends Fi
}
out.writeVLong(field.sumDocFreq);
out.writeVInt(field.docCount);
+ if (VERSION_CURRENT >= VERSION_META_ARRAY) {
+ out.writeVInt(field.longsSize);
+ }
}
writeTrailer(dirStart);
} finally {
@@ -161,7 +166,7 @@ public class BlockTermsWriter extends Fi
private static class TermEntry {
public final BytesRef term = new BytesRef();
- public TermStats stats;
+ public BlockTermState state;
}
class TermsWriter extends TermsConsumer {
@@ -173,6 +178,7 @@ public class BlockTermsWriter extends Fi
long sumTotalTermFreq;
long sumDocFreq;
int docCount;
+ int longsSize;
private TermEntry[] pendingTerms;
@@ -190,8 +196,8 @@ public class BlockTermsWriter extends Fi
pendingTerms[i] = new TermEntry();
}
termsStartPointer = out.getFilePointer();
- postingsWriter.setField(fieldInfo);
this.postingsWriter = postingsWriter;
+ this.longsSize = postingsWriter.setField(fieldInfo);
}
@Override
@@ -237,11 +243,12 @@ public class BlockTermsWriter extends Fi
}
final TermEntry te = pendingTerms[pendingCount];
te.term.copyBytes(text);
- te.stats = stats;
+ te.state = postingsWriter.newTermState();
+ te.state.docFreq = stats.docFreq;
+ te.state.totalTermFreq = stats.totalTermFreq;
+ postingsWriter.finishTerm(te.state);
pendingCount++;
-
- postingsWriter.finishTerm(stats);
numTerms++;
}
@@ -264,7 +271,8 @@ public class BlockTermsWriter extends Fi
termsStartPointer,
sumTotalTermFreq,
sumDocFreq,
- docCount));
+ docCount,
+ longsSize));
}
}
@@ -285,6 +293,7 @@ public class BlockTermsWriter extends Fi
}
private final RAMOutputStream bytesWriter = new RAMOutputStream();
+ private final RAMOutputStream bufferWriter = new RAMOutputStream();
private void flushBlock() throws IOException {
//System.out.println("BTW.flushBlock seg=" + segment + " pendingCount=" + pendingCount + " fp=" + out.getFilePointer());
@@ -318,19 +327,34 @@ public class BlockTermsWriter extends Fi
// TODO: cutover to better intblock codec. simple64?
// write prefix, suffix first:
for(int termCount=0;termCount<pendingCount;termCount++) {
- final TermStats stats = pendingTerms[termCount].stats;
- assert stats != null;
- bytesWriter.writeVInt(stats.docFreq);
+ final BlockTermState state = pendingTerms[termCount].state;
+ assert state != null;
+ bytesWriter.writeVInt(state.docFreq);
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
- bytesWriter.writeVLong(stats.totalTermFreq-stats.docFreq);
+ bytesWriter.writeVLong(state.totalTermFreq-state.docFreq);
}
}
+ out.writeVInt((int) bytesWriter.getFilePointer());
+ bytesWriter.writeTo(out);
+ bytesWriter.reset();
+ // 4th pass: write the metadata
+ long[] longs = new long[longsSize];
+ boolean absolute = true;
+ for(int termCount=0;termCount<pendingCount;termCount++) {
+ final BlockTermState state = pendingTerms[termCount].state;
+ postingsWriter.encodeTerm(longs, bufferWriter, fieldInfo, state, absolute);
+ for (int i = 0; i < longsSize; i++) {
+ bytesWriter.writeVLong(longs[i]);
+ }
+ bufferWriter.writeTo(bytesWriter);
+ bufferWriter.reset();
+ absolute = false;
+ }
out.writeVInt((int) bytesWriter.getFilePointer());
bytesWriter.writeTo(out);
bytesWriter.reset();
- postingsWriter.flushTermsBlock(pendingCount, pendingCount);
lastPrevTerm.copyBytes(pendingTerms[pendingCount-1].term);
pendingCount = 0;
}
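Note: the flushBlock() change above appends, after the existing docFreq/totalTermFreq section, a second length-prefixed section holding, for each pending term, longsSize vlongs followed by whatever bytes the postings writer appended via encodeTerm(). A small sketch of walking that section, mirroring the BlockTermsReader hunk earlier in this commit (the real reader loads the blob when the block is read and decodes it lazily per term; the method and its parameters here are illustrative only):

import java.io.IOException;

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;

class BlockMetadataSectionSketch {

  // Reads the metadata section of one terms block and steps through it term by term.
  void walkMetadata(IndexInput in, int longsSize, int termCount) throws IOException {
    int len = in.readVInt();                         // total size of the metadata blob
    byte[] bytes = new byte[ArrayUtil.oversize(len, 1)];
    in.readBytes(bytes, 0, len);
    ByteArrayDataInput bytesReader = new ByteArrayDataInput();
    bytesReader.reset(bytes, 0, len);

    long[] longs = new long[longsSize];
    boolean absolute = true;                         // only the first term in the block is absolute
    for (int term = 0; term < termCount; term++) {
      for (int i = 0; i < longsSize; i++) {
        longs[i] = bytesReader.readVLong();
      }
      // ...postingsReader.decodeTerm(longs, bytesReader, fieldInfo, state, absolute);
      absolute = false;
    }
  }
}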
Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexOutput.java?rev=1521173&r1=1521172&r2=1521173&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexOutput.java (original)
+++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexOutput.java Mon Sep 9 16:07:56 2013
@@ -24,6 +24,7 @@ package org.apache.lucene.codecs.intbloc
import java.io.IOException;
import org.apache.lucene.codecs.sep.IntIndexOutput;
+import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
/** Abstract base class that writes fixed-size blocks of ints
@@ -51,7 +52,7 @@ public abstract class FixedIntBlockIndex
protected abstract void flushBlock() throws IOException;
@Override
- public IntIndexOutput.Index index() throws IOException {
+ public IntIndexOutput.Index index() {
return new Index();
}
@@ -79,7 +80,7 @@ public abstract class FixedIntBlockIndex
}
@Override
- public void write(IndexOutput indexOut, boolean absolute) throws IOException {
+ public void write(DataOutput indexOut, boolean absolute) throws IOException {
if (absolute) {
indexOut.writeVInt(upto);
indexOut.writeVLong(fp);
Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexOutput.java?rev=1521173&r1=1521172&r2=1521173&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexOutput.java (original)
+++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexOutput.java Mon Sep 9 16:07:56 2013
@@ -24,6 +24,7 @@ package org.apache.lucene.codecs.intbloc
import java.io.IOException;
import org.apache.lucene.codecs.sep.IntIndexOutput;
+import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
// TODO: much of this can be shared code w/ the fixed case
@@ -60,7 +61,7 @@ public abstract class VariableIntBlockIn
protected abstract int add(int value) throws IOException;
@Override
- public IntIndexOutput.Index index() throws IOException {
+ public IntIndexOutput.Index index() {
return new Index();
}
@@ -88,7 +89,7 @@ public abstract class VariableIntBlockIn
}
@Override
- public void write(IndexOutput indexOut, boolean absolute) throws IOException {
+ public void write(DataOutput indexOut, boolean absolute) throws IOException {
assert upto >= 0;
if (absolute) {
indexOut.writeVInt(upto);
Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/package.html?rev=1521173&r1=1521172&r2=1521173&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/package.html (original)
+++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/package.html Mon Sep 9 16:07:56 2013
@@ -20,6 +20,6 @@
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
-Postings and DocValues formats that are read entirely into memory.
+Term dictionary, DocValues or Postings formats that are read entirely into memory.
</body>
-</html>
\ No newline at end of file
+</html>
Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java?rev=1521173&r1=1521172&r2=1521173&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java (original)
+++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java Mon Sep 9 16:07:56 2013
@@ -79,7 +79,7 @@ public abstract class PulsingPostingsFor
// Terms that have <= freqCutoff number of docs are
// "pulsed" (inlined):
- pulsingWriter = new PulsingPostingsWriter(freqCutoff, docsWriter);
+ pulsingWriter = new PulsingPostingsWriter(state, freqCutoff, docsWriter);
FieldsConsumer ret = new BlockTreeTermsWriter(state, pulsingWriter, minBlockSize, maxBlockSize);
success = true;
return ret;
@@ -98,7 +98,7 @@ public abstract class PulsingPostingsFor
boolean success = false;
try {
docsReader = wrappedPostingsBaseFormat.postingsReaderBase(state);
- pulsingReader = new PulsingPostingsReader(docsReader);
+ pulsingReader = new PulsingPostingsReader(state, docsReader);
FieldsProducer ret = new BlockTreeTermsReader(
state.directory, state.fieldInfos, state.segmentInfo,
pulsingReader,
Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java?rev=1521173&r1=1521172&r2=1521173&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java (original)
+++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java Mon Sep 9 16:07:56 2013
@@ -20,16 +20,20 @@ package org.apache.lucene.codecs.pulsing
import java.io.IOException;
import java.util.IdentityHashMap;
import java.util.Map;
+import java.util.TreeMap;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.TermState;
import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Attribute;
@@ -37,6 +41,7 @@ import org.apache.lucene.util.AttributeI
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
/** Concrete class that reads the current doc/freq/skip
* postings format
@@ -50,28 +55,53 @@ public class PulsingPostingsReader exten
// Fallback reader for non-pulsed terms:
final PostingsReaderBase wrappedPostingsReader;
+ final SegmentReadState segmentState;
int maxPositions;
+ int version;
+ TreeMap<Integer, Integer> fields;
- public PulsingPostingsReader(PostingsReaderBase wrappedPostingsReader) {
+ public PulsingPostingsReader(SegmentReadState state, PostingsReaderBase wrappedPostingsReader) {
this.wrappedPostingsReader = wrappedPostingsReader;
+ this.segmentState = state;
}
@Override
public void init(IndexInput termsIn) throws IOException {
- CodecUtil.checkHeader(termsIn, PulsingPostingsWriter.CODEC,
- PulsingPostingsWriter.VERSION_START, PulsingPostingsWriter.VERSION_START);
+ version = CodecUtil.checkHeader(termsIn, PulsingPostingsWriter.CODEC,
+ PulsingPostingsWriter.VERSION_START,
+ PulsingPostingsWriter.VERSION_CURRENT);
maxPositions = termsIn.readVInt();
wrappedPostingsReader.init(termsIn);
+ if (wrappedPostingsReader instanceof PulsingPostingsReader ||
+ version < PulsingPostingsWriter.VERSION_META_ARRAY) {
+ fields = null;
+ } else {
+ fields = new TreeMap<Integer, Integer>();
+ String summaryFileName = IndexFileNames.segmentFileName(segmentState.segmentInfo.name, segmentState.segmentSuffix, PulsingPostingsWriter.SUMMARY_EXTENSION);
+ IndexInput in = null;
+ try {
+ in = segmentState.directory.openInput(summaryFileName, segmentState.context);
+ CodecUtil.checkHeader(in, PulsingPostingsWriter.CODEC, version,
+ PulsingPostingsWriter.VERSION_CURRENT);
+ int numField = in.readVInt();
+ for (int i = 0; i < numField; i++) {
+ int fieldNum = in.readVInt();
+ int longsSize = in.readVInt();
+ fields.put(fieldNum, longsSize);
+ }
+ } finally {
+ IOUtils.closeWhileHandlingException(in);
+ }
+ }
}
private static class PulsingTermState extends BlockTermState {
+ private boolean absolute = false;
+ private long[] longs;
private byte[] postings;
private int postingsSize; // -1 if this term was not inlined
private BlockTermState wrappedTermState;
- ByteArrayDataInput inlinedBytesReader;
- private byte[] inlinedBytes;
-
@Override
public PulsingTermState clone() {
PulsingTermState clone;
@@ -82,6 +112,11 @@ public class PulsingPostingsReader exten
} else {
assert wrappedTermState != null;
clone.wrappedTermState = (BlockTermState) wrappedTermState.clone();
+ clone.absolute = absolute;
+ if (longs != null) {
+ clone.longs = new long[longs.length];
+ System.arraycopy(longs, 0, clone.longs, 0, longs.length);
+ }
}
return clone;
}
@@ -99,11 +134,6 @@ public class PulsingPostingsReader exten
} else {
wrappedTermState.copyFrom(other.wrappedTermState);
}
-
- // NOTE: we do not copy the
- // inlinedBytes/inlinedBytesReader; these are only
- // stored on the "primary" TermState. They are
- // "transient" to cloned term states.
}
@Override
@@ -117,25 +147,6 @@ public class PulsingPostingsReader exten
}
@Override
- public void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
- //System.out.println("PR.readTermsBlock state=" + _termState);
- final PulsingTermState termState = (PulsingTermState) _termState;
- if (termState.inlinedBytes == null) {
- termState.inlinedBytes = new byte[128];
- termState.inlinedBytesReader = new ByteArrayDataInput();
- }
- int len = termsIn.readVInt();
- //System.out.println(" len=" + len + " fp=" + termsIn.getFilePointer());
- if (termState.inlinedBytes.length < len) {
- termState.inlinedBytes = new byte[ArrayUtil.oversize(len, 1)];
- }
- termsIn.readBytes(termState.inlinedBytes, 0, len);
- termState.inlinedBytesReader.reset(termState.inlinedBytes);
- termState.wrappedTermState.termBlockOrd = 0;
- wrappedPostingsReader.readTermsBlock(termsIn, fieldInfo, termState.wrappedTermState);
- }
-
- @Override
public BlockTermState newTermState() throws IOException {
PulsingTermState state = new PulsingTermState();
state.wrappedTermState = wrappedPostingsReader.newTermState();
@@ -143,20 +154,20 @@ public class PulsingPostingsReader exten
}
@Override
- public void nextTerm(FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
+ public void decodeTerm(long[] empty, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute) throws IOException {
//System.out.println("PR nextTerm");
PulsingTermState termState = (PulsingTermState) _termState;
-
+ assert empty.length == 0;
+ termState.absolute = termState.absolute || absolute;
// if we have positions, its total TF, otherwise its computed based on docFreq.
long count = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 ? termState.totalTermFreq : termState.docFreq;
//System.out.println(" count=" + count + " threshold=" + maxPositions);
if (count <= maxPositions) {
-
// Inlined into terms dict -- just read the byte[] blob in,
// but don't decode it now (we only decode when a DocsEnum
// or D&PEnum is pulled):
- termState.postingsSize = termState.inlinedBytesReader.readVInt();
+ termState.postingsSize = in.readVInt();
if (termState.postings == null || termState.postings.length < termState.postingsSize) {
termState.postings = new byte[ArrayUtil.oversize(termState.postingsSize, 1)];
}
@@ -164,16 +175,23 @@ public class PulsingPostingsReader exten
// (the blob holding all inlined terms' blobs for
// current term block) into another byte[] (just the
// blob for this term)...
- termState.inlinedBytesReader.readBytes(termState.postings, 0, termState.postingsSize);
+ in.readBytes(termState.postings, 0, termState.postingsSize);
//System.out.println(" inlined bytes=" + termState.postingsSize);
+ termState.absolute = termState.absolute || absolute;
} else {
//System.out.println(" not inlined");
+ final int longsSize = fields == null ? 0 : fields.get(fieldInfo.number);
+ if (termState.longs == null) {
+ termState.longs = new long[longsSize];
+ }
+ for (int i = 0; i < longsSize; i++) {
+ termState.longs[i] = in.readVLong();
+ }
termState.postingsSize = -1;
- // TODO: should we do full copyFrom? much heavier...?
termState.wrappedTermState.docFreq = termState.docFreq;
termState.wrappedTermState.totalTermFreq = termState.totalTermFreq;
- wrappedPostingsReader.nextTerm(fieldInfo, termState.wrappedTermState);
- termState.wrappedTermState.termBlockOrd++;
+ wrappedPostingsReader.decodeTerm(termState.longs, in, fieldInfo, termState.wrappedTermState, termState.absolute);
+ termState.absolute = false;
}
}
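Note: one subtlety in the pulsing changes above is that an inlined (pulsed) term never reaches the wrapped postings reader, so it cannot serve as a delta base. The reader therefore latches the flag (termState.absolute = termState.absolute || absolute) and only clears it once a non-inlined term has actually been forwarded to wrappedPostingsReader.decodeTerm(); the writer below does the same on the encode side. A toy sketch of that latch, with all names hypothetical:

class AbsoluteLatchSketch {

  private boolean pendingAbsolute = false;

  // Called once per term. 'inlined' means the postings were pulsed into the terms dict.
  void onTerm(boolean absolute, boolean inlined) {
    pendingAbsolute = pendingAbsolute || absolute;   // remember an unforwarded block start
    if (!inlined) {
      // ...delegate encodeTerm/decodeTerm to the wrapped codec with pendingAbsolute...
      pendingAbsolute = false;                       // wrapped codec now has a fresh delta base
    }
  }
}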
Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java?rev=1521173&r1=1521172&r2=1521173&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java (original)
+++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java Mon Sep 9 16:07:56 2013
@@ -21,14 +21,19 @@ import java.io.IOException;
import java.util.List;
import java.util.ArrayList;
+import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
// TODO: we now inline based on total TF of the term,
// but it might be better to inline by "net bytes used"
@@ -49,26 +54,43 @@ public final class PulsingPostingsWriter
final static String CODEC = "PulsedPostingsWriter";
+ // recording field summary
+ final static String SUMMARY_EXTENSION = "smy";
+
// To add a new version, increment from the last one, and
// change VERSION_CURRENT to point to your new version:
final static int VERSION_START = 0;
- final static int VERSION_CURRENT = VERSION_START;
+ final static int VERSION_META_ARRAY = 1;
+
+ final static int VERSION_CURRENT = VERSION_META_ARRAY;
+ private SegmentWriteState segmentState;
private IndexOutput termsOut;
+ private List<FieldMetaData> fields;
+
private IndexOptions indexOptions;
private boolean storePayloads;
- private static class PendingTerm {
- private final byte[] bytes;
- public PendingTerm(byte[] bytes) {
- this.bytes = bytes;
+ // information for wrapped PF, in current field
+ private int longsSize;
+ private long[] longs;
+ boolean absolute;
+
+ private static class PulsingTermState extends BlockTermState {
+ private byte[] bytes;
+ private BlockTermState wrappedState;
+ @Override
+ public String toString() {
+ if (bytes != null) {
+ return "inlined";
+ } else {
+ return "not inlined wrapped=" + wrappedState;
+ }
}
}
- private final List<PendingTerm> pendingTerms = new ArrayList<PendingTerm>();
-
// one entry per position
private final Position[] pending;
private int pendingCount = 0; // -1 once we've hit too many positions
@@ -83,6 +105,15 @@ public final class PulsingPostingsWriter
int endOffset;
}
+ private static final class FieldMetaData {
+ int fieldNumber;
+ int longsSize;
+ FieldMetaData(int number, int size) {
+ fieldNumber = number;
+ longsSize = size;
+ }
+ }
+
// TODO: -- lazy init this? ie, if every single term
// was inlined (eg for a "primary key" field) then we
// never need to use this fallback? Fallback writer for
@@ -92,23 +123,33 @@ public final class PulsingPostingsWriter
/** If the total number of positions (summed across all docs
* for this term) is <= maxPositions, then the postings are
* inlined into terms dict */
- public PulsingPostingsWriter(int maxPositions, PostingsWriterBase wrappedPostingsWriter) {
+ public PulsingPostingsWriter(SegmentWriteState state, int maxPositions, PostingsWriterBase wrappedPostingsWriter) {
+
pending = new Position[maxPositions];
for(int i=0;i<maxPositions;i++) {
pending[i] = new Position();
}
+ fields = new ArrayList<FieldMetaData>();
// We simply wrap another postings writer, but only call
// on it when tot positions is >= the cutoff:
this.wrappedPostingsWriter = wrappedPostingsWriter;
+ this.segmentState = state;
}
@Override
- public void start(IndexOutput termsOut) throws IOException {
+ public void init(IndexOutput termsOut) throws IOException {
this.termsOut = termsOut;
CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
termsOut.writeVInt(pending.length); // encode maxPositions in header
- wrappedPostingsWriter.start(termsOut);
+ wrappedPostingsWriter.init(termsOut);
+ }
+
+ @Override
+ public BlockTermState newTermState() throws IOException {
+ PulsingTermState state = new PulsingTermState();
+ state.wrappedState = wrappedPostingsWriter.newTermState();
+ return state;
}
@Override
@@ -123,11 +164,15 @@ public final class PulsingPostingsWriter
// Currently, this instance is re-used across fields, so
// our parent calls setField whenever the field changes
@Override
- public void setField(FieldInfo fieldInfo) {
+ public int setField(FieldInfo fieldInfo) {
this.indexOptions = fieldInfo.getIndexOptions();
//if (DEBUG) System.out.println("PW field=" + fieldInfo.name + " indexOptions=" + indexOptions);
storePayloads = fieldInfo.hasPayloads();
- wrappedPostingsWriter.setField(fieldInfo);
+ absolute = false;
+ longsSize = wrappedPostingsWriter.setField(fieldInfo);
+ longs = new long[longsSize];
+ fields.add(new FieldMetaData(fieldInfo.number, longsSize));
+ return 0;
//DEBUG = BlockTreeTermsWriter.DEBUG;
}
@@ -219,18 +264,19 @@ public final class PulsingPostingsWriter
/** Called when we are done adding docs to this term */
@Override
- public void finishTerm(TermStats stats) throws IOException {
+ public void finishTerm(BlockTermState _state) throws IOException {
+ PulsingTermState state = (PulsingTermState) _state;
+
// if (DEBUG) System.out.println("PW finishTerm docCount=" + stats.docFreq + " pendingCount=" + pendingCount + " pendingTerms.size()=" + pendingTerms.size());
assert pendingCount > 0 || pendingCount == -1;
if (pendingCount == -1) {
- wrappedPostingsWriter.finishTerm(stats);
- // Must add null entry to record terms that our
- // wrapped postings impl added
- pendingTerms.add(null);
+ state.wrappedState.docFreq = state.docFreq;
+ state.wrappedState.totalTermFreq = state.totalTermFreq;
+ state.bytes = null;
+ wrappedPostingsWriter.finishTerm(state.wrappedState);
} else {
-
// There were few enough total occurrences for this
// term, so we fully inline our postings data into
// terms dict, now:
@@ -325,61 +371,54 @@ public final class PulsingPostingsWriter
}
}
- final byte[] bytes = new byte[(int) buffer.getFilePointer()];
- buffer.writeTo(bytes, 0);
- pendingTerms.add(new PendingTerm(bytes));
+ state.bytes = new byte[(int) buffer.getFilePointer()];
+ buffer.writeTo(state.bytes, 0);
buffer.reset();
}
-
pendingCount = 0;
}
@Override
- public void close() throws IOException {
- wrappedPostingsWriter.close();
- }
-
- @Override
- public void flushTermsBlock(int start, int count) throws IOException {
- // if (DEBUG) System.out.println("PW: flushTermsBlock start=" + start + " count=" + count + " pendingTerms.size()=" + pendingTerms.size());
- int wrappedCount = 0;
- assert buffer.getFilePointer() == 0;
- assert start >= count;
-
- final int limit = pendingTerms.size() - start + count;
-
- for(int idx=pendingTerms.size()-start; idx<limit; idx++) {
- final PendingTerm term = pendingTerms.get(idx);
- if (term == null) {
- wrappedCount++;
- } else {
- buffer.writeVInt(term.bytes.length);
- buffer.writeBytes(term.bytes, 0, term.bytes.length);
+ public void encodeTerm(long[] empty, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
+ PulsingTermState state = (PulsingTermState)_state;
+ assert empty.length == 0;
+ this.absolute = this.absolute || absolute;
+ if (state.bytes == null) {
+ wrappedPostingsWriter.encodeTerm(longs, buffer, fieldInfo, state.wrappedState, this.absolute);
+ for (int i = 0; i < longsSize; i++) {
+ out.writeVLong(longs[i]);
}
+ buffer.writeTo(out);
+ buffer.reset();
+ this.absolute = false;
+ } else {
+ out.writeVInt(state.bytes.length);
+ out.writeBytes(state.bytes, 0, state.bytes.length);
+ this.absolute = this.absolute || absolute;
}
+ }
- termsOut.writeVInt((int) buffer.getFilePointer());
- buffer.writeTo(termsOut);
- buffer.reset();
-
- // TDOO: this could be somewhat costly since
- // pendingTerms.size() could be biggish?
- int futureWrappedCount = 0;
- final int limit2 = pendingTerms.size();
- for(int idx=limit;idx<limit2;idx++) {
- if (pendingTerms.get(idx) == null) {
- futureWrappedCount++;
+ @Override
+ public void close() throws IOException {
+ wrappedPostingsWriter.close();
+ if (wrappedPostingsWriter instanceof PulsingPostingsWriter ||
+ VERSION_CURRENT < VERSION_META_ARRAY) {
+ return;
+ }
+ String summaryFileName = IndexFileNames.segmentFileName(segmentState.segmentInfo.name, segmentState.segmentSuffix, SUMMARY_EXTENSION);
+ IndexOutput out = null;
+ try {
+ out = segmentState.directory.createOutput(summaryFileName, segmentState.context);
+ CodecUtil.writeHeader(out, CODEC, VERSION_CURRENT);
+ out.writeVInt(fields.size());
+ for (FieldMetaData field : fields) {
+ out.writeVInt(field.fieldNumber);
+ out.writeVInt(field.longsSize);
}
+ out.close();
+ } finally {
+ IOUtils.closeWhileHandlingException(out);
}
-
- // Remove the terms we just wrote:
- pendingTerms.subList(pendingTerms.size()-start, limit).clear();
-
- // if (DEBUG) System.out.println("PW: len=" + buffer.getFilePointer() + " fp=" + termsOut.getFilePointer() + " futureWrappedCount=" + futureWrappedCount + " wrappedCount=" + wrappedCount);
- // TODO: can we avoid calling this if all terms
- // were inlined...? Eg for a "primary key" field, the
- // wrapped codec is never invoked...
- wrappedPostingsWriter.flushTermsBlock(futureWrappedCount+wrappedCount, wrappedCount);
}
// Pushes pending positions to the wrapped codec
Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/sep/IntIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/sep/IntIndexOutput.java?rev=1521173&r1=1521172&r2=1521173&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/sep/IntIndexOutput.java (original)
+++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/sep/IntIndexOutput.java Mon Sep 9 16:07:56 2013
@@ -20,7 +20,7 @@ package org.apache.lucene.codecs.sep;
// TODO: we may want tighter integration w/ IndexOutput --
// may give better perf:
-import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.DataOutput;
import java.io.IOException;
import java.io.Closeable;
@@ -49,12 +49,12 @@ public abstract class IntIndexOutput imp
/** Writes "location" of current output pointer of primary
* output to different output (out) */
- public abstract void write(IndexOutput indexOut, boolean absolute) throws IOException;
+ public abstract void write(DataOutput indexOut, boolean absolute) throws IOException;
}
/** If you are indexing the primary output file, call
* this and interact with the returned IndexWriter. */
- public abstract Index index() throws IOException;
+ public abstract Index index();
@Override
public abstract void close() throws IOException;
Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java?rev=1521173&r1=1521172&r2=1521173&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java (original)
+++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java Mon Sep 9 16:07:56 2013
@@ -31,6 +31,7 @@ import org.apache.lucene.index.IndexFile
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.TermState;
import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
@@ -115,15 +116,6 @@ public class SepPostingsReader extends P
long payloadFP;
long skipFP;
- // Only used for "primary" term state; these are never
- // copied on clone:
-
- // TODO: these should somehow be stored per-TermsEnum
- // not per TermState; maybe somehow the terms dict
- // should load/manage the byte[]/DataReader for us?
- byte[] bytes;
- ByteArrayDataInput bytesReader;
-
@Override
public SepTermState clone() {
SepTermState other = new SepTermState();
@@ -182,40 +174,21 @@ public class SepPostingsReader extends P
}
@Override
- public void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
- final SepTermState termState = (SepTermState) _termState;
- //System.out.println("SEPR: readTermsBlock termsIn.fp=" + termsIn.getFilePointer());
- final int len = termsIn.readVInt();
- //System.out.println(" numBytes=" + len);
- if (termState.bytes == null) {
- termState.bytes = new byte[ArrayUtil.oversize(len, 1)];
- termState.bytesReader = new ByteArrayDataInput(termState.bytes);
- } else if (termState.bytes.length < len) {
- termState.bytes = new byte[ArrayUtil.oversize(len, 1)];
- }
- termState.bytesReader.reset(termState.bytes, 0, len);
- termsIn.readBytes(termState.bytes, 0, len);
- }
-
- @Override
- public void nextTerm(FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
+ public void decodeTerm(long[] empty, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
+ throws IOException {
final SepTermState termState = (SepTermState) _termState;
- final boolean isFirstTerm = termState.termBlockOrd == 0;
- //System.out.println("SEPR.nextTerm termCount=" + termState.termBlockOrd + " isFirstTerm=" + isFirstTerm + " bytesReader.pos=" + termState.bytesReader.getPosition());
- //System.out.println(" docFreq=" + termState.docFreq);
- termState.docIndex.read(termState.bytesReader, isFirstTerm);
- //System.out.println(" docIndex=" + termState.docIndex);
+ termState.docIndex.read(in, absolute);
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
- termState.freqIndex.read(termState.bytesReader, isFirstTerm);
+ termState.freqIndex.read(in, absolute);
if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
//System.out.println(" freqIndex=" + termState.freqIndex);
- termState.posIndex.read(termState.bytesReader, isFirstTerm);
+ termState.posIndex.read(in, absolute);
//System.out.println(" posIndex=" + termState.posIndex);
if (fieldInfo.hasPayloads()) {
- if (isFirstTerm) {
- termState.payloadFP = termState.bytesReader.readVLong();
+ if (absolute) {
+ termState.payloadFP = in.readVLong();
} else {
- termState.payloadFP += termState.bytesReader.readVLong();
+ termState.payloadFP += in.readVLong();
}
//System.out.println(" payloadFP=" + termState.payloadFP);
}
@@ -223,14 +196,14 @@ public class SepPostingsReader extends P
}
if (termState.docFreq >= skipMinimum) {
- //System.out.println(" readSkip @ " + termState.bytesReader.getPosition());
- if (isFirstTerm) {
- termState.skipFP = termState.bytesReader.readVLong();
+ //System.out.println(" readSkip @ " + in.getPosition());
+ if (absolute) {
+ termState.skipFP = in.readVLong();
} else {
- termState.skipFP += termState.bytesReader.readVLong();
+ termState.skipFP += in.readVLong();
}
//System.out.println(" skipFP=" + termState.skipFP);
- } else if (isFirstTerm) {
+ } else if (absolute) {
termState.skipFP = 0;
}
}
Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java?rev=1521173&r1=1521172&r2=1521173&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java (original)
+++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java Mon Sep 9 16:07:56 2013
@@ -18,18 +18,17 @@ package org.apache.lucene.codecs.sep;
*/
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
+import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsWriterBase;
-import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.BytesRef;
@@ -64,7 +63,6 @@ public final class SepPostingsWriter ext
IndexOutput payloadOut;
IndexOutput skipOut;
- IndexOutput termsOut;
final SepSkipListWriter skipListWriter;
/** Expert: The fraction of TermDocs entries stored in skip tables,
@@ -98,8 +96,9 @@ public final class SepPostingsWriter ext
int lastDocID;
int df;
- // Holds pending byte[] blob for the current terms block
- private final RAMOutputStream indexBytesWriter = new RAMOutputStream();
+ SepTermState lastState;
+ long lastPayloadFP;
+ long lastSkipFP;
public SepPostingsWriter(SegmentWriteState state, IntStreamFactory factory) throws IOException {
this(state, factory, DEFAULT_SKIP_INTERVAL);
@@ -116,9 +115,10 @@ public final class SepPostingsWriter ext
this.skipInterval = skipInterval;
this.skipMinimum = skipInterval; /* set to the same for now */
final String docFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, DOC_EXTENSION);
+
docOut = factory.createOutput(state.directory, docFileName, state.context);
docIndex = docOut.index();
-
+
if (state.fieldInfos.hasFreq()) {
final String frqFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FREQ_EXTENSION);
freqOut = factory.createOutput(state.directory, frqFileName, state.context);
@@ -134,7 +134,7 @@ public final class SepPostingsWriter ext
final String payloadFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, PAYLOAD_EXTENSION);
payloadOut = state.directory.createOutput(payloadFileName, state.context);
}
-
+
final String skipFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SKIP_EXTENSION);
skipOut = state.directory.createOutput(skipFileName, state.context);
@@ -155,8 +155,7 @@ public final class SepPostingsWriter ext
}
@Override
- public void start(IndexOutput termsOut) throws IOException {
- this.termsOut = termsOut;
+ public void init(IndexOutput termsOut) throws IOException {
CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
// TODO: -- just ask skipper to "start" here
termsOut.writeInt(skipInterval); // write skipInterval
@@ -165,6 +164,11 @@ public final class SepPostingsWriter ext
}
@Override
+ public BlockTermState newTermState() {
+ return new SepTermState();
+ }
+
+ @Override
public void startTerm() throws IOException {
docIndex.mark();
//System.out.println("SEPW: startTerm docIndex=" + docIndex);
@@ -185,7 +189,7 @@ public final class SepPostingsWriter ext
// Currently, this instance is re-used across fields, so
// our parent calls setField whenever the field changes
@Override
- public void setField(FieldInfo fieldInfo) {
+ public int setField(FieldInfo fieldInfo) {
this.fieldInfo = fieldInfo;
this.indexOptions = fieldInfo.getIndexOptions();
if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
@@ -193,6 +197,24 @@ public final class SepPostingsWriter ext
}
skipListWriter.setIndexOptions(indexOptions);
storePayloads = indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS && fieldInfo.hasPayloads();
+ lastPayloadFP = 0;
+ lastSkipFP = 0;
+ lastState = setEmptyState();
+ return 0;
+ }
+
+ private SepTermState setEmptyState() {
+ SepTermState emptyState = new SepTermState();
+ emptyState.docIndex = docOut.index();
+ if (indexOptions != IndexOptions.DOCS_ONLY) {
+ emptyState.freqIndex = freqOut.index();
+ if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+ emptyState.posIndex = posOut.index();
+ }
+ }
+ emptyState.payloadFP = 0;
+ emptyState.skipFP = 0;
+ return emptyState;
}
/** Adds a new doc in this term. If this returns null
@@ -260,132 +282,86 @@ public final class SepPostingsWriter ext
lastPosition = 0;
}
- private static class PendingTerm {
- public final IntIndexOutput.Index docIndex;
- public final IntIndexOutput.Index freqIndex;
- public final IntIndexOutput.Index posIndex;
- public final long payloadFP;
- public final long skipFP;
-
- public PendingTerm(IntIndexOutput.Index docIndex, IntIndexOutput.Index freqIndex, IntIndexOutput.Index posIndex, long payloadFP, long skipFP) {
- this.docIndex = docIndex;
- this.freqIndex = freqIndex;
- this.posIndex = posIndex;
- this.payloadFP = payloadFP;
- this.skipFP = skipFP;
- }
+ private static class SepTermState extends BlockTermState {
+ public IntIndexOutput.Index docIndex;
+ public IntIndexOutput.Index freqIndex;
+ public IntIndexOutput.Index posIndex;
+ public long payloadFP;
+ public long skipFP;
}
- private final List<PendingTerm> pendingTerms = new ArrayList<PendingTerm>();
-
/** Called when we are done adding docs to this term */
@Override
- public void finishTerm(TermStats stats) throws IOException {
+ public void finishTerm(BlockTermState _state) throws IOException {
+ SepTermState state = (SepTermState)_state;
// TODO: -- wasteful we are counting this in two places?
- assert stats.docFreq > 0;
- assert stats.docFreq == df;
-
- final IntIndexOutput.Index docIndexCopy = docOut.index();
- docIndexCopy.copyFrom(docIndex, false);
+ assert state.docFreq > 0;
+ assert state.docFreq == df;
- final IntIndexOutput.Index freqIndexCopy;
- final IntIndexOutput.Index posIndexCopy;
+ state.docIndex = docOut.index();
+ state.docIndex.copyFrom(docIndex, false);
if (indexOptions != IndexOptions.DOCS_ONLY) {
- freqIndexCopy = freqOut.index();
- freqIndexCopy.copyFrom(freqIndex, false);
+ state.freqIndex = freqOut.index();
+ state.freqIndex.copyFrom(freqIndex, false);
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
- posIndexCopy = posOut.index();
- posIndexCopy.copyFrom(posIndex, false);
+ state.posIndex = posOut.index();
+ state.posIndex.copyFrom(posIndex, false);
} else {
- posIndexCopy = null;
+ state.posIndex = null;
}
} else {
- freqIndexCopy = null;
- posIndexCopy = null;
+ state.freqIndex = null;
+ state.posIndex = null;
}
- final long skipFP;
if (df >= skipMinimum) {
- skipFP = skipOut.getFilePointer();
+ state.skipFP = skipOut.getFilePointer();
//System.out.println(" skipFP=" + skipFP);
skipListWriter.writeSkip(skipOut);
//System.out.println(" numBytes=" + (skipOut.getFilePointer()-skipFP));
} else {
- skipFP = -1;
+ state.skipFP = -1;
}
+ state.payloadFP = payloadStart;
lastDocID = 0;
df = 0;
-
- pendingTerms.add(new PendingTerm(docIndexCopy,
- freqIndexCopy,
- posIndexCopy,
- payloadStart,
- skipFP));
}
@Override
- public void flushTermsBlock(int start, int count) throws IOException {
- //System.out.println("SEPW: flushTermsBlock: start=" + start + " count=" + count + " pendingTerms.size()=" + pendingTerms.size() + " termsOut.fp=" + termsOut.getFilePointer());
- assert indexBytesWriter.getFilePointer() == 0;
- final int absStart = pendingTerms.size() - start;
- final List<PendingTerm> slice = pendingTerms.subList(absStart, absStart+count);
-
- long lastPayloadFP = 0;
- long lastSkipFP = 0;
-
- if (count == 0) {
- termsOut.writeByte((byte) 0);
- return;
- }
-
- final PendingTerm firstTerm = slice.get(0);
- final IntIndexOutput.Index docIndexFlush = firstTerm.docIndex;
- final IntIndexOutput.Index freqIndexFlush = firstTerm.freqIndex;
- final IntIndexOutput.Index posIndexFlush = firstTerm.posIndex;
-
- for(int idx=0;idx<slice.size();idx++) {
- final boolean isFirstTerm = idx == 0;
- final PendingTerm t = slice.get(idx);
- //System.out.println(" write idx=" + idx + " docIndex=" + t.docIndex);
- docIndexFlush.copyFrom(t.docIndex, false);
- docIndexFlush.write(indexBytesWriter, isFirstTerm);
- if (indexOptions != IndexOptions.DOCS_ONLY) {
- freqIndexFlush.copyFrom(t.freqIndex, false);
- freqIndexFlush.write(indexBytesWriter, isFirstTerm);
- //System.out.println(" freqIndex=" + t.freqIndex);
- if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
- posIndexFlush.copyFrom(t.posIndex, false);
- posIndexFlush.write(indexBytesWriter, isFirstTerm);
- //System.out.println(" posIndex=" + t.posIndex);
- if (storePayloads) {
- //System.out.println(" payloadFP=" + t.payloadFP);
- if (isFirstTerm) {
- indexBytesWriter.writeVLong(t.payloadFP);
- } else {
- indexBytesWriter.writeVLong(t.payloadFP - lastPayloadFP);
- }
- lastPayloadFP = t.payloadFP;
+ public void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
+ SepTermState state = (SepTermState)_state;
+ if (absolute) {
+ lastSkipFP = 0;
+ lastPayloadFP = 0;
+ lastState = state;
+ }
+ lastState.docIndex.copyFrom(state.docIndex, false);
+ lastState.docIndex.write(out, absolute);
+ if (indexOptions != IndexOptions.DOCS_ONLY) {
+ lastState.freqIndex.copyFrom(state.freqIndex, false);
+ lastState.freqIndex.write(out, absolute);
+ if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+ lastState.posIndex.copyFrom(state.posIndex, false);
+ lastState.posIndex.write(out, absolute);
+ if (storePayloads) {
+ if (absolute) {
+ out.writeVLong(state.payloadFP);
+ } else {
+ out.writeVLong(state.payloadFP - lastPayloadFP);
}
+ lastPayloadFP = state.payloadFP;
}
}
-
- if (t.skipFP != -1) {
- if (isFirstTerm) {
- indexBytesWriter.writeVLong(t.skipFP);
- } else {
- indexBytesWriter.writeVLong(t.skipFP - lastSkipFP);
- }
- lastSkipFP = t.skipFP;
- //System.out.println(" skipFP=" + t.skipFP);
+ }
+ if (state.skipFP != -1) {
+ if (absolute) {
+ out.writeVLong(state.skipFP);
+ } else {
+ out.writeVLong(state.skipFP - lastSkipFP);
}
+ lastSkipFP = state.skipFP;
}
-
- //System.out.println(" numBytes=" + indexBytesWriter.getFilePointer());
- termsOut.writeVLong((int) indexBytesWriter.getFilePointer());
- indexBytesWriter.writeTo(termsOut);
- indexBytesWriter.reset();
- slice.clear();
}
@Override
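
(Aside, not part of the commit: encodeTerm above writes each term's file pointers either absolutely, resetting the baselines, or as deltas against the previously encoded term. The sketch below shows only that absolute/delta pattern; it omits the indexOptions/storePayloads checks, uses java.io.DataOutputStream with fixed-width longs instead of Lucene's DataOutput and VLong encoding, and the class name is invented.)

import java.io.DataOutputStream;
import java.io.IOException;

// Sketch of the absolute-vs-delta pointer encoding used by encodeTerm above.
class DeltaPointerWriter {
  private long lastPayloadFP;
  private long lastSkipFP;

  void encodeTerm(long payloadFP, long skipFP, boolean absolute, DataOutputStream out)
      throws IOException {
    if (absolute) {
      // First term encoded for this block: reset baselines, write raw pointers.
      lastPayloadFP = 0;
      lastSkipFP = 0;
      out.writeLong(payloadFP);
    } else {
      // Later terms: write only the delta against the previous term's pointer.
      out.writeLong(payloadFP - lastPayloadFP);
    }
    lastPayloadFP = payloadFP;
    if (skipFP != -1) {
      // Terms without skip data write nothing here, exactly as in the hunk above.
      out.writeLong(absolute ? skipFP : skipFP - lastSkipFP);
      lastSkipFP = skipFP;
    }
  }
}

Because pointers within a block are close together, the deltas are small and the real VLong encoding keeps the per-term metadata compact.
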
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java?rev=1521173&r1=1521172&r2=1521173&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java Mon Sep 9 16:07:56 2013
@@ -34,6 +34,7 @@ public class BlockTermState extends OrdT
/** the term's ord in the current block */
public int termBlockOrd;
/** fp into the terms dict primary file (_X.tim) that holds this term */
+ // TODO: update BTR to nuke this
public long blockFilePointer;
/** Sole constructor. (For invocation by subclass
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java?rev=1521173&r1=1521172&r2=1521173&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java Mon Sep 9 16:07:56 2013
@@ -158,6 +158,7 @@ public class BlockTreeTermsReader extend
final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
final long sumDocFreq = in.readVLong();
final int docCount = in.readVInt();
+ final int longsSize = version >= BlockTreeTermsWriter.TERMS_VERSION_META_ARRAY ? in.readVInt() : 0;
if (docCount < 0 || docCount > info.getDocCount()) { // #docs with field must be <= #docs
throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
}
@@ -168,7 +169,7 @@ public class BlockTreeTermsReader extend
throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + in + ")");
}
final long indexStartFP = indexIn.readVLong();
- FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, indexIn));
+ FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, longsSize, indexIn));
if (previous != null) {
throw new CorruptIndexException("duplicate field: " + fieldInfo.name + " (resource=" + in + ")");
}
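
(Aside, not part of the commit: the hunk above reads the new per-field longsSize only when the segment was written with TERMS_VERSION_META_ARRAY or later, falling back to 0 for older segments. A tiny illustration of that version gate follows; the names are invented, and java.io.DataInput with a fixed-width int stands in for Lucene's IndexInput and VInt.)

import java.io.DataInput;
import java.io.IOException;

// Sketch of a version-gated header field read: old segments simply lack the value.
class VersionGateDemo {
  static final int VERSION_META_ARRAY = 1;

  static int readLongsSize(DataInput in, int segmentVersion) throws IOException {
    return segmentVersion >= VERSION_META_ARRAY ? in.readInt() : 0;
  }
}
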
@@ -448,11 +449,12 @@ public class BlockTreeTermsReader extend
final long indexStartFP;
final long rootBlockFP;
final BytesRef rootCode;
- private final FST<BytesRef> index;
+ final int longsSize;
+ private final FST<BytesRef> index;
//private boolean DEBUG;
- FieldReader(FieldInfo fieldInfo, long numTerms, BytesRef rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount, long indexStartFP, IndexInput indexIn) throws IOException {
+ FieldReader(FieldInfo fieldInfo, long numTerms, BytesRef rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount, long indexStartFP, int longsSize, IndexInput indexIn) throws IOException {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
//DEBUG = BlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id");
@@ -462,6 +464,7 @@ public class BlockTreeTermsReader extend
this.docCount = docCount;
this.indexStartFP = indexStartFP;
this.rootCode = rootCode;
+ this.longsSize = longsSize;
// if (DEBUG) {
// System.out.println("BTTR: seg=" + segment + " field=" + fieldInfo.name + " rootBlockCode=" + rootCode + " divisor=" + indexDivisor);
// }
@@ -612,6 +615,12 @@ public class BlockTreeTermsReader extend
FST.Arc<BytesRef> arc;
final BlockTermState termState;
+
+ // metadata buffer, holding monotonic values
+ public long[] longs;
+ // metadata buffer, holding general values
+ public byte[] bytes;
+ ByteArrayDataInput bytesReader;
// Cumulative output so far
BytesRef outputPrefix;
@@ -621,8 +630,9 @@ public class BlockTreeTermsReader extend
public Frame(int ord) throws IOException {
this.ord = ord;
- termState = postingsReader.newTermState();
- termState.totalTermFreq = -1;
+ this.termState = postingsReader.newTermState();
+ this.termState.totalTermFreq = -1;
+ this.longs = new long[longsSize];
}
void loadNextFloorBlock() throws IOException {
@@ -720,8 +730,17 @@ public class BlockTreeTermsReader extend
termState.termBlockOrd = 0;
nextEnt = 0;
-
- postingsReader.readTermsBlock(in, fieldInfo, termState);
+
+ // metadata
+ numBytes = in.readVInt();
+ if (bytes == null) {
+ bytes = new byte[ArrayUtil.oversize(numBytes, 1)];
+ bytesReader = new ByteArrayDataInput();
+ } else if (bytes.length < numBytes) {
+ bytes = new byte[ArrayUtil.oversize(numBytes, 1)];
+ }
+ in.readBytes(bytes, 0, numBytes);
+ bytesReader.reset(bytes, 0, numBytes);
if (!isLastInFloor) {
// Sub-blocks of a single floor block are always
@@ -774,12 +793,9 @@ public class BlockTreeTermsReader extend
// lazily catch up on metadata decode:
final int limit = getTermBlockOrd();
+ boolean absolute = metaDataUpto == 0;
assert limit > 0;
- // We must set/incr state.termCount because
- // postings impl can look at this
- termState.termBlockOrd = metaDataUpto;
-
// TODO: better API would be "jump straight to term=N"???
while (metaDataUpto < limit) {
@@ -791,17 +807,24 @@ public class BlockTreeTermsReader extend
// TODO: if docFreq were bulk decoded we could
// just skipN here:
+
+ // stats
termState.docFreq = statsReader.readVInt();
//if (DEBUG) System.out.println(" dF=" + state.docFreq);
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
termState.totalTermFreq = termState.docFreq + statsReader.readVLong();
//if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq);
}
+ // metadata
+ for (int i = 0; i < longsSize; i++) {
+ longs[i] = bytesReader.readVLong();
+ }
+ postingsReader.decodeTerm(longs, bytesReader, fieldInfo, termState, absolute);
- postingsReader.nextTerm(fieldInfo, termState);
metaDataUpto++;
- termState.termBlockOrd++;
+ absolute = false;
}
+ termState.termBlockOrd = metaDataUpto;
}
}
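
(Aside, not part of the commit: the loop above lazily catches up on term metadata up to the requested ord, decoding per-term stats, then longsSize VLongs, then handing both to decodeTerm, with absolute=true only for the very first term decoded in the block. A simplified sketch of that catch-up loop follows; TermDecoder and the pre-split encodedLongs array are illustrative stand-ins, not the PostingsReaderBase API.)

import java.io.IOException;

// Sketch of the lazy metadata catch-up loop used above.
class LazyMetadataDemo {
  interface TermDecoder {
    void decodeTerm(long[] longs, boolean absolute) throws IOException;
  }

  private int metaDataUpto;

  void catchUp(int limit, long[][] encodedLongs, TermDecoder decoder) throws IOException {
    boolean absolute = metaDataUpto == 0;   // deltas restart at each block boundary
    while (metaDataUpto < limit) {
      // in the real code the longs are read from bytesReader right here
      decoder.decodeTerm(encodedLongs[metaDataUpto], absolute);
      metaDataUpto++;
      absolute = false;                     // every later term is delta-coded
    }
  }
}

Decoding only up to the term actually being looked up is what lets seeks skip the metadata of terms that are scanned past but never returned.
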
@@ -1707,6 +1730,7 @@ public class BlockTreeTermsReader extend
if (arc.output != NO_OUTPUT) {
output = fstOutputs.add(output, arc.output);
}
+
// if (DEBUG) {
// System.out.println(" index: follow label=" + toHex(target.bytes[target.offset + targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
// }
@@ -2290,10 +2314,17 @@ public class BlockTreeTermsReader extend
final BlockTermState state;
+ // metadata buffer, holding monotonic values
+ public long[] longs;
+ // metadata buffer, holding general values
+ public byte[] bytes;
+ ByteArrayDataInput bytesReader;
+
public Frame(int ord) throws IOException {
this.ord = ord;
- state = postingsReader.newTermState();
- state.totalTermFreq = -1;
+ this.state = postingsReader.newTermState();
+ this.state.totalTermFreq = -1;
+ this.longs = new long[longsSize];
}
public void setFloorData(ByteArrayDataInput in, BytesRef source) {
@@ -2391,7 +2422,17 @@ public class BlockTreeTermsReader extend
// TODO: we could skip this if !hasTerms; but
// that's rare so won't help much
- postingsReader.readTermsBlock(in, fieldInfo, state);
+ // metadata
+ numBytes = in.readVInt();
+ if (bytes == null) {
+ bytes = new byte[ArrayUtil.oversize(numBytes, 1)];
+ bytesReader = new ByteArrayDataInput();
+ } else if (bytes.length < numBytes) {
+ bytes = new byte[ArrayUtil.oversize(numBytes, 1)];
+ }
+ in.readBytes(bytes, 0, numBytes);
+ bytesReader.reset(bytes, 0, numBytes);
+
// Sub-blocks of a single floor block are always
// written one after another -- tail recurse:
@@ -2575,12 +2616,9 @@ public class BlockTreeTermsReader extend
// lazily catch up on metadata decode:
final int limit = getTermBlockOrd();
+ boolean absolute = metaDataUpto == 0;
assert limit > 0;
- // We must set/incr state.termCount because
- // postings impl can look at this
- state.termBlockOrd = metaDataUpto;
-
// TODO: better API would be "jump straight to term=N"???
while (metaDataUpto < limit) {
@@ -2592,17 +2630,24 @@ public class BlockTreeTermsReader extend
// TODO: if docFreq were bulk decoded we could
// just skipN here:
+
+ // stats
state.docFreq = statsReader.readVInt();
//if (DEBUG) System.out.println(" dF=" + state.docFreq);
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
state.totalTermFreq = state.docFreq + statsReader.readVLong();
//if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq);
}
+ // metadata
+ for (int i = 0; i < longsSize; i++) {
+ longs[i] = bytesReader.readVLong();
+ }
+ postingsReader.decodeTerm(longs, bytesReader, fieldInfo, state, absolute);
- postingsReader.nextTerm(fieldInfo, state);
metaDataUpto++;
- state.termBlockOrd++;
+ absolute = false;
}
+ state.termBlockOrd = metaDataUpto;
}
// Used only by assert