You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ha...@apache.org on 2013/09/04 16:29:49 UTC
svn commit: r1520034 - in /lucene/dev/branches/lucene3069/lucene:
codecs/src/java/org/apache/lucene/codecs/memory/
codecs/src/java/org/apache/lucene/codecs/temp/
codecs/src/resources/META-INF/services/
test-framework/src/java/org/apache/lucene/codecs/m...
Author: han
Date: Wed Sep 4 14:29:48 2013
New Revision: 1520034
URL: http://svn.apache.org/r1520034
Log:
LUCENE-3069: move TermDict impls to package 'memory', nuke all 'Temp' symbols
Added:
lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java
- copied, changed from r1519988, lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsReader.java
lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java
- copied, changed from r1519988, lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsWriter.java
lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermOutputs.java
- copied, changed from r1519988, lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempTermOutputs.java
lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
- copied, changed from r1519988, lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTTermsReader.java
lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java
- copied, changed from r1519988, lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTTermsWriter.java
lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/
lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/FSTOrdPostingsFormat.java
- copied, changed from r1519988, lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTOrdPostingsFormat.java
lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/FSTOrdPulsing41PostingsFormat.java
- copied, changed from r1519988, lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTOrdPulsing41PostingsFormat.java
lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/FSTPostingsFormat.java
- copied, changed from r1519988, lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTPostingsFormat.java
lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/FSTPulsing41PostingsFormat.java
- copied, changed from r1519988, lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTPulsing41PostingsFormat.java
lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/package.html (with props)
Removed:
lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/
Modified:
lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/package.html
lucene/dev/branches/lucene3069/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
lucene/dev/branches/lucene3069/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
Copied: lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java (from r1519988, lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsReader.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java?p2=lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java&p1=lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsReader.java&r1=1519988&r2=1520034&rev=1520034&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java Wed Sep 4 14:29:48 2013
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.temp;
+package org.apache.lucene.codecs.memory;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -70,17 +70,17 @@ import org.apache.lucene.codecs.CodecUti
*
* @lucene.experimental
*/
-public class TempFSTOrdTermsReader extends FieldsProducer {
- static final int INTERVAL = TempFSTOrdTermsWriter.SKIP_INTERVAL;
+public class FSTOrdTermsReader extends FieldsProducer {
+ static final int INTERVAL = FSTOrdTermsWriter.SKIP_INTERVAL;
final TreeMap<String, TermsReader> fields = new TreeMap<String, TermsReader>();
final PostingsReaderBase postingsReader;
IndexInput indexIn = null;
IndexInput blockIn = null;
//static final boolean TEST = false;
- public TempFSTOrdTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException {
- final String termsIndexFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempFSTOrdTermsWriter.TERMS_INDEX_EXTENSION);
- final String termsBlockFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempFSTOrdTermsWriter.TERMS_BLOCK_EXTENSION);
+ public FSTOrdTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException {
+ final String termsIndexFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTOrdTermsWriter.TERMS_INDEX_EXTENSION);
+ final String termsBlockFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTOrdTermsWriter.TERMS_BLOCK_EXTENSION);
this.postingsReader = postingsReader;
try {
@@ -113,9 +113,9 @@ public class TempFSTOrdTermsReader exten
}
private int readHeader(IndexInput in) throws IOException {
- return CodecUtil.checkHeader(in, TempFSTOrdTermsWriter.TERMS_CODEC_NAME,
- TempFSTOrdTermsWriter.TERMS_VERSION_START,
- TempFSTOrdTermsWriter.TERMS_VERSION_CURRENT);
+ return CodecUtil.checkHeader(in, FSTOrdTermsWriter.TERMS_CODEC_NAME,
+ FSTOrdTermsWriter.TERMS_VERSION_START,
+ FSTOrdTermsWriter.TERMS_VERSION_CURRENT);
}
private void seekDir(IndexInput in) throws IOException {
in.seek(in.length() - 8);
Copied: lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java (from r1519988, lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsWriter.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java?p2=lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java&p1=lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsWriter.java&r1=1519988&r2=1520034&rev=1520034&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsWriter.java (original)
+++ lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java Wed Sep 4 14:29:48 2013
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.temp;
+package org.apache.lucene.codecs.memory;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -27,6 +27,7 @@ import org.apache.lucene.index.FieldInfo
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.ArrayUtil;
@@ -46,12 +47,104 @@ import org.apache.lucene.codecs.TermStat
import org.apache.lucene.codecs.CodecUtil;
/**
- * FST based term dict, the FST maps each term and its ord.
+ * FST-based term dict, using ord as FST output.
*
- * @lucene.experimental
+ * The FST holds the mapping between <term, ord>, and
+ * term's metadata is delta encoded into a single byte block.
+ *
+ * Typically the byte block consists of four parts:
+ * 1. term statistics: docFreq, totalTermFreq;
+ * 2. monotonic long[], e.g. the pointer to the postings list for that term;
+ * 3. generic byte[], e.g. other information customized by postings base.
+ * 4. single-level skip list to speed up metadata decoding by ord.
+ *
+ * <p>
+ * Files:
+ * <ul>
+ * <li><tt>.tix</tt>: <a href="#Termindex">Term Index</a></li>
+ * <li><tt>.tbk</tt>: <a href="#Termblock">Term Block</a></li>
+ * </ul>
+ * </p>
+ *
+ * <a name="Termindex" id="Termindex"></a>
+ * <h3>Term Index</h3>
+ * <p>
+ * The .tix contains a list of FSTs, one for each field.
+ * The FST maps a term to its corresponding order in current field.
+ * </p>
+ *
+ * <ul>
+ * <li>TermIndex(.tix) --> Header, TermFST<sup>NumFields</sup></li>
+ * <li>TermFST --> {@link FST FST<long>}</li>
+ * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
+ * </ul>
+ *
+ * <p>Notes:</p>
+ * <ul>
+ * <li>
+ * Since terms are already sorted before writing to <a href="#Termblock">Term Block</a>,
+ * their ords can be directly used to seek term metadata from term block.
+ * </li>
+ * </ul>
+ *
+ * <a name="Termblock" id="Termblock"></a>
+ * <h3>Term Block</h3>
+ * <p>
+ * The .tbk contains all the statistics and metadata for terms, along with field summary (e.g.
+ * per-field data like number of documents in current field). For each field, there are four blocks:
+ * <ul>
+ * <li>statistics bytes block: contains term statistics; </li>
+ * <li>metadata longs block: delta-encodes monotonical part of metadata; </li>
+ * <li>metadata bytes block: encodes other parts of metadata; </li>
+ * <li>skip block: contains skip data, to speed up metadata seeking and decoding</li>
+ * </ul>
+ * </p>
+ *
+ * <p>File Format:</p>
+ * <ul>
+ * <li>TermBlock(.tbk) --> Header, <i>PostingsHeader</i>, FieldSummary, DirOffset</li>
+ * <li>FieldSummary --> NumFields, <FieldNumber, NumTerms, SumTotalTermFreq?, SumDocFreq,
+ * DocCount, LongsSize, DataBlock > <sup>NumFields</sup></li>
+ *
+ * <li>DataBlock --> StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
+ * SkipBlock, StatsBlock, MetaLongsBlock, MetaBytesBlock </li>
+ * <li>SkipBlock --> < StatsFPDelta, MetaLongsSkipFPDelta, MetaBytesSkipFPDelta,
+ * MetaLongsSkipDelta<sup>LongsSize</sup> ><sup>NumTerms</sup></li>
+ * <li>StatsBlock --> < DocFreq[Same?], (TotalTermFreq-DocFreq) ? > <sup>NumTerms</sup></li>
+ * <li>MetaLongsBlock --> < LongDelta<sup>LongsSize</sup>, BytesSize > <sup>NumTerms</sup></li>
+ * <li>MetaBytesBlock --> Byte <sup>MetaBytesBlockLength</sup></li>
+ * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
+ * <li>DirOffset --> {@link DataOutput#writeLong Uint64}</li>
+ * <li>NumFields, FieldNumber, DocCount, DocFreq,
+ * LongsSize --> {@link DataOutput#writeVInt VInt}</li>
+ * <li>NumTerms, SumTotalTermFreq, SumDocFreq, StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
+ * StatsFPDelta, MetaLongsSkipFPDelta, MetaBytesSkipFPDelta, MetaLongsSkipStart, TotalTermFreq,
+ * LongDelta --> {@link DataOutput#writeVLong VLong}</li>
+ * </ul>
+ * <p>Notes: </p>
+ * <ul>
+ * <li>
+ * The format of PostingsHeader and MetaBytes are customized by the specific postings implementation:
+ * they contain arbitrary per-file data (such as parameters or versioning information), and per-term data
+ * (non-monotonical ones like pulsed postings data).
+ * </li>
+ * <li>
+ * During initialization the reader will load all the blocks into memory. SkipBlock will be decoded, so that during seek
+ * term dict can lookup file pointers directly. StatsFPDelta, MetaLongsSkipFPDelta, etc. are file offset
+ * for every SkipInterval's term. MetaLongsSkipDelta is the difference from previous one, which indicates
+ * the value of preceding metadata longs for every SkipInterval's term.
+ * </li>
+ * <li>
+ * DocFreq is the count of documents which contain the term. TotalTermFreq is the total number of occurrences of the term.
+ * Usually these two values are the same for long tail terms, therefore one bit is stolen from DocFreq to check this case,
+ * so that encoding of TotalTermFreq may be omitted.
+ * </li>
+ * </ul>
+ *
+ * @lucene.experimental
*/
-public class TempFSTOrdTermsWriter extends FieldsConsumer {
+public class FSTOrdTermsWriter extends FieldsConsumer {
static final String TERMS_INDEX_EXTENSION = "tix";
static final String TERMS_BLOCK_EXTENSION = "tbk";
static final String TERMS_CODEC_NAME = "FST_ORD_TERMS_DICT";
@@ -65,7 +158,7 @@ public class TempFSTOrdTermsWriter exten
IndexOutput blockOut = null;
IndexOutput indexOut = null;
- public TempFSTOrdTermsWriter(SegmentWriteState state, PostingsWriterBase postingsWriter) throws IOException {
+ public FSTOrdTermsWriter(SegmentWriteState state, PostingsWriterBase postingsWriter) throws IOException {
final String termsIndexFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_INDEX_EXTENSION);
final String termsBlockFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_BLOCK_EXTENSION);
Copied: lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermOutputs.java (from r1519988, lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempTermOutputs.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermOutputs.java?p2=lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermOutputs.java&p1=lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempTermOutputs.java&r1=1519988&r2=1520034&rev=1520034&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempTermOutputs.java (original)
+++ lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermOutputs.java Wed Sep 4 14:29:48 2013
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.temp;
+package org.apache.lucene.codecs.memory;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -29,15 +29,15 @@ import org.apache.lucene.util.LongsRef;
/**
* An FST {@link Outputs} implementation for
- * {@link TempFSTPostingsFormat}.
+ * {@link FSTTermsWriter}.
*
* @lucene.experimental
*/
// NOTE: outputs should be per-field, since
// longsSize is fixed for each field
-public class TempTermOutputs extends Outputs<TempTermOutputs.TempTermData> {
- private final static TempTermData NO_OUTPUT = new TempTermData();
+class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
+ private final static TermData NO_OUTPUT = new TermData();
//private static boolean TEST = false;
private final boolean hasPos;
private final int longsSize;
@@ -47,18 +47,18 @@ public class TempTermOutputs extends Out
* On an FST, only long[] part is 'shared' and pushed towards root.
* byte[] and term stats will be kept on deeper arcs.
*/
- public static class TempTermData {
+ static class TermData {
long[] longs;
byte[] bytes;
int docFreq;
long totalTermFreq;
- TempTermData() {
+ TermData() {
this.longs = null;
this.bytes = null;
this.docFreq = 0;
this.totalTermFreq = -1;
}
- TempTermData(long[] longs, byte[] bytes, int docFreq, long totalTermFreq) {
+ TermData(long[] longs, byte[] bytes, int docFreq, long totalTermFreq) {
this.longs = longs;
this.bytes = bytes;
this.docFreq = docFreq;
@@ -92,10 +92,10 @@ public class TempTermOutputs extends Out
public boolean equals(Object other_) {
if (other_ == this) {
return true;
- } else if (!(other_ instanceof TempTermOutputs.TempTermData)) {
+ } else if (!(other_ instanceof FSTTermOutputs.TermData)) {
return false;
}
- TempTermData other = (TempTermData) other_;
+ TermData other = (TermData) other_;
return statsEqual(this, other) &&
longsEqual(this, other) &&
bytesEqual(this, other);
@@ -103,7 +103,7 @@ public class TempTermOutputs extends Out
}
}
- protected TempTermOutputs(FieldInfo fieldInfo, int longsSize) {
+ protected FSTTermOutputs(FieldInfo fieldInfo, int longsSize) {
this.hasPos = (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY);
this.longsSize = longsSize;
}
@@ -115,7 +115,7 @@ public class TempTermOutputs extends Out
// 1. every value in t1 is not larger than in t2, or
// 2. every value in t1 is not smaller than t2.
//
- public TempTermData common(TempTermData t1, TempTermData t2) {
+ public TermData common(TermData t1, TermData t2) {
//if (TEST) System.out.print("common("+t1+", "+t2+") = ");
if (t1 == NO_OUTPUT || t2 == NO_OUTPUT) {
//if (TEST) System.out.println("ret:"+NO_OUTPUT);
@@ -125,7 +125,7 @@ public class TempTermOutputs extends Out
long[] min = t1.longs, max = t2.longs;
int pos = 0;
- TempTermData ret;
+ TermData ret;
while (pos < longsSize && min[pos] == max[pos]) {
pos++;
@@ -142,7 +142,7 @@ public class TempTermOutputs extends Out
if (pos < longsSize || allZero(min)) { // not comparable or all-zero
ret = NO_OUTPUT;
} else {
- ret = new TempTermData(min, null, 0, -1);
+ ret = new TermData(min, null, 0, -1);
}
} else { // equal long[]
if (statsEqual(t1, t2) && bytesEqual(t1, t2)) {
@@ -150,7 +150,7 @@ public class TempTermOutputs extends Out
} else if (allZero(min)) {
ret = NO_OUTPUT;
} else {
- ret = new TempTermData(min, null, 0, -1);
+ ret = new TermData(min, null, 0, -1);
}
}
//if (TEST) System.out.println("ret:"+ret);
@@ -158,7 +158,7 @@ public class TempTermOutputs extends Out
}
@Override
- public TempTermData subtract(TempTermData t1, TempTermData t2) {
+ public TermData subtract(TermData t1, TermData t2) {
//if (TEST) System.out.print("subtract("+t1+", "+t2+") = ");
if (t2 == NO_OUTPUT) {
//if (TEST) System.out.println("ret:"+t1);
@@ -176,21 +176,21 @@ public class TempTermOutputs extends Out
pos++;
}
- TempTermData ret;
+ TermData ret;
if (diff == 0 && statsEqual(t1, t2) && bytesEqual(t1, t2)) {
ret = NO_OUTPUT;
} else {
- ret = new TempTermData(share, t1.bytes, t1.docFreq, t1.totalTermFreq);
+ ret = new TermData(share, t1.bytes, t1.docFreq, t1.totalTermFreq);
}
//if (TEST) System.out.println("ret:"+ret);
return ret;
}
- // TODO: if we refactor a 'addSelf(TempMetaDat other)',
+ // TODO: if we refactor a 'addSelf(TermData other)',
// we can gain about 5~7% for fuzzy queries, however this also
// means we are putting too much stress on FST Outputs decoding?
@Override
- public TempTermData add(TempTermData t1, TempTermData t2) {
+ public TermData add(TermData t1, TermData t2) {
//if (TEST) System.out.print("add("+t1+", "+t2+") = ");
if (t1 == NO_OUTPUT) {
//if (TEST) System.out.println("ret:"+t2);
@@ -209,18 +209,18 @@ public class TempTermOutputs extends Out
pos++;
}
- TempTermData ret;
+ TermData ret;
if (t2.bytes != null || t2.docFreq > 0) {
- ret = new TempTermData(accum, t2.bytes, t2.docFreq, t2.totalTermFreq);
+ ret = new TermData(accum, t2.bytes, t2.docFreq, t2.totalTermFreq);
} else {
- ret = new TempTermData(accum, t1.bytes, t1.docFreq, t1.totalTermFreq);
+ ret = new TermData(accum, t1.bytes, t1.docFreq, t1.totalTermFreq);
}
//if (TEST) System.out.println("ret:"+ret);
return ret;
}
@Override
- public void write(TempTermData data, DataOutput out) throws IOException {
+ public void write(TermData data, DataOutput out) throws IOException {
int bit0 = allZero(data.longs) ? 0 : 1;
int bit1 = ((data.bytes == null || data.bytes.length == 0) ? 0 : 1) << 1;
int bit2 = ((data.docFreq == 0) ? 0 : 1) << 2;
@@ -259,7 +259,7 @@ public class TempTermOutputs extends Out
}
@Override
- public TempTermData read(DataInput in) throws IOException {
+ public TermData read(DataInput in) throws IOException {
long[] longs = new long[longsSize];
byte[] bytes = null;
int docFreq = 0;
@@ -292,29 +292,29 @@ public class TempTermOutputs extends Out
docFreq = code;
}
}
- return new TempTermData(longs, bytes, docFreq, totalTermFreq);
+ return new TermData(longs, bytes, docFreq, totalTermFreq);
}
@Override
- public TempTermData getNoOutput() {
+ public TermData getNoOutput() {
return NO_OUTPUT;
}
@Override
- public String outputToString(TempTermData data) {
+ public String outputToString(TermData data) {
return data.toString();
}
- static boolean statsEqual(final TempTermData t1, final TempTermData t2) {
+ static boolean statsEqual(final TermData t1, final TermData t2) {
return t1.docFreq == t2.docFreq && t1.totalTermFreq == t2.totalTermFreq;
}
- static boolean bytesEqual(final TempTermData t1, final TempTermData t2) {
+ static boolean bytesEqual(final TermData t1, final TermData t2) {
if (t1.bytes == null && t2.bytes == null) {
return true;
}
return t1.bytes != null && t2.bytes != null && Arrays.equals(t1.bytes, t2.bytes);
}
- static boolean longsEqual(final TempTermData t1, final TempTermData t2) {
+ static boolean longsEqual(final TermData t1, final TermData t2) {
if (t1.longs == null && t2.longs == null) {
return true;
}
Copied: lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java (from r1519988, lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTTermsReader.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java?p2=lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java&p1=lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTTermsReader.java&r1=1519988&r2=1520034&rev=1520034&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTTermsReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java Wed Sep 4 14:29:48 2013
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.temp;
+package org.apache.lucene.codecs.memory;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -68,14 +68,14 @@ import org.apache.lucene.codecs.CodecUti
* @lucene.experimental
*/
-public class TempFSTTermsReader extends FieldsProducer {
+public class FSTTermsReader extends FieldsProducer {
final TreeMap<String, TermsReader> fields = new TreeMap<String, TermsReader>();
final PostingsReaderBase postingsReader;
final IndexInput in;
//static boolean TEST = false;
- public TempFSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException {
- final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempFSTTermsWriter.TERMS_EXTENSION);
+ public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException {
+ final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION);
this.postingsReader = postingsReader;
this.in = state.directory.openInput(termsFileName, state.context);
@@ -109,9 +109,9 @@ public class TempFSTTermsReader extends
}
private int readHeader(IndexInput in) throws IOException {
- return CodecUtil.checkHeader(in, TempFSTTermsWriter.TERMS_CODEC_NAME,
- TempFSTTermsWriter.TERMS_VERSION_START,
- TempFSTTermsWriter.TERMS_VERSION_CURRENT);
+ return CodecUtil.checkHeader(in, FSTTermsWriter.TERMS_CODEC_NAME,
+ FSTTermsWriter.TERMS_VERSION_START,
+ FSTTermsWriter.TERMS_VERSION_CURRENT);
}
private void seekDir(IndexInput in) throws IOException {
in.seek(in.length() - 8);
@@ -167,7 +167,7 @@ public class TempFSTTermsReader extends
final long sumDocFreq;
final int docCount;
final int longsSize;
- final FST<TempTermOutputs.TempTermData> dict;
+ final FST<FSTTermOutputs.TermData> dict;
TermsReader(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) throws IOException {
this.fieldInfo = fieldInfo;
@@ -176,7 +176,7 @@ public class TempFSTTermsReader extends
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
this.longsSize = longsSize;
- this.dict = new FST<TempTermOutputs.TempTermData>(in, new TempTermOutputs(fieldInfo, longsSize));
+ this.dict = new FST<FSTTermOutputs.TermData>(in, new FSTTermOutputs(fieldInfo, longsSize));
}
@Override
@@ -238,7 +238,7 @@ public class TempFSTTermsReader extends
final BlockTermState state;
/* Current term stats + undecoded metadata (long[] & byte[]) */
- TempTermOutputs.TempTermData meta;
+ FSTTermOutputs.TermData meta;
ByteArrayDataInput bytesReader;
/** Decodes metadata into customized term state */
@@ -306,7 +306,7 @@ public class TempFSTTermsReader extends
// Iterates through all terms in this field
private final class SegmentTermsEnum extends BaseTermsEnum {
- final BytesRefFSTEnum<TempTermOutputs.TempTermData> fstEnum;
+ final BytesRefFSTEnum<FSTTermOutputs.TermData> fstEnum;
/* True when current term's metadata is decoded */
boolean decoded;
@@ -316,7 +316,7 @@ public class TempFSTTermsReader extends
SegmentTermsEnum() throws IOException {
super();
- this.fstEnum = new BytesRefFSTEnum<TempTermOutputs.TempTermData>(dict);
+ this.fstEnum = new BytesRefFSTEnum<FSTTermOutputs.TermData>(dict);
this.decoded = false;
this.seekPending = false;
this.meta = null;
@@ -335,7 +335,7 @@ public class TempFSTTermsReader extends
}
// Update current enum according to FSTEnum
- void updateEnum(final InputOutput<TempTermOutputs.TempTermData> pair) {
+ void updateEnum(final InputOutput<FSTTermOutputs.TermData> pair) {
if (pair == null) {
term = null;
} else {
@@ -405,22 +405,22 @@ public class TempFSTTermsReader extends
int metaUpto;
/* term dict fst */
- final FST<TempTermOutputs.TempTermData> fst;
+ final FST<FSTTermOutputs.TermData> fst;
final FST.BytesReader fstReader;
- final Outputs<TempTermOutputs.TempTermData> fstOutputs;
+ final Outputs<FSTTermOutputs.TermData> fstOutputs;
/* query automaton to intersect with */
final ByteRunAutomaton fsa;
private final class Frame {
/* fst stats */
- FST.Arc<TempTermOutputs.TempTermData> fstArc;
+ FST.Arc<FSTTermOutputs.TermData> fstArc;
/* automaton stats */
int fsaState;
Frame() {
- this.fstArc = new FST.Arc<TempTermOutputs.TempTermData>();
+ this.fstArc = new FST.Arc<FSTTermOutputs.TermData>();
this.fsaState = -1;
}
@@ -475,7 +475,7 @@ public class TempFSTTermsReader extends
/** Lazily accumulate meta data, when we got a accepted term */
void loadMetaData() throws IOException {
- FST.Arc<TempTermOutputs.TempTermData> last, next;
+ FST.Arc<FSTTermOutputs.TermData> last, next;
last = stack[metaUpto].fstArc;
while (metaUpto != level) {
metaUpto++;
@@ -626,7 +626,7 @@ public class TempFSTTermsReader extends
/** Load frame for target arc(node) on fst, so that
* arc.label >= label and !fsa.reject(arc.label) */
Frame loadCeilFrame(int label, Frame top, Frame frame) throws IOException {
- FST.Arc<TempTermOutputs.TempTermData> arc = frame.fstArc;
+ FST.Arc<FSTTermOutputs.TermData> arc = frame.fstArc;
arc = Util.readCeilArc(label, fst, top.fstArc, arc, fstReader);
if (arc == null) {
return null;
Copied: lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java (from r1519988, lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTTermsWriter.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java?p2=lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java&p1=lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTTermsWriter.java&r1=1519988&r2=1520034&rev=1520034&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTTermsWriter.java (original)
+++ lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java Wed Sep 4 14:29:48 2013
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.temp;
+package org.apache.lucene.codecs.memory;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -27,6 +27,7 @@ import org.apache.lucene.index.FieldInfo
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.ArrayUtil;
@@ -44,13 +45,83 @@ import org.apache.lucene.codecs.TermsCon
import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.codecs.CodecUtil;
-/**
- * FST based term dict, the FST maps each term and its metadata.
+/**
+ * FST-based term dict, using metadata as FST output.
+ *
+ * The FST directly holds the mapping between <term, metadata>.
+ *
+ * Term metadata consists of three parts:
+ * 1. term statistics: docFreq, totalTermFreq;
+ * 2. monotonic long[], e.g. the pointer to the postings list for that term;
+ * 3. generic byte[], e.g. other information needed by postings reader.
+ *
+ * <p>
+ * File:
+ * <ul>
+ * <li><tt>.tst</tt>: <a href="#Termdictionary">Term Dictionary</a></li>
+ * </ul>
+ * </p>
+ *
+ * <a name="Termdictionary" id="Termdictionary"></a>
+ * <h3>Term Dictionary</h3>
+ * <p>
+ * The .tst contains a list of FSTs, one for each field.
+ * The FST maps a term to its corresponding statistics (e.g. docfreq)
+ * and metadata (e.g. information for postings list reader like file pointer
+ * to postings list).
+ * </p>
+ * <p>
+ * Typically the metadata is separated into two parts:
+ * <ul>
+ * <li>
+ * Monotonical long array: Some metadata will always be ascending in order
+ * with the corresponding term. This part is used by FST to share outputs between arcs.
+ * </li>
+ * <li>
+ * Generic byte array: Used to store non-monotonical metadata.
+ * </li>
+ * </ul>
+ * </p>
+ *
+ * File format:
+ * <ul>
+ * <li>TermsDict(.tst) --> Header, <i>PostingsHeader</i>, FieldSummary, DirOffset</li>
+ * <li>FieldSummary --> NumFields, <FieldNumber, NumTerms, SumTotalTermFreq?,
+ * SumDocFreq, DocCount, LongsSize, TermFST ><sup>NumFields</sup></li>
+ * <li>TermFST --> {@link FST FST<TermData>}</li>
+ * <li>TermData --> Flag, BytesSize?, LongDelta<sup>LongsSize</sup>?, Byte<sup>BytesSize</sup>?,
+ * < DocFreq[Same?], (TotalTermFreq-DocFreq) > ? </li>
+ * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
+ * <li>DirOffset --> {@link DataOutput#writeLong Uint64}</li>
+ * <li>DocFreq, LongsSize, BytesSize, NumFields,
+ * FieldNumber, DocCount --> {@link DataOutput#writeVInt VInt}</li>
+ * <li>TotalTermFreq, NumTerms, SumTotalTermFreq, SumDocFreq, LongDelta -->
+ * {@link DataOutput#writeVLong VLong}</li>
+ * </ul>
+ * <p>Notes:</p>
+ * <ul>
+ * <li>
+ * The format of PostingsHeader and generic meta bytes are customized by the specific postings implementation:
+ * they contain arbitrary per-file data (such as parameters or versioning information), and per-term data
+ * (non-monotonical ones like pulsed postings data).
+ * </li>
+ * <li>
+ * The format of TermData is determined by FST, typically monotonical metadata will be dense around shallow arcs,
+ * while in deeper arcs only generic bytes and term statistics exist.
+ * </li>
+ * <li>
+ * The byte Flag is used to indicate which part of metadata exists on current arc. Specially the monotonical part
+ * is omitted when it is an array of 0s.
+ * </li>
+ * <li>
+ * Since LongsSize is per-field fixed, it is only written once in field summary.
+ * </li>
+ * </ul>
*
* @lucene.experimental
*/
-public class TempFSTTermsWriter extends FieldsConsumer {
+public class FSTTermsWriter extends FieldsConsumer {
static final String TERMS_EXTENSION = "tmp";
static final String TERMS_CODEC_NAME = "FST_TERMS_DICT";
public static final int TERMS_VERSION_START = 0;
@@ -61,7 +132,7 @@ public class TempFSTTermsWriter extends
final IndexOutput out;
final List<FieldMetaData> fields = new ArrayList<FieldMetaData>();
- public TempFSTTermsWriter(SegmentWriteState state, PostingsWriterBase postingsWriter) throws IOException {
+ public FSTTermsWriter(SegmentWriteState state, PostingsWriterBase postingsWriter) throws IOException {
final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_EXTENSION);
this.postingsWriter = postingsWriter;
@@ -125,9 +196,9 @@ public class TempFSTTermsWriter extends
public final long sumDocFreq;
public final int docCount;
public final int longsSize;
- public final FST<TempTermOutputs.TempTermData> dict;
+ public final FST<FSTTermOutputs.TermData> dict;
- public FieldMetaData(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST<TempTermOutputs.TempTermData> fst) {
+ public FieldMetaData(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST<FSTTermOutputs.TermData> fst) {
this.fieldInfo = fieldInfo;
this.numTerms = numTerms;
this.sumTotalTermFreq = sumTotalTermFreq;
@@ -139,8 +210,8 @@ public class TempFSTTermsWriter extends
}
final class TermsWriter extends TermsConsumer {
- private final Builder<TempTermOutputs.TempTermData> builder;
- private final TempTermOutputs outputs;
+ private final Builder<FSTTermOutputs.TermData> builder;
+ private final FSTTermOutputs outputs;
private final FieldInfo fieldInfo;
private final int longsSize;
private long numTerms;
@@ -153,8 +224,8 @@ public class TempFSTTermsWriter extends
this.numTerms = 0;
this.fieldInfo = fieldInfo;
this.longsSize = postingsWriter.setField(fieldInfo);
- this.outputs = new TempTermOutputs(fieldInfo, longsSize);
- this.builder = new Builder<TempTermOutputs.TempTermData>(FST.INPUT_TYPE.BYTE1, outputs);
+ this.outputs = new FSTTermOutputs(fieldInfo, longsSize);
+ this.builder = new Builder<FSTTermOutputs.TermData>(FST.INPUT_TYPE.BYTE1, outputs);
}
@Override
@@ -172,7 +243,7 @@ public class TempFSTTermsWriter extends
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
// write term meta data into fst
final BlockTermState state = postingsWriter.newTermState();
- final TempTermOutputs.TempTermData meta = new TempTermOutputs.TempTermData();
+ final FSTTermOutputs.TermData meta = new FSTTermOutputs.TermData();
meta.longs = new long[longsSize];
meta.bytes = null;
meta.docFreq = state.docFreq = stats.docFreq;
@@ -193,7 +264,7 @@ public class TempFSTTermsWriter extends
public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
// save FST dict
if (numTerms > 0) {
- final FST<TempTermOutputs.TempTermData> fst = builder.finish();
+ final FST<FSTTermOutputs.TermData> fst = builder.finish();
fields.add(new FieldMetaData(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, fst));
}
}
Modified: lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/package.html?rev=1520034&r1=1520033&r2=1520034&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/package.html (original)
+++ lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/package.html Wed Sep 4 14:29:48 2013
@@ -20,6 +20,6 @@
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
-Postings and DocValues formats that are read entirely into memory.
+Term dictionary, DocValues or Postings formats that are read entirely into memory.
</body>
-</html>
\ No newline at end of file
+</html>
Modified: lucene/dev/branches/lucene3069/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat?rev=1520034&r1=1520033&r2=1520034&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat (original)
+++ lucene/dev/branches/lucene3069/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat Wed Sep 4 14:29:48 2013
@@ -18,7 +18,3 @@ org.apache.lucene.codecs.simpletext.Simp
org.apache.lucene.codecs.memory.MemoryPostingsFormat
org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat
org.apache.lucene.codecs.memory.DirectPostingsFormat
-org.apache.lucene.codecs.temp.TempFSTPulsing41PostingsFormat
-org.apache.lucene.codecs.temp.TempFSTOrdPulsing41PostingsFormat
-org.apache.lucene.codecs.temp.TempFSTPostingsFormat
-org.apache.lucene.codecs.temp.TempFSTOrdPostingsFormat
Copied: lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/FSTOrdPostingsFormat.java (from r1519988, lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTOrdPostingsFormat.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/FSTOrdPostingsFormat.java?p2=lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/FSTOrdPostingsFormat.java&p1=lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTOrdPostingsFormat.java&r1=1519988&r2=1520034&rev=1520034&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTOrdPostingsFormat.java (original)
+++ lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/FSTOrdPostingsFormat.java Wed Sep 4 14:29:48 2013
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.temp;
+package org.apache.lucene.codecs.memory;
/*
@@ -31,111 +31,14 @@ import org.apache.lucene.index.FieldInfo
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.codecs.CodecUtil; // javadocs
-import org.apache.lucene.store.DataOutput; // javadocs
-import org.apache.lucene.util.fst.FST; // javadocs
/**
- * FST-based term dict, using ord as FST output.
- *
- * The FST holds the mapping between <term, ord>, and
- * term's metadata is delta encoded into a single byte block.
- *
- * Typically the byte block consists of four parts:
- * 1. term statistics: docFreq, totalTermFreq;
- * 2. monotonic long[], e.g. the pointer to the postings list for that term;
- * 3. generic byte[], e.g. other information customized by postings base.
- * 4. single-level skip list to speed up metadata decoding by ord.
- *
- * <p>
- * Files:
- * <ul>
- * <li><tt>.tix</tt>: <a href="#Termindex">Term Index</a></li>
- * <li><tt>.tbk</tt>: <a href="#Termblock">Term Block</a></li>
- * </ul>
- * </p>
- *
- * <a name="Termindex" id="Termindex"></a>
- * <h3>Term Index</h3>
- * <p>
- * The .tix contains a list of FSTs, one for each field.
- * The FST maps a term to its corresponding order in current field.
- * </p>
- *
- * <ul>
- * <li>TermIndex(.tix) --> Header, TermFST<sup>NumFields</sup></li>
- * <li>TermFST --> {@link FST FST<long>}</li>
- * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
- * </ul>
- *
- * <p>Notes:</p>
- * <ul>
- * <li>
- * Since terms are already sorted before writing to <a href="#Termblock">Term Block</a>,
- * their ords can directly used to seek term metadata from term block.
- * </li>
- * </ul>
- *
- * <a name="Termblock" id="Termblock"></a>
- * <h3>Term Block</h3>
- * <p>
- * The .tbk contains all the statistics and metadata for terms, along with field summary (e.g.
- * per-field data like number of documents in current field). For each field, there are four blocks:
- * <ul>
- * <li>statistics bytes block: contains term statistics; </li>
- * <li>metadata longs block: delta-encodes monotonical part of metadata; </li>
- * <li>metadata bytes block: encodes other parts of metadata; </li>
- * <li>skip block: contains skip data, to speed up metadata seeking and decoding</li>
- * </ul>
- * </p>
- *
- * <p>File Format:</p>
- * <ul>
- * <li>TermBlock(.tbk) --> Header, <i>PostingsHeader</i>, FieldSummary, DirOffset</li>
- * <li>FieldSummary --> NumFields, <FieldNumber, NumTerms, SumTotalTermFreq?, SumDocFreq,
- * DocCount, LongsSize, DataBlock > <sup>NumFields</sup></li>
- *
- * <li>DataBlock --> StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
- * SkipBlock, StatsBlock, MetaLongsBlock, MetaBytesBlock </li>
- * <li>SkipBlock --> < StatsFPDelta, MetaLongsSkipFPDelta, MetaBytesSkipFPDelta,
- * MetaLongsSkipDelta<sup>LongsSize</sup> ><sup>NumTerms</sup>
- * <li>StatsBlock --> < DocFreq[Same?], (TotalTermFreq-DocFreq) ? > <sup>NumTerms</sup>
- * <li>MetaLongsBlock --> < LongDelta<sup>LongsSize</sup>, BytesSize > <sup>NumTerms</sup>
- * <li>MetaBytesBlock --> Byte <sup>MetaBytesBlockLength</sup>
- * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
- * <li>DirOffset --> {@link DataOutput#writeLong Uint64}</li>
- * <li>NumFields, FieldNumber, DocCount, DocFreq, LongsSize,
- * FieldNumber, DocCount --> {@link DataOutput#writeVInt VInt}</li>
- * <li>NumTerms, SumTotalTermFreq, SumDocFreq, StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
- * StatsFPDelta, MetaLongsSkipFPDelta, MetaBytesSkipFPDelta, MetaLongsSkipStart, TotalTermFreq,
- * LongDelta,--> {@link DataOutput#writeVLong VLong}</li>
- * </ul>
- * <p>Notes: </p>
- * <ul>
- * <li>
- * The format of PostingsHeader and MetaBytes are customized by the specific postings implementation:
- * they contain arbitrary per-file data (such as parameters or versioning information), and per-term data
- * (non-monotonical ones like pulsed postings data).
- * </li>
- * <li>
- * During initialization the reader will load all the blocks into memory. SkipBlock will be decoded, so that during seek
- * term dict can lookup file pointers directly. StatsFPDelta, MetaLongsSkipFPDelta, etc. are file offset
- * for every SkipInterval's term. MetaLongsSkipDelta is the difference from previous one, which indicates
- * the value of preceding metadata longs for every SkipInterval's term.
- * </li>
- * <li>
- * DocFreq is the count of documents which contain the term. TotalTermFreq is the total number of occurrences of the term.
- * Usually these two values are the same for long tail terms, therefore one bit is stole from DocFreq to check this case,
- * so that encoding of TotalTermFreq may be omitted.
- * </li>
- * </ul>
- *
- * @lucene.experimental
+ * FSTOrd term dict + Lucene41PBF
*/
-public final class TempFSTOrdPostingsFormat extends PostingsFormat {
- public TempFSTOrdPostingsFormat() {
- super("TempFSTOrd");
+public final class FSTOrdPostingsFormat extends PostingsFormat {
+ public FSTOrdPostingsFormat() {
+ super("FSTOrd41");
}
@Override
@@ -149,7 +52,7 @@ public final class TempFSTOrdPostingsFor
boolean success = false;
try {
- FieldsConsumer ret = new TempFSTOrdTermsWriter(state, postingsWriter);
+ FieldsConsumer ret = new FSTOrdTermsWriter(state, postingsWriter);
success = true;
return ret;
} finally {
@@ -168,7 +71,7 @@ public final class TempFSTOrdPostingsFor
state.segmentSuffix);
boolean success = false;
try {
- FieldsProducer ret = new TempFSTOrdTermsReader(state, postingsReader);
+ FieldsProducer ret = new FSTOrdTermsReader(state, postingsReader);
success = true;
return ret;
} finally {
Copied: lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/FSTOrdPulsing41PostingsFormat.java (from r1519988, lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTOrdPulsing41PostingsFormat.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/FSTOrdPulsing41PostingsFormat.java?p2=lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/FSTOrdPulsing41PostingsFormat.java&p1=lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTOrdPulsing41PostingsFormat.java&r1=1519988&r2=1520034&rev=1520034&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTOrdPulsing41PostingsFormat.java (original)
+++ lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/FSTOrdPulsing41PostingsFormat.java Wed Sep 4 14:29:48 2013
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.temp;
+package org.apache.lucene.codecs.memory;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -35,19 +35,19 @@ import org.apache.lucene.index.SegmentRe
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.IOUtils;
-/** TempFSTOrd + Pulsing41
+/** FSTOrd + Pulsing41
* @lucene.experimental */
-public class TempFSTOrdPulsing41PostingsFormat extends PostingsFormat {
+public class FSTOrdPulsing41PostingsFormat extends PostingsFormat {
private final PostingsBaseFormat wrappedPostingsBaseFormat;
private final int freqCutoff;
- public TempFSTOrdPulsing41PostingsFormat() {
+ public FSTOrdPulsing41PostingsFormat() {
this(1);
}
- public TempFSTOrdPulsing41PostingsFormat(int freqCutoff) {
- super("TempFSTOrdPulsing41");
+ public FSTOrdPulsing41PostingsFormat(int freqCutoff) {
+ super("FSTOrdPulsing41");
this.wrappedPostingsBaseFormat = new Lucene41PostingsBaseFormat();
this.freqCutoff = freqCutoff;
}
@@ -61,7 +61,7 @@ public class TempFSTOrdPulsing41Postings
try {
docsWriter = wrappedPostingsBaseFormat.postingsWriterBase(state);
pulsingWriter = new PulsingPostingsWriter(state, freqCutoff, docsWriter);
- FieldsConsumer ret = new TempFSTOrdTermsWriter(state, pulsingWriter);
+ FieldsConsumer ret = new FSTOrdTermsWriter(state, pulsingWriter);
success = true;
return ret;
} finally {
@@ -79,7 +79,7 @@ public class TempFSTOrdPulsing41Postings
try {
docsReader = wrappedPostingsBaseFormat.postingsReaderBase(state);
pulsingReader = new PulsingPostingsReader(state, docsReader);
- FieldsProducer ret = new TempFSTOrdTermsReader(state, pulsingReader);
+ FieldsProducer ret = new FSTOrdTermsReader(state, pulsingReader);
success = true;
return ret;
} finally {
Copied: lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/FSTPostingsFormat.java (from r1519988, lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTPostingsFormat.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/FSTPostingsFormat.java?p2=lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/FSTPostingsFormat.java&p1=lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTPostingsFormat.java&r1=1519988&r2=1520034&rev=1520034&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTPostingsFormat.java (original)
+++ lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/FSTPostingsFormat.java Wed Sep 4 14:29:48 2013
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.temp;
+package org.apache.lucene.codecs.memory;
/*
@@ -31,89 +31,14 @@ import org.apache.lucene.index.FieldInfo
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.codecs.CodecUtil; // javadocs
-import org.apache.lucene.store.DataOutput; // javadocs
-import org.apache.lucene.util.fst.FST; // javadocs
/**
- * FST-based term dict, using metadata as FST output.
- *
- * The FST directly holds the mapping between <term, metadata>.
- *
- * Term metadata consists of three parts:
- * 1. term statistics: docFreq, totalTermFreq;
- * 2. monotonic long[], e.g. the pointer to the postings list for that term;
- * 3. generic byte[], e.g. other information need by postings reader.
- *
- * <p>
- * File:
- * <ul>
- * <li><tt>.tst</tt>: <a href="#Termdictionary">Term Dictionary</a></li>
- * </ul>
- * <p>
- *
- * <a name="Termdictionary" id="Termdictionary"></a>
- * <h3>Term Dictionary</h3>
- * <p>
- * The .tst contains a list of FSTs, one for each field.
- * The FST maps a term to its corresponding statistics (e.g. docfreq)
- * and metadata (e.g. information for postings list reader like file pointer
- * to postings list).
- * </p>
- * <p>
- * Typically the metadata is separated into two parts:
- * <ul>
- * <li>
- * Monotonical long array: Some metadata will always be ascending in order
- * with the corresponding term. This part is used by FST to share outputs between arcs.
- * </li>
- * <li>
- * Generic byte array: Used to store non-monotonical metadata.
- * </li>
- * </ul>
- * </p>
- *
- * File format:
- * <ul>
- * <li>TermsDict(.tst) --> Header, <i>PostingsHeader</i>, FieldSummary, DirOffset</li>
- * <li>FieldSummary --> NumFields, <FieldNumber, NumTerms, SumTotalTermFreq?,
- * SumDocFreq, DocCount, LongsSize, TermFST ><sup>NumFields</sup></li>
- * <li>TermFST --> {@link FST FST<TermData>}</li>
- * <li>TermData --> Flag, BytesSize?, LongDelta<sup>LongsSize</sup>?, Byte<sup>BytesSize</sup>?,
- * < DocFreq[Same?], (TotalTermFreq-DocFreq) > ? </li>
- * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
- * <li>DirOffset --> {@link DataOutput#writeLong Uint64}</li>
- * <li>DocFreq, LongsSize, BytesSize, NumFields,
- * FieldNumber, DocCount --> {@link DataOutput#writeVInt VInt}</li>
- * <li>TotalTermFreq, NumTerms, SumTotalTermFreq, SumDocFreq, LongDelta -->
- * {@link DataOutput#writeVLong VLong}</li>
- * </ul>
- * <p>Notes:</p>
- * <ul>
- * <li>
- * The format of PostingsHeader and generic meta bytes are customized by the specific postings implementation:
- * they contain arbitrary per-file data (such as parameters or versioning information), and per-term data
- * (non-monotonical ones like pulsed postings data).
- * </li>
- * <li>
- * The format of TermData is determined by FST, typically monotonical metadata will be dense around shallow arcs,
- * while in deeper arcs only generic bytes and term statistics exist.
- * </li>
- * <li>
- * The byte Flag is used to indicate which part of metadata exists on current arc. Specially the monotonical part
- * is omitted when it is an array of 0s.
- * </li>
- * <li>
- * Since LongsSize is per-field fixed, it is only written once in field summary.
- * </li>
- * </ul>
- *
- * @lucene.experimental
+ * FST term dict + Lucene41PBF
*/
-public final class TempFSTPostingsFormat extends PostingsFormat {
- public TempFSTPostingsFormat() {
- super("TempFST");
+public final class FSTPostingsFormat extends PostingsFormat {
+ public FSTPostingsFormat() {
+ super("FST41");
}
@Override
@@ -127,7 +52,7 @@ public final class TempFSTPostingsFormat
boolean success = false;
try {
- FieldsConsumer ret = new TempFSTTermsWriter(state, postingsWriter);
+ FieldsConsumer ret = new FSTTermsWriter(state, postingsWriter);
success = true;
return ret;
} finally {
@@ -146,7 +71,7 @@ public final class TempFSTPostingsFormat
state.segmentSuffix);
boolean success = false;
try {
- FieldsProducer ret = new TempFSTTermsReader(state, postingsReader);
+ FieldsProducer ret = new FSTTermsReader(state, postingsReader);
success = true;
return ret;
} finally {
Copied: lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/FSTPulsing41PostingsFormat.java (from r1519988, lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTPulsing41PostingsFormat.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/FSTPulsing41PostingsFormat.java?p2=lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/FSTPulsing41PostingsFormat.java&p1=lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTPulsing41PostingsFormat.java&r1=1519988&r2=1520034&rev=1520034&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempFSTPulsing41PostingsFormat.java (original)
+++ lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/FSTPulsing41PostingsFormat.java Wed Sep 4 14:29:48 2013
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.temp;
+package org.apache.lucene.codecs.memory;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -35,20 +35,20 @@ import org.apache.lucene.index.SegmentRe
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.IOUtils;
-/** TempFST + Pulsing41, test only, since
+/** FST + Pulsing41, test only, since
* FST does no delta encoding here!
* @lucene.experimental */
-public class TempFSTPulsing41PostingsFormat extends PostingsFormat {
+public class FSTPulsing41PostingsFormat extends PostingsFormat {
private final PostingsBaseFormat wrappedPostingsBaseFormat;
private final int freqCutoff;
- public TempFSTPulsing41PostingsFormat() {
+ public FSTPulsing41PostingsFormat() {
this(1);
}
- public TempFSTPulsing41PostingsFormat(int freqCutoff) {
- super("TempFSTPulsing41");
+ public FSTPulsing41PostingsFormat(int freqCutoff) {
+ super("FSTPulsing41");
this.wrappedPostingsBaseFormat = new Lucene41PostingsBaseFormat();
this.freqCutoff = freqCutoff;
}
@@ -62,7 +62,7 @@ public class TempFSTPulsing41PostingsFor
try {
docsWriter = wrappedPostingsBaseFormat.postingsWriterBase(state);
pulsingWriter = new PulsingPostingsWriter(state, freqCutoff, docsWriter);
- FieldsConsumer ret = new TempFSTTermsWriter(state, pulsingWriter);
+ FieldsConsumer ret = new FSTTermsWriter(state, pulsingWriter);
success = true;
return ret;
} finally {
@@ -80,7 +80,7 @@ public class TempFSTPulsing41PostingsFor
try {
docsReader = wrappedPostingsBaseFormat.postingsReaderBase(state);
pulsingReader = new PulsingPostingsReader(state, docsReader);
- FieldsProducer ret = new TempFSTTermsReader(state, pulsingReader);
+ FieldsProducer ret = new FSTTermsReader(state, pulsingReader);
success = true;
return ret;
} finally {
Added: lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/package.html?rev=1520034&view=auto
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/package.html (added)
+++ lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/memory/package.html Wed Sep 4 14:29:48 2013
@@ -0,0 +1,25 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+</head>
+<body>
+Postings formats for testing the FST-based term dictionaries with different postings base formats.
+</body>
+</html>
Modified: lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java?rev=1520034&r1=1520033&r2=1520034&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java (original)
+++ lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java Wed Sep 4 14:29:48 2013
@@ -50,10 +50,10 @@ import org.apache.lucene.codecs.sep.IntI
import org.apache.lucene.codecs.sep.IntStreamFactory;
import org.apache.lucene.codecs.sep.SepPostingsReader;
import org.apache.lucene.codecs.sep.SepPostingsWriter;
-import org.apache.lucene.codecs.temp.TempFSTTermsWriter;
-import org.apache.lucene.codecs.temp.TempFSTTermsReader;
-import org.apache.lucene.codecs.temp.TempFSTOrdTermsWriter;
-import org.apache.lucene.codecs.temp.TempFSTOrdTermsReader;
+import org.apache.lucene.codecs.memory.FSTTermsWriter;
+import org.apache.lucene.codecs.memory.FSTTermsReader;
+import org.apache.lucene.codecs.memory.FSTOrdTermsWriter;
+import org.apache.lucene.codecs.memory.FSTOrdTermsReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
@@ -196,7 +196,7 @@ public final class MockRandomPostingsFor
if (t1 == 0) {
boolean success = false;
try {
- fields = new TempFSTTermsWriter(state, postingsWriter);
+ fields = new FSTTermsWriter(state, postingsWriter);
success = true;
} finally {
if (!success) {
@@ -206,7 +206,7 @@ public final class MockRandomPostingsFor
} else if (t1 == 1) {
boolean success = false;
try {
- fields = new TempFSTOrdTermsWriter(state, postingsWriter);
+ fields = new FSTOrdTermsWriter(state, postingsWriter);
success = true;
} finally {
if (!success) {
@@ -355,7 +355,7 @@ public final class MockRandomPostingsFor
if (t1 == 0) {
boolean success = false;
try {
- fields = new TempFSTTermsReader(state, postingsReader);
+ fields = new FSTTermsReader(state, postingsReader);
success = true;
} finally {
if (!success) {
@@ -365,7 +365,7 @@ public final class MockRandomPostingsFor
} else if (t1 == 1) {
boolean success = false;
try {
- fields = new TempFSTOrdTermsReader(state, postingsReader);
+ fields = new FSTOrdTermsReader(state, postingsReader);
success = true;
} finally {
if (!success) {
Modified: lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java?rev=1520034&r1=1520033&r2=1520034&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java (original)
+++ lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java Wed Sep 4 14:29:48 2013
@@ -50,10 +50,10 @@ import org.apache.lucene.codecs.nestedpu
import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat;
import org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat;
import org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat;
-import org.apache.lucene.codecs.temp.TempFSTOrdPostingsFormat;
-import org.apache.lucene.codecs.temp.TempFSTOrdPulsing41PostingsFormat;
-import org.apache.lucene.codecs.temp.TempFSTPostingsFormat;
-import org.apache.lucene.codecs.temp.TempFSTPulsing41PostingsFormat;
+import org.apache.lucene.codecs.memory.FSTOrdPostingsFormat;
+import org.apache.lucene.codecs.memory.FSTOrdPulsing41PostingsFormat;
+import org.apache.lucene.codecs.memory.FSTPostingsFormat;
+import org.apache.lucene.codecs.memory.FSTPulsing41PostingsFormat;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
@@ -129,10 +129,10 @@ public class RandomCodec extends Lucene4
add(avoidCodecs,
new Lucene41PostingsFormat(minItemsPerBlock, maxItemsPerBlock),
- new TempFSTPostingsFormat(),
- new TempFSTOrdPostingsFormat(),
- new TempFSTPulsing41PostingsFormat(1 + random.nextInt(20)),
- new TempFSTOrdPulsing41PostingsFormat(1 + random.nextInt(20)),
+ new FSTPostingsFormat(),
+ new FSTOrdPostingsFormat(),
+ new FSTPulsing41PostingsFormat(1 + random.nextInt(20)),
+ new FSTOrdPulsing41PostingsFormat(1 + random.nextInt(20)),
new DirectPostingsFormat(LuceneTestCase.rarely(random) ? 1 : (LuceneTestCase.rarely(random) ? Integer.MAX_VALUE : maxItemsPerBlock),
LuceneTestCase.rarely(random) ? 1 : (LuceneTestCase.rarely(random) ? Integer.MAX_VALUE : lowFreqCutoff)),
new Pulsing41PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock),
Modified: lucene/dev/branches/lucene3069/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat?rev=1520034&r1=1520033&r2=1520034&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat (original)
+++ lucene/dev/branches/lucene3069/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat Wed Sep 4 14:29:48 2013
@@ -25,3 +25,7 @@ org.apache.lucene.codecs.lucene41vargap.
org.apache.lucene.codecs.bloom.TestBloomFilteredLucene41Postings
org.apache.lucene.codecs.asserting.AssertingPostingsFormat
org.apache.lucene.codecs.lucene40.Lucene40RWPostingsFormat
+org.apache.lucene.codecs.memory.FSTPulsing41PostingsFormat
+org.apache.lucene.codecs.memory.FSTOrdPulsing41PostingsFormat
+org.apache.lucene.codecs.memory.FSTPostingsFormat
+org.apache.lucene.codecs.memory.FSTOrdPostingsFormat