You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/10/20 20:25:54 UTC
svn commit: r1633196 [2/4] - in /lucene/dev/branches/lucene5969:
lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/
lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/
lucene/backward-codecs/src/resources/META-INF/service...
Modified: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene49/Lucene49RWCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene49/Lucene49RWCodec.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene49/Lucene49RWCodec.java (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene49/Lucene49RWCodec.java Mon Oct 20 18:25:52 2014
@@ -19,9 +19,11 @@ package org.apache.lucene.codecs.lucene4
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.NormsFormat;
+import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
+import org.apache.lucene.codecs.lucene41.Lucene41RWPostingsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41RWStoredFieldsFormat;
import org.apache.lucene.codecs.lucene42.Lucene42RWTermVectorsFormat;
import org.apache.lucene.codecs.lucene46.Lucene46RWSegmentInfoFormat;
@@ -33,6 +35,13 @@ import org.apache.lucene.codecs.lucene46
@Deprecated
public final class Lucene49RWCodec extends Lucene49Codec {
+ private final PostingsFormat postings = new Lucene41RWPostingsFormat();
+
+ @Override
+ public PostingsFormat getPostingsFormatForField(String field) {
+ return postings;
+ }
+
private static final DocValuesFormat docValues = new Lucene49RWDocValuesFormat();
@Override
Modified: lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java (original)
+++ lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java Mon Oct 20 18:25:52 2014
@@ -120,7 +120,7 @@ public class BlockTermsReader extends Fi
state.segmentInfo.getId(), state.segmentSuffix);
// Have PostingsReader init itself
- postingsReader.init(in);
+ postingsReader.init(in, state);
// NOTE: data file is too costly to verify checksum against all the bytes on open,
// but for now we at least verify proper structure of the checksum footer: which looks
Modified: lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java (original)
+++ lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java Mon Oct 20 18:25:52 2014
@@ -117,7 +117,7 @@ public class BlockTermsWriter extends Fi
//System.out.println("BTW.init seg=" + state.segmentName);
- postingsWriter.init(out); // have consumer write its format/header
+ postingsWriter.init(out, state); // have consumer write its format/header
success = true;
} finally {
if (!success) {
Copied: lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/BlockTreeOrdsPostingsFormat.java (from r1632459, lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/Ords41PostingsFormat.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/BlockTreeOrdsPostingsFormat.java?p2=lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/BlockTreeOrdsPostingsFormat.java&p1=lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/Ords41PostingsFormat.java&r1=1632459&r2=1633196&rev=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/Ords41PostingsFormat.java (original)
+++ lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/BlockTreeOrdsPostingsFormat.java Mon Oct 20 18:25:52 2014
@@ -24,14 +24,14 @@ import org.apache.lucene.codecs.FieldsPr
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.PostingsWriterBase;
-import org.apache.lucene.codecs.lucene41.Lucene41PostingsReader;
-import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter;
+import org.apache.lucene.codecs.lucene50.Lucene50PostingsReader;
+import org.apache.lucene.codecs.lucene50.Lucene50PostingsWriter;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.IOUtils;
-/** Uses {@link OrdsBlockTreeTermsWriter} with {@link Lucene41PostingsWriter}. */
-public class Ords41PostingsFormat extends PostingsFormat {
+/** Uses {@link OrdsBlockTreeTermsWriter} with {@link Lucene50PostingsWriter}. */
+public class BlockTreeOrdsPostingsFormat extends PostingsFormat {
private final int minTermBlockSize;
private final int maxTermBlockSize;
@@ -45,7 +45,7 @@ public class Ords41PostingsFormat extend
/** Creates {@code Lucene41PostingsFormat} with default
* settings. */
- public Ords41PostingsFormat() {
+ public BlockTreeOrdsPostingsFormat() {
this(OrdsBlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, OrdsBlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
}
@@ -53,8 +53,8 @@ public class Ords41PostingsFormat extend
* values for {@code minBlockSize} and {@code
* maxBlockSize} passed to block terms dictionary.
* @see OrdsBlockTreeTermsWriter#OrdsBlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int) */
- public Ords41PostingsFormat(int minTermBlockSize, int maxTermBlockSize) {
- super("OrdsLucene41");
+ public BlockTreeOrdsPostingsFormat(int minTermBlockSize, int maxTermBlockSize) {
+ super("BlockTreeOrds");
this.minTermBlockSize = minTermBlockSize;
assert minTermBlockSize > 1;
this.maxTermBlockSize = maxTermBlockSize;
@@ -68,7 +68,7 @@ public class Ords41PostingsFormat extend
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
- PostingsWriterBase postingsWriter = new Lucene41PostingsWriter(state);
+ PostingsWriterBase postingsWriter = new Lucene50PostingsWriter(state);
boolean success = false;
try {
@@ -87,11 +87,7 @@ public class Ords41PostingsFormat extend
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
- PostingsReaderBase postingsReader = new Lucene41PostingsReader(state.directory,
- state.fieldInfos,
- state.segmentInfo,
- state.context,
- state.segmentSuffix);
+ PostingsReaderBase postingsReader = new Lucene50PostingsReader(state);
boolean success = false;
try {
FieldsProducer ret = new OrdsBlockTreeTermsReader(postingsReader, state);
Modified: lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsReader.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsReader.java (original)
+++ lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsReader.java Mon Oct 20 18:25:52 2014
@@ -94,7 +94,7 @@ public final class OrdsBlockTreeTermsRea
CodecUtil.checksumEntireFile(indexIn);
// Have PostingsReader init itself
- postingsReader.init(in);
+ postingsReader.init(in, state);
// NOTE: data file is too costly to verify checksum against all the bytes on open,
Modified: lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java (original)
+++ lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java Mon Oct 20 18:25:52 2014
@@ -214,7 +214,7 @@ public final class OrdsBlockTreeTermsWri
// System.out.println("BTW.init seg=" + state.segmentName);
- postingsWriter.init(out); // have consumer write its format/header
+ postingsWriter.init(out, state); // have consumer write its format/header
success = true;
} finally {
if (!success) {
Modified: lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java (original)
+++ lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java Mon Oct 20 18:25:52 2014
@@ -26,7 +26,7 @@ import java.util.TreeMap;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; // javadocs
+import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat; // javadocs
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
@@ -54,7 +54,7 @@ import org.apache.lucene.util.automaton.
// - build depth-N prefix hash?
// - or: longer dense skip lists than just next byte?
-/** Wraps {@link Lucene41PostingsFormat} format for on-disk
+/** Wraps {@link Lucene50PostingsFormat} format for on-disk
* storage, but then at read time loads and stores all
* terms & postings directly in RAM as byte[], int[].
*
@@ -102,12 +102,12 @@ public final class DirectPostingsFormat
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
- return PostingsFormat.forName("Lucene41").fieldsConsumer(state);
+ return PostingsFormat.forName("Lucene50").fieldsConsumer(state);
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
- FieldsProducer postings = PostingsFormat.forName("Lucene41").fieldsProducer(state);
+ FieldsProducer postings = PostingsFormat.forName("Lucene50").fieldsProducer(state);
if (state.context.context != IOContext.Context.MERGE) {
FieldsProducer loadedPostings;
try {
Modified: lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdPostingsFormat.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdPostingsFormat.java (original)
+++ lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdPostingsFormat.java Mon Oct 20 18:25:52 2014
@@ -25,19 +25,19 @@ import org.apache.lucene.codecs.FieldsPr
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.PostingsWriterBase;
-import org.apache.lucene.codecs.lucene41.Lucene41PostingsReader;
-import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter;
+import org.apache.lucene.codecs.lucene50.Lucene50PostingsReader;
+import org.apache.lucene.codecs.lucene50.Lucene50PostingsWriter;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.IOUtils;
/**
- * FSTOrd term dict + Lucene41PBF
+ * FSTOrd term dict + Lucene50PBF
*/
public final class FSTOrdPostingsFormat extends PostingsFormat {
public FSTOrdPostingsFormat() {
- super("FSTOrd41");
+ super("FSTOrd50");
}
@Override
@@ -47,7 +47,7 @@ public final class FSTOrdPostingsFormat
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
- PostingsWriterBase postingsWriter = new Lucene41PostingsWriter(state);
+ PostingsWriterBase postingsWriter = new Lucene50PostingsWriter(state);
boolean success = false;
try {
@@ -63,11 +63,7 @@ public final class FSTOrdPostingsFormat
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
- PostingsReaderBase postingsReader = new Lucene41PostingsReader(state.directory,
- state.fieldInfos,
- state.segmentInfo,
- state.context,
- state.segmentSuffix);
+ PostingsReaderBase postingsReader = new Lucene50PostingsReader(state);
boolean success = false;
try {
FieldsProducer ret = new FSTOrdTermsReader(state, postingsReader);
Modified: lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java (original)
+++ lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java Mon Oct 20 18:25:52 2014
@@ -103,7 +103,7 @@ public class FSTOrdTermsReader extends F
CodecUtil.checksumEntireFile(blockIn);
- this.postingsReader.init(blockIn);
+ this.postingsReader.init(blockIn, state);
seekDir(blockIn);
final FieldInfos fieldInfos = state.fieldInfos;
Modified: lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java (original)
+++ lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java Mon Oct 20 18:25:52 2014
@@ -178,7 +178,7 @@ public class FSTOrdTermsWriter extends F
state.segmentInfo.getId(), state.segmentSuffix);
CodecUtil.writeSegmentHeader(blockOut, TERMS_CODEC_NAME, VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentSuffix);
- this.postingsWriter.init(blockOut);
+ this.postingsWriter.init(blockOut, state);
success = true;
} finally {
if (!success) {
Modified: lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTPostingsFormat.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTPostingsFormat.java (original)
+++ lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTPostingsFormat.java Mon Oct 20 18:25:52 2014
@@ -25,19 +25,19 @@ import org.apache.lucene.codecs.FieldsPr
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.PostingsWriterBase;
-import org.apache.lucene.codecs.lucene41.Lucene41PostingsReader;
-import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter;
+import org.apache.lucene.codecs.lucene50.Lucene50PostingsReader;
+import org.apache.lucene.codecs.lucene50.Lucene50PostingsWriter;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.IOUtils;
/**
- * FST term dict + Lucene41PBF
+ * FST term dict + Lucene50PBF
*/
public final class FSTPostingsFormat extends PostingsFormat {
public FSTPostingsFormat() {
- super("FST41");
+ super("FST50");
}
@Override
@@ -47,7 +47,7 @@ public final class FSTPostingsFormat ext
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
- PostingsWriterBase postingsWriter = new Lucene41PostingsWriter(state);
+ PostingsWriterBase postingsWriter = new Lucene50PostingsWriter(state);
boolean success = false;
try {
@@ -63,11 +63,7 @@ public final class FSTPostingsFormat ext
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
- PostingsReaderBase postingsReader = new Lucene41PostingsReader(state.directory,
- state.fieldInfos,
- state.segmentInfo,
- state.context,
- state.segmentSuffix);
+ PostingsReaderBase postingsReader = new Lucene50PostingsReader(state);
boolean success = false;
try {
FieldsProducer ret = new FSTTermsReader(state, postingsReader);
Modified: lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java (original)
+++ lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java Mon Oct 20 18:25:52 2014
@@ -86,7 +86,7 @@ public class FSTTermsReader extends Fiel
FSTTermsWriter.TERMS_VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentSuffix);
CodecUtil.checksumEntireFile(in);
- this.postingsReader.init(in);
+ this.postingsReader.init(in, state);
seekDir(in);
final FieldInfos fieldInfos = state.fieldInfos;
Modified: lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java (original)
+++ lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java Mon Oct 20 18:25:52 2014
@@ -145,7 +145,7 @@ public class FSTTermsWriter extends Fiel
CodecUtil.writeSegmentHeader(out, TERMS_CODEC_NAME, TERMS_VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentSuffix);
- this.postingsWriter.init(out);
+ this.postingsWriter.init(out, state);
success = true;
} finally {
if (!success) {
Modified: lucene/dev/branches/lucene5969/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat (original)
+++ lucene/dev/branches/lucene5969/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat Mon Oct 20 18:25:52 2014
@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-org.apache.lucene.codecs.blocktreeords.Ords41PostingsFormat
+org.apache.lucene.codecs.blocktreeords.BlockTreeOrdsPostingsFormat
org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat
org.apache.lucene.codecs.memory.DirectPostingsFormat
org.apache.lucene.codecs.memory.FSTOrdPostingsFormat
Modified: lucene/dev/branches/lucene5969/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java (original)
+++ lucene/dev/branches/lucene5969/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java Mon Oct 20 18:25:52 2014
@@ -18,7 +18,6 @@ package org.apache.lucene.codecs.blockte
*/
import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.lucene41ords.Lucene41WithOrds;
import org.apache.lucene.index.BasePostingsFormatTestCase;
import org.apache.lucene.util.TestUtil;
@@ -26,7 +25,7 @@ import org.apache.lucene.util.TestUtil;
* Basic tests of a PF using FixedGap terms dictionary
*/
public class TestFixedGapPostingsFormat extends BasePostingsFormatTestCase {
- private final Codec codec = TestUtil.alwaysPostingsFormat(new Lucene41WithOrds(TestUtil.nextInt(random(), 1, 1000)));
+ private final Codec codec = TestUtil.alwaysPostingsFormat(new LuceneFixedGap(TestUtil.nextInt(random(), 1, 1000)));
@Override
protected Codec getCodec() {
Modified: lucene/dev/branches/lucene5969/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestVarGapDocFreqIntervalPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestVarGapDocFreqIntervalPostingsFormat.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestVarGapDocFreqIntervalPostingsFormat.java (original)
+++ lucene/dev/branches/lucene5969/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestVarGapDocFreqIntervalPostingsFormat.java Mon Oct 20 18:25:52 2014
@@ -18,7 +18,7 @@ package org.apache.lucene.codecs.blockte
*/
import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.lucene41vargap.Lucene41VarGapFixedInterval;
+import org.apache.lucene.codecs.blockterms.LuceneVarGapFixedInterval;
import org.apache.lucene.index.BasePostingsFormatTestCase;
import org.apache.lucene.util.TestUtil;
@@ -26,7 +26,7 @@ import org.apache.lucene.util.TestUtil;
* Basic tests of a PF using VariableGap terms dictionary (fixed interval)
*/
public class TestVarGapDocFreqIntervalPostingsFormat extends BasePostingsFormatTestCase {
- private final Codec codec = TestUtil.alwaysPostingsFormat(new Lucene41VarGapFixedInterval(TestUtil.nextInt(random(), 1, 1000)));
+ private final Codec codec = TestUtil.alwaysPostingsFormat(new LuceneVarGapFixedInterval(TestUtil.nextInt(random(), 1, 1000)));
@Override
protected Codec getCodec() {
Modified: lucene/dev/branches/lucene5969/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestVarGapFixedIntervalPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestVarGapFixedIntervalPostingsFormat.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestVarGapFixedIntervalPostingsFormat.java (original)
+++ lucene/dev/branches/lucene5969/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestVarGapFixedIntervalPostingsFormat.java Mon Oct 20 18:25:52 2014
@@ -18,7 +18,7 @@ package org.apache.lucene.codecs.blockte
*/
import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.lucene41vargap.Lucene41VarGapDocFreqInterval;
+import org.apache.lucene.codecs.blockterms.LuceneVarGapDocFreqInterval;
import org.apache.lucene.index.BasePostingsFormatTestCase;
import org.apache.lucene.util.TestUtil;
@@ -26,7 +26,7 @@ import org.apache.lucene.util.TestUtil;
* Basic tests of a PF using VariableGap terms dictionary (fixed interval, docFreq threshold)
*/
public class TestVarGapFixedIntervalPostingsFormat extends BasePostingsFormatTestCase {
- private final Codec codec = TestUtil.alwaysPostingsFormat(new Lucene41VarGapDocFreqInterval(TestUtil.nextInt(random(), 1, 100), TestUtil.nextInt(random(), 1, 1000)));
+ private final Codec codec = TestUtil.alwaysPostingsFormat(new LuceneVarGapDocFreqInterval(TestUtil.nextInt(random(), 1, 100), TestUtil.nextInt(random(), 1, 1000)));
@Override
protected Codec getCodec() {
Modified: lucene/dev/branches/lucene5969/lucene/codecs/src/test/org/apache/lucene/codecs/blocktreeords/TestOrdsBlockTree.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/codecs/src/test/org/apache/lucene/codecs/blocktreeords/TestOrdsBlockTree.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/codecs/src/test/org/apache/lucene/codecs/blocktreeords/TestOrdsBlockTree.java (original)
+++ lucene/dev/branches/lucene5969/lucene/codecs/src/test/org/apache/lucene/codecs/blocktreeords/TestOrdsBlockTree.java Mon Oct 20 18:25:52 2014
@@ -39,7 +39,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.TestUtil;
public class TestOrdsBlockTree extends BasePostingsFormatTestCase {
- private final Codec codec = TestUtil.alwaysPostingsFormat(new Ords41PostingsFormat());
+ private final Codec codec = TestUtil.alwaysPostingsFormat(new BlockTreeOrdsPostingsFormat());
@Override
protected Codec getCodec() {
Modified: lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java Mon Oct 20 18:25:52 2014
@@ -23,6 +23,7 @@ import java.io.IOException;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
@@ -53,7 +54,7 @@ public abstract class PostingsReaderBase
/** Performs any initialization, such as reading and
* verifying the header from the provided terms
* dictionary {@link IndexInput}. */
- public abstract void init(IndexInput termsIn) throws IOException;
+ public abstract void init(IndexInput termsIn, SegmentReadState state) throws IOException;
/** Return a newly created empty TermState */
public abstract BlockTermState newTermState() throws IOException;
Modified: lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java Mon Oct 20 18:25:52 2014
@@ -24,6 +24,7 @@ import org.apache.lucene.codecs.blocktre
import org.apache.lucene.index.DocsAndPositionsEnum; // javadocs
import org.apache.lucene.index.DocsEnum; // javadocs
import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
@@ -50,7 +51,7 @@ public abstract class PostingsWriterBase
/** Called once after startup, before any terms have been
* added. Implementations typically write a header to
* the provided {@code termsOut}. */
- public abstract void init(IndexOutput termsOut) throws IOException;
+ public abstract void init(IndexOutput termsOut, SegmentWriteState state) throws IOException;
/** Write all postings for one term; use the provided
* {@link TermsEnum} to pull a {@link DocsEnum} or {@link
Modified: lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java Mon Oct 20 18:25:52 2014
@@ -24,8 +24,6 @@ import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.store.DataOutput;
-import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
@@ -72,11 +70,6 @@ public abstract class PushPostingsWriter
protected PushPostingsWriterBase() {
}
- /** Called once after startup, before any terms have been
- * added. Implementations typically write a header to
- * the provided {@code termsOut}. */
- public abstract void init(IndexOutput termsOut) throws IOException;
-
/** Return a newly created empty TermState */
public abstract BlockTermState newTermState() throws IOException;
@@ -90,26 +83,11 @@ public abstract class PushPostingsWriter
* and will holds metadata from PBF when returned */
public abstract void finishTerm(BlockTermState state) throws IOException;
- /**
- * Encode metadata as long[] and byte[]. {@code absolute} controls whether
- * current term is delta encoded according to latest term.
- * Usually elements in {@code longs} are file pointers, so each one always
- * increases when a new term is consumed. {@code out} is used to write generic
- * bytes, which are not monotonic.
- *
- * NOTE: sometimes long[] might contain "don't care" values that are unused, e.g.
- * the pointer to postings list may not be defined for some terms but is defined
- * for others, if it is designed to inline some postings data in term dictionary.
- * In this case, the postings writer should always use the last value, so that each
- * element in metadata long[] remains monotonic.
- */
- public abstract void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState state, boolean absolute) throws IOException;
-
/**
* Sets the current field for writing, and returns the
* fixed length of long[] metadata (which is fixed per
* field), called when the writing switches to another field. */
- // TODO: better name?
+ @Override
public int setField(FieldInfo fieldInfo) {
this.fieldInfo = fieldInfo;
indexOptions = fieldInfo.getIndexOptions();
Modified: lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java Mon Oct 20 18:25:52 2014
@@ -30,12 +30,9 @@ import org.apache.lucene.codecs.Postings
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.Terms;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
@@ -96,24 +93,22 @@ public final class BlockTreeTermsReader
private final int version;
/** Sole constructor. */
- public BlockTreeTermsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo info,
- PostingsReaderBase postingsReader, IOContext ioContext,
- String segmentSuffix)
+ public BlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentReadState state)
throws IOException {
this.postingsReader = postingsReader;
- this.segment = info.name;
- in = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, BlockTreeTermsWriter.TERMS_EXTENSION),
- ioContext);
+ this.segment = state.segmentInfo.name;
+ String termsFileName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, BlockTreeTermsWriter.TERMS_EXTENSION);
+ in = state.directory.openInput(termsFileName, state.context);
boolean success = false;
IndexInput indexIn = null;
try {
version = readHeader(in);
- indexIn = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, BlockTreeTermsWriter.TERMS_INDEX_EXTENSION),
- ioContext);
+ String indexFileName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, BlockTreeTermsWriter.TERMS_INDEX_EXTENSION);
+ indexIn = state.directory.openInput(indexFileName, state.context);
int indexVersion = readIndexHeader(indexIn);
if (indexVersion != version) {
throw new CorruptIndexException("mixmatched version files: " + in + "=" + version + "," + indexIn + "=" + indexVersion, indexIn);
@@ -125,7 +120,7 @@ public final class BlockTreeTermsReader
}
// Have PostingsReader init itself
- postingsReader.init(in);
+ postingsReader.init(in, state);
// NOTE: data file is too costly to verify checksum against all the bytes on open,
@@ -158,7 +153,7 @@ public final class BlockTreeTermsReader
final BytesRef rootCode = new BytesRef(new byte[numBytes]);
in.readBytes(rootCode.bytes, 0, numBytes);
rootCode.length = numBytes;
- final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
+ final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
if (fieldInfo == null) {
throw new CorruptIndexException("invalid field number: " + field, in);
}
@@ -176,8 +171,8 @@ public final class BlockTreeTermsReader
} else {
minTerm = maxTerm = null;
}
- if (docCount < 0 || docCount > info.getDocCount()) { // #docs with field must be <= #docs
- throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.getDocCount(), in);
+ if (docCount < 0 || docCount > state.segmentInfo.getDocCount()) { // #docs with field must be <= #docs
+ throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.getDocCount(), in);
}
if (sumDocFreq < docCount) { // #postings must be >= #docs with field
throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, in);
Modified: lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java Mon Oct 20 18:25:52 2014
@@ -329,7 +329,7 @@ public final class BlockTreeTermsWriter
// System.out.println("BTW.init seg=" + state.segmentName);
- postingsWriter.init(out); // have consumer write its format/header
+ postingsWriter.init(out, state); // have consumer write its format/header
success = true;
} finally {
if (!success) {
Modified: lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/blocktree/Stats.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/blocktree/Stats.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/blocktree/Stats.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/blocktree/Stats.java Mon Oct 20 18:25:52 2014
@@ -22,7 +22,7 @@ import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.util.Locale;
-import org.apache.lucene.codecs.PostingsBaseFormat;
+import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
@@ -81,11 +81,11 @@ public class Stats {
public long totalBlockSuffixBytes;
/** Total number of bytes used to store term stats (not
- * including what the {@link PostingsBaseFormat}
+ * including what the {@link PostingsReaderBase}
* stores. */
public long totalBlockStatsBytes;
- /** Total bytes stored by the {@link PostingsBaseFormat},
+ /** Total bytes stored by the {@link PostingsReaderBase},
* plus the other few vInts stored in the frame. */
public long totalBlockOtherBytes;
Added: lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/ForUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/ForUtil.java?rev=1633196&view=auto
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/ForUtil.java (added)
+++ lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/ForUtil.java Mon Oct 20 18:25:52 2014
@@ -0,0 +1,246 @@
+package org.apache.lucene.codecs.lucene50;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.packed.PackedInts.Decoder;
+import org.apache.lucene.util.packed.PackedInts.FormatAndBits;
+import org.apache.lucene.util.packed.PackedInts;
+
+import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.BLOCK_SIZE;
+
+/**
+ * Encode all values in normal area with fixed bit width,
+ * which is determined by the max value in this block.
+ */
+final class ForUtil {
+
+ /**
+ * Special number of bits per value used whenever all values to encode are equal.
+ */
+ private static final int ALL_VALUES_EQUAL = 0;
+
+ /**
+ * Upper limit of the number of bytes that might be required to store
+ * <code>BLOCK_SIZE</code> encoded values.
+ */
+ static final int MAX_ENCODED_SIZE = BLOCK_SIZE * 4;
+
+ /**
+ * Upper limit of the number of values that might be decoded in a single call to
+ * {@link #readBlock(IndexInput, byte[], int[])}. Although values after
+ * <code>BLOCK_SIZE</code> are garbage, it is necessary to allocate value buffers
+ * whose size is >= MAX_DATA_SIZE to avoid {@link ArrayIndexOutOfBoundsException}s.
+ */
+ static final int MAX_DATA_SIZE;
+ static {
+ int maxDataSize = 0;
+ for(int version=PackedInts.VERSION_START;version<=PackedInts.VERSION_CURRENT;version++) {
+ for (PackedInts.Format format : PackedInts.Format.values()) {
+ for (int bpv = 1; bpv <= 32; ++bpv) {
+ if (!format.isSupported(bpv)) {
+ continue;
+ }
+ final PackedInts.Decoder decoder = PackedInts.getDecoder(format, version, bpv);
+ final int iterations = computeIterations(decoder);
+ maxDataSize = Math.max(maxDataSize, iterations * decoder.byteValueCount());
+ }
+ }
+ }
+ MAX_DATA_SIZE = maxDataSize;
+ }
+
+ /**
+ * Compute the number of iterations required to decode <code>BLOCK_SIZE</code>
+ * values with the provided {@link Decoder}.
+ */
+ private static int computeIterations(PackedInts.Decoder decoder) {
+ return (int) Math.ceil((float) BLOCK_SIZE / decoder.byteValueCount());
+ }
+
+ /**
+ * Compute the number of bytes required to encode a block of values that require
+ * <code>bitsPerValue</code> bits per value with format <code>format</code>.
+ */
+ private static int encodedSize(PackedInts.Format format, int packedIntsVersion, int bitsPerValue) {
+ final long byteCount = format.byteCount(packedIntsVersion, BLOCK_SIZE, bitsPerValue);
+ assert byteCount >= 0 && byteCount <= Integer.MAX_VALUE : byteCount;
+ return (int) byteCount;
+ }
+
+ private final int[] encodedSizes;
+ private final PackedInts.Encoder[] encoders;
+ private final PackedInts.Decoder[] decoders;
+ private final int[] iterations;
+
+ /**
+ * Create a new {@link ForUtil} instance and save state into <code>out</code>.
+ */
+ ForUtil(float acceptableOverheadRatio, DataOutput out) throws IOException {
+ out.writeVInt(PackedInts.VERSION_CURRENT);
+ encodedSizes = new int[33];
+ encoders = new PackedInts.Encoder[33];
+ decoders = new PackedInts.Decoder[33];
+ iterations = new int[33];
+
+ for (int bpv = 1; bpv <= 32; ++bpv) {
+ final FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(
+ BLOCK_SIZE, bpv, acceptableOverheadRatio);
+ assert formatAndBits.format.isSupported(formatAndBits.bitsPerValue);
+ assert formatAndBits.bitsPerValue <= 32;
+ encodedSizes[bpv] = encodedSize(formatAndBits.format, PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue);
+ encoders[bpv] = PackedInts.getEncoder(
+ formatAndBits.format, PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue);
+ decoders[bpv] = PackedInts.getDecoder(
+ formatAndBits.format, PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue);
+ iterations[bpv] = computeIterations(decoders[bpv]);
+
+ out.writeVInt(formatAndBits.format.getId() << 5 | (formatAndBits.bitsPerValue - 1));
+ }
+ }
+
+ /**
+ * Restore a {@link ForUtil} from a {@link DataInput}.
+ */
+ ForUtil(DataInput in) throws IOException {
+ int packedIntsVersion = in.readVInt();
+ PackedInts.checkVersion(packedIntsVersion);
+ encodedSizes = new int[33];
+ encoders = new PackedInts.Encoder[33];
+ decoders = new PackedInts.Decoder[33];
+ iterations = new int[33];
+
+ for (int bpv = 1; bpv <= 32; ++bpv) {
+ final int code = in.readVInt();
+ final int formatId = code >>> 5;
+ final int bitsPerValue = (code & 31) + 1;
+
+ final PackedInts.Format format = PackedInts.Format.byId(formatId);
+ assert format.isSupported(bitsPerValue);
+ encodedSizes[bpv] = encodedSize(format, packedIntsVersion, bitsPerValue);
+ encoders[bpv] = PackedInts.getEncoder(
+ format, packedIntsVersion, bitsPerValue);
+ decoders[bpv] = PackedInts.getDecoder(
+ format, packedIntsVersion, bitsPerValue);
+ iterations[bpv] = computeIterations(decoders[bpv]);
+ }
+ }
+
+ /**
+ * Write a block of data (<code>For</code> format).
+ *
+ * @param data the data to write
+ * @param encoded a buffer to use to encode data
+ * @param out the destination output
+ * @throws IOException If there is a low-level I/O error
+ */
+ void writeBlock(int[] data, byte[] encoded, IndexOutput out) throws IOException {
+ if (isAllEqual(data)) {
+ out.writeByte((byte) ALL_VALUES_EQUAL);
+ out.writeVInt(data[0]);
+ return;
+ }
+
+ final int numBits = bitsRequired(data);
+ assert numBits > 0 && numBits <= 32 : numBits;
+ final PackedInts.Encoder encoder = encoders[numBits];
+ final int iters = iterations[numBits];
+ assert iters * encoder.byteValueCount() >= BLOCK_SIZE;
+ final int encodedSize = encodedSizes[numBits];
+ assert iters * encoder.byteBlockCount() >= encodedSize;
+
+ out.writeByte((byte) numBits);
+
+ encoder.encode(data, 0, encoded, 0, iters);
+ out.writeBytes(encoded, encodedSize);
+ }
+
+ /**
+ * Read the next block of data (<code>For</code> format).
+ *
+ * @param in the input to use to read data
+ * @param encoded a buffer that can be used to store encoded data
+ * @param decoded where to write decoded data
+ * @throws IOException If there is a low-level I/O error
+ */
+ void readBlock(IndexInput in, byte[] encoded, int[] decoded) throws IOException {
+ final int numBits = in.readByte();
+ assert numBits <= 32 : numBits;
+
+ if (numBits == ALL_VALUES_EQUAL) {
+ final int value = in.readVInt();
+ Arrays.fill(decoded, 0, BLOCK_SIZE, value);
+ return;
+ }
+
+ final int encodedSize = encodedSizes[numBits];
+ in.readBytes(encoded, 0, encodedSize);
+
+ final PackedInts.Decoder decoder = decoders[numBits];
+ final int iters = iterations[numBits];
+ assert iters * decoder.byteValueCount() >= BLOCK_SIZE;
+
+ decoder.decode(encoded, 0, decoded, 0, iters);
+ }
+
+ /**
+ * Skip the next block of data.
+ *
+ * @param in the input where to read data
+ * @throws IOException If there is a low-level I/O error
+ */
+ void skipBlock(IndexInput in) throws IOException {
+ final int numBits = in.readByte();
+ if (numBits == ALL_VALUES_EQUAL) {
+ in.readVInt();
+ return;
+ }
+ assert numBits > 0 && numBits <= 32 : numBits;
+ final int encodedSize = encodedSizes[numBits];
+ in.seek(in.getFilePointer() + encodedSize);
+ }
+
+ private static boolean isAllEqual(final int[] data) {
+ final int v = data[0];
+ for (int i = 1; i < BLOCK_SIZE; ++i) {
+ if (data[i] != v) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /**
+ * Compute the number of bits required to serialize any of the longs in
+ * <code>data</code>.
+ */
+ private static int bitsRequired(final int[] data) {
+ long or = 0;
+ for (int i = 0; i < BLOCK_SIZE; ++i) {
+ assert data[i] >= 0;
+ or |= data[i];
+ }
+ return PackedInts.bitsRequired(or);
+ }
+
+}
Modified: lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java (original)
+++ lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java Mon Oct 20 18:25:52 2014
@@ -106,7 +106,7 @@ public class Lucene50Codec extends Codec
/** Returns the postings format that should be used for writing
* new segments of <code>field</code>.
*
- * The default implementation always returns "Lucene41"
+ * The default implementation always returns "Lucene50"
*/
public PostingsFormat getPostingsFormatForField(String field) {
return defaultFormat;
@@ -126,7 +126,7 @@ public class Lucene50Codec extends Codec
return docValuesFormat;
}
- private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41");
+ private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene50");
private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Lucene50");
private final NormsFormat normsFormat = new Lucene50NormsFormat();
Added: lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsFormat.java?rev=1633196&view=auto
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsFormat.java (added)
+++ lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsFormat.java Mon Oct 20 18:25:52 2014
@@ -0,0 +1,443 @@
+package org.apache.lucene.codecs.lucene50;
+
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.MultiLevelSkipListWriter;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.PostingsReaderBase;
+import org.apache.lucene.codecs.PostingsWriterBase;
+import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader;
+import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.packed.PackedInts;
+
+/**
+ * Lucene 5.0 postings format, which encodes postings in packed integer blocks
+ * for fast decode.
+ *
+ * <p>
+ * Basic idea:
+ * <ul>
+ * <li>
+ * <b>Packed Blocks and VInt Blocks</b>:
+ * <p>In packed blocks, integers are encoded with the same bit width ({@link PackedInts packed format}):
+ * the block size (i.e. number of integers inside block) is fixed (currently 128). Additionally blocks
+ * that are all the same value are encoded in an optimized way.</p>
+ * <p>In VInt blocks, integers are encoded as {@link DataOutput#writeVInt VInt}:
+ * the block size is variable.</p>
+ * </li>
+ *
+ * <li>
+ * <b>Block structure</b>:
+ * <p>When the postings are long enough, Lucene50PostingsFormat will try to encode most integer data
+ * as a packed block.</p>
+ * <p>Take a term with 259 documents as an example, the first 256 document ids are encoded as two packed
+ * blocks, while the remaining 3 are encoded as one VInt block. </p>
+ * <p>Different kinds of data are always encoded separately into different packed blocks, but may
+ * possibly be interleaved into the same VInt block. </p>
+ * <p>This strategy is applied to pairs:
+ * <document number, frequency>,
+ * <position, payload length>,
+ * <position, offset start, offset length>, and
+ * &lt;position, payload length, offset start, offset length&gt;.</p>
+ * </li>
+ *
+ * <li>
+ * <b>Skipdata settings</b>:
+ * <p>The structure of skip table is quite similar to previous version of Lucene. Skip interval is the
+ * same as block size, and each skip entry points to the beginning of each block. However, for
+ * the first block, skip data is omitted.</p>
+ * </li>
+ *
+ * <li>
+ * <b>Positions, Payloads, and Offsets</b>:
+ * <p>A position is an integer indicating where the term occurs within one document.
+ * A payload is a blob of metadata associated with current position.
+ * An offset is a pair of integers indicating the tokenized start/end offsets for given term
+ * in current position: it is essentially a specialized payload. </p>
+ * <p>When payloads and offsets are not omitted, numPositions==numPayloads==numOffsets (assuming a
+ * null payload contributes one count). As mentioned in block structure, it is possible to encode
+ * these three either combined or separately.
+ * <p>In all cases, payloads and offsets are stored together. When encoded as a packed block,
+ * position data is separated out as .pos, while payloads and offsets are encoded in .pay (payload
+ * metadata will also be stored directly in .pay). When encoded as VInt blocks, all these three are
+ * stored interleaved into the .pos (so is payload metadata).</p>
+ * <p>With this strategy, the majority of payload and offset data will be outside .pos file.
+ * So for queries that require only position data, running on a full index with payloads and offsets,
+ * this reduces disk pre-fetches.</p>
+ * </li>
+ * </ul>
+ * </p>
+ *
+ * <p>
+ * Files and detailed format:
+ * <ul>
+ * <li><tt>.tim</tt>: <a href="#Termdictionary">Term Dictionary</a></li>
+ * <li><tt>.tip</tt>: <a href="#Termindex">Term Index</a></li>
+ * <li><tt>.doc</tt>: <a href="#Frequencies">Frequencies and Skip Data</a></li>
+ * <li><tt>.pos</tt>: <a href="#Positions">Positions</a></li>
+ * <li><tt>.pay</tt>: <a href="#Payloads">Payloads and Offsets</a></li>
+ * </ul>
+ * </p>
+ *
+ * <a name="Termdictionary" id="Termdictionary"></a>
+ * <dl>
+ * <dd>
+ * <b>Term Dictionary</b>
+ *
+ * <p>The .tim file contains the list of terms in each
+ * field along with per-term statistics (such as docfreq)
+ * and pointers to the frequencies, positions, payload and
+ * skip data in the .doc, .pos, and .pay files.
+ * See {@link BlockTreeTermsWriter} for more details on the format.
+ * </p>
+ *
+ * <p>NOTE: The term dictionary can plug into different postings implementations:
+ * the postings writer/reader are actually responsible for encoding
+ * and decoding the PostingsHeader and TermMetadata sections described here:</p>
+ *
+ * <ul>
+ * <li>PostingsHeader --> Header, PackedBlockSize</li>
+ * <li>TermMetadata --> (DocFPDelta|SingletonDocID), PosFPDelta?, PosVIntBlockFPDelta?, PayFPDelta?,
+ * SkipFPDelta?</li>
+ * <li>Header, --> {@link CodecUtil#writeSegmentHeader SegmentHeader}</li>
+ * <li>PackedBlockSize, SingletonDocID --> {@link DataOutput#writeVInt VInt}</li>
+ * <li>DocFPDelta, PosFPDelta, PayFPDelta, PosVIntBlockFPDelta, SkipFPDelta --> {@link DataOutput#writeVLong VLong}</li>
+ * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
+ * </ul>
+ * <p>Notes:</p>
+ * <ul>
+ * <li>Header is a {@link CodecUtil#writeSegmentHeader SegmentHeader} storing the version information
+ * for the postings.</li>
+ * <li>PackedBlockSize is the fixed block size for packed blocks. In packed block, bit width is
+ * determined by the largest integer. Smaller block size result in smaller variance among width
+ * of integers hence smaller indexes. Larger block size result in more efficient bulk i/o hence
+ * better acceleration. This value should always be a multiple of 64, currently fixed as 128 as
+ * a tradeoff. It is also the skip interval used to accelerate {@link DocsEnum#advance(int)}.
+ * <li>DocFPDelta determines the position of this term's TermFreqs within the .doc file.
+ * In particular, it is the difference of file offset between this term's
+ * data and previous term's data (or zero, for the first term in the block). On disk it is
+ * stored as the difference from previous value in sequence. </li>
+ * <li>PosFPDelta determines the position of this term's TermPositions within the .pos file.
+ * While PayFPDelta determines the position of this term's <TermPayloads, TermOffsets?> within
+ * the .pay file. Similar to DocFPDelta, it is the difference between two file positions (or
+ * neglected, for fields that omit payloads and offsets).</li>
+ * <li>PosVIntBlockFPDelta determines the position of this term's last TermPosition in last pos packed
+ * block within the .pos file. It is a synonym for PayVIntBlockFPDelta or OffsetVIntBlockFPDelta.
+ * This is actually used to indicate whether it is necessary to load following
+ * payloads and offsets from .pos instead of .pay. Every time a new block of positions are to be
+ * loaded, the PostingsReader will use this value to check whether current block is packed format
+ * or VInt. When packed format, payloads and offsets are fetched from .pay, otherwise from .pos.
+ * (this value is neglected when total number of positions i.e. totalTermFreq is less than or equal
+ * to PackedBlockSize).
+ * <li>SkipFPDelta determines the position of this term's SkipData within the .doc
+ * file. In particular, it is the length of the TermFreq data.
+ * SkipDelta is only stored if DocFreq is not smaller than SkipMinimum
+ * (i.e. 128 in Lucene50PostingsFormat).</li>
+ * <li>SingletonDocID is an optimization when a term only appears in one document. In this case, instead
+ * of writing a file pointer to the .doc file (DocFPDelta), and then a VIntBlock at that location, the
+ * single document ID is written to the term dictionary.</li>
+ * </ul>
+ * </dd>
+ * </dl>
+ *
+ * <a name="Termindex" id="Termindex"></a>
+ * <dl>
+ * <dd>
+ * <b>Term Index</b>
+ * <p>The .tip file contains an index into the term dictionary, so that it can be
+ * accessed randomly. See {@link BlockTreeTermsWriter} for more details on the format.</p>
+ * </dd>
+ * </dl>
+ *
+ *
+ * <a name="Frequencies" id="Frequencies"></a>
+ * <dl>
+ * <dd>
+ * <b>Frequencies and Skip Data</b>
+ *
+ * <p>The .doc file contains the lists of documents which contain each term, along
+ * with the frequency of the term in that document (except when frequencies are
+ * omitted: {@link IndexOptions#DOCS_ONLY}). It also saves skip data to the beginning of
+ * each packed or VInt block, when the length of document list is larger than packed block size.</p>
+ *
+ * <ul>
+ * <li>docFile(.doc) --> Header, <TermFreqs, SkipData?><sup>TermCount</sup>, Footer</li>
+ * <li>Header --> {@link CodecUtil#writeSegmentHeader SegmentHeader}</li>
+ * <li>TermFreqs --> <PackedBlock> <sup>PackedDocBlockNum</sup>,
+ * VIntBlock? </li>
+ * <li>PackedBlock --> PackedDocDeltaBlock, PackedFreqBlock?
+ * <li>VIntBlock --> <DocDelta[, Freq?]><sup>DocFreq-PackedBlockSize*PackedDocBlockNum</sup>
+ * <li>SkipData --> <<SkipLevelLength, SkipLevel>
+ * <sup>NumSkipLevels-1</sup>, SkipLevel>, SkipDatum?</li>
+ * <li>SkipLevel --> <SkipDatum> <sup>TrimmedDocFreq/(PackedBlockSize^(Level + 1))</sup></li>
+ * <li>SkipDatum --> DocSkip, DocFPSkip, <PosFPSkip, PosBlockOffset, PayLength?,
+ * PayFPSkip?>?, SkipChildLevelPointer?</li>
+ * <li>PackedDocDeltaBlock, PackedFreqBlock --> {@link PackedInts PackedInts}</li>
+ * <li>DocDelta, Freq, DocSkip, DocFPSkip, PosFPSkip, PosBlockOffset, PayByteUpto, PayFPSkip
+ * -->
+ * {@link DataOutput#writeVInt VInt}</li>
+ * <li>SkipChildLevelPointer --> {@link DataOutput#writeVLong VLong}</li>
+ * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
+ * </ul>
+ * <p>Notes:</p>
+ * <ul>
+ * <li>PackedDocDeltaBlock is theoretically generated from two steps:
+ * <ol>
+ * <li>Calculate the difference between each document number and previous one,
+ * and get a d-gaps list (for the first document, use absolute value); </li>
+ * <li>For those d-gaps from first one to PackedDocBlockNum*PackedBlockSize<sup>th</sup>,
+ * separately encode as packed blocks.</li>
+ * </ol>
+ * If frequencies are not omitted, PackedFreqBlock will be generated without d-gap step.
+ * </li>
+ * <li>VIntBlock stores remaining d-gaps (along with frequencies when possible) with a format
+ * that encodes DocDelta and Freq:
+ * <p>DocDelta: if frequencies are indexed, this determines both the document
+ * number and the frequency. In particular, DocDelta/2 is the difference between
+ * this document number and the previous document number (or zero when this is the
+ * first document in a TermFreqs). When DocDelta is odd, the frequency is one.
+ * When DocDelta is even, the frequency is read as another VInt. If frequencies
+ * are omitted, DocDelta contains the gap (not multiplied by 2) between document
+ * numbers and no frequency information is stored.</p>
+ * <p>For example, the TermFreqs for a term which occurs once in document seven
+ * and three times in document eleven, with frequencies indexed, would be the
+ * following sequence of VInts:</p>
+ * <p>15, 8, 3</p>
+ * <p>If frequencies were omitted ({@link IndexOptions#DOCS_ONLY}) it would be this
+ * sequence of VInts instead:</p>
+ * <p>7,4</p>
+ * </li>
+ * <li>PackedDocBlockNum is the number of packed blocks for current term's docids or frequencies.
+ * In particular, PackedDocBlockNum = floor(DocFreq/PackedBlockSize) </li>
+ * <li>TrimmedDocFreq = DocFreq % PackedBlockSize == 0 ? DocFreq - 1 : DocFreq.
+ * We use this trick since the definition of skip entry is a little different from base interface.
+ * In {@link MultiLevelSkipListWriter}, skip data is assumed to be saved for
+ * skipInterval<sup>th</sup>, 2*skipInterval<sup>th</sup> ... posting in the list. However,
+ * in Lucene50PostingsFormat, the skip data is saved for skipInterval+1<sup>th</sup>,
+ * 2*skipInterval+1<sup>th</sup> ... posting (skipInterval==PackedBlockSize in this case).
+ * When DocFreq is multiple of PackedBlockSize, MultiLevelSkipListWriter will expect one
+ * more skip data than Lucene50SkipWriter. </li>
+ * <li>SkipDatum is the metadata of one skip entry.
+ * For the first block (no matter packed or VInt), it is omitted.</li>
+ * <li>DocSkip records the document number of every PackedBlockSize<sup>th</sup> document number in
+ * the postings (i.e. last document number in each packed block). On disk it is stored as the
+ * difference from previous value in the sequence. </li>
+ * <li>DocFPSkip records the file offsets of each block (excluding the first block), i.e. the posting at
+ * PackedBlockSize+1<sup>th</sup>, 2*PackedBlockSize+1<sup>th</sup> ... , in DocFile.
+ * The file offsets are relative to the start of current term's TermFreqs.
+ * On disk it is also stored as the difference from previous SkipDatum in the sequence.</li>
+ * <li>Since positions and payloads are also block encoded, skipping should first locate the related block,
+ * then fetch the values according to the in-block offset. PosFPSkip and PayFPSkip record the file
+ * offsets of the related blocks in .pos and .pay, respectively, while PosBlockOffset indicates
+ * which value to fetch inside the related block (PayBlockOffset is unnecessary since it is always
+ * equal to PosBlockOffset). Like DocFPSkip, the file offsets are relative to the start of
+ * current term's TermFreqs, and stored as a difference sequence.</li>
+ * <li>PayByteUpto indicates the start offset of the current payload. It is equivalent to
+ * the sum of the payload lengths in the current block up to PosBlockOffset</li>
+ * </ul>
+ * </dd>
+ * </dl>
+ *
+ * <a name="Positions" id="Positions"></a>
+ * <dl>
+ * <dd>
+ * <b>Positions</b>
+ * <p>The .pos file contains the lists of positions that each term occurs at within documents. It also
+ * sometimes stores part of payloads and offsets for speedup.</p>
+ * <ul>
+ * <li>PosFile(.pos) --> Header, <TermPositions> <sup>TermCount</sup>, Footer</li>
+ * <li>Header --> {@link CodecUtil#writeSegmentHeader SegmentHeader}</li>
+ * <li>TermPositions --> <PackedPosDeltaBlock> <sup>PackedPosBlockNum</sup>,
+ * VIntBlock? </li>
+ * <li>VIntBlock --> <PositionDelta[, PayloadLength?], PayloadData?,
+ * OffsetDelta?, OffsetLength?><sup>PosVIntCount</sup></li>
+ * <li>PackedPosDeltaBlock --> {@link PackedInts PackedInts}</li>
+ * <li>PositionDelta, OffsetDelta, OffsetLength -->
+ * {@link DataOutput#writeVInt VInt}</li>
+ * <li>PayloadData --> {@link DataOutput#writeByte byte}<sup>PayLength</sup></li>
+ * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
+ * </ul>
+ * <p>Notes:</p>
+ * <ul>
+ * <li>TermPositions are ordered by term (terms are implicit, from the term dictionary), and position
+ * values for each term document pair are incremental, and ordered by document number.</li>
+ * <li>PackedPosBlockNum is the number of packed blocks for current term's positions, payloads or offsets.
+ * In particular, PackedPosBlockNum = floor(totalTermFreq/PackedBlockSize) </li>
+ * <li>PosVIntCount is the number of positions encoded as VInt format. In particular,
+ * PosVIntCount = totalTermFreq - PackedPosBlockNum*PackedBlockSize</li>
+ * <li>The procedure by which PackedPosDeltaBlock is generated is the same as for PackedDocDeltaBlock
+ * in chapter <a href="#Frequencies">Frequencies and Skip Data</a>.</li>
+ * <li>PositionDelta is, if payloads are disabled for the term's field, the
+ * difference between the position of the current occurrence in the document and
+ * the previous occurrence (or zero, if this is the first occurrence in this
+ * document). If payloads are enabled for the term's field, then PositionDelta/2
+ * is the difference between the current and the previous position. If payloads
+ * are enabled and PositionDelta is odd, then PayloadLength is stored, indicating
+ * the length of the payload at the current term position.</li>
+ * <li>For example, the TermPositions for a term which occurs as the fourth term in
+ * one document, and as the fifth and ninth term in a subsequent document, would
+ * be the following sequence of VInts (payloads disabled):
+ * <p>4, 5, 4</p></li>
+ * <li>PayloadData is metadata associated with the current term position. If
+ * PayloadLength is stored at the current position, then it indicates the length
+ * of this payload. If PayloadLength is not stored, then this payload has the same
+ * length as the payload at the previous position.</li>
+ * <li>OffsetDelta/2 is the difference between this position's startOffset from the
+ * previous occurrence (or zero, if this is the first occurrence in this document).
+ * If OffsetDelta is odd, then the length (endOffset-startOffset) differs from the
+ * previous occurrence and an OffsetLength follows. Offset data is only written for
+ * {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}.</li>
+ * </ul>
+ * </dd>
+ * </dl>
+ *
+ * <a name="Payloads" id="Payloads"></a>
+ * <dl>
+ * <dd>
+ * <b>Payloads and Offsets</b>
+ * <p>The .pay file will store payloads and offsets associated with certain term-document positions.
+ * Some payloads and offsets will be separated out into .pos file, for performance reasons.</p>
+ * <ul>
+ * <li>PayFile(.pay): --> Header, <TermPayloads, TermOffsets?> <sup>TermCount</sup>, Footer</li>
+ * <li>Header --> {@link CodecUtil#writeSegmentHeader SegmentHeader}</li>
+ * <li>TermPayloads --> <PackedPayLengthBlock, SumPayLength, PayData> <sup>PackedPayBlockNum</sup></li>
+ * <li>TermOffsets --> <PackedOffsetStartDeltaBlock, PackedOffsetLengthBlock> <sup>PackedPayBlockNum</sup></li>
+ * <li>PackedPayLengthBlock, PackedOffsetStartDeltaBlock, PackedOffsetLengthBlock --> {@link PackedInts PackedInts}</li>
+ * <li>SumPayLength --> {@link DataOutput#writeVInt VInt}</li>
+ * <li>PayData --> {@link DataOutput#writeByte byte}<sup>SumPayLength</sup></li>
+ * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
+ * </ul>
+ * <p>Notes:</p>
+ * <ul>
+ * <li>The order of TermPayloads/TermOffsets will be the same as TermPositions, note that part of
+ * payload/offsets are stored in .pos.</li>
+ * <li>The procedure by which PackedPayLengthBlock and PackedOffsetLengthBlock are generated is the
+ * same as for PackedFreqBlock in chapter <a href="#Frequencies">Frequencies and Skip Data</a>,
+ * while PackedOffsetStartDeltaBlock follows the same procedure as PackedDocDeltaBlock.</li>
+ * <li>PackedPayBlockNum is always equal to PackedPosBlockNum, for the same term. It is also synonym
+ * for PackedOffsetBlockNum.</li>
+ * <li>SumPayLength is the total length of payloads written within one block; it should be the sum
+ * of PayLengths in one packed block.</li>
+ * <li>PayLength in PackedPayLengthBlock is the length of each payload associated with the current
+ * position.</li>
+ * </ul>
+ * </dd>
+ * </dl>
+ * </p>
+ *
+ * @lucene.experimental
+ */
+
+public final class Lucene50PostingsFormat extends PostingsFormat {
+ /**
+ * Filename extension for document number, frequencies, and skip data.
+ * See chapter: <a href="#Frequencies">Frequencies and Skip Data</a>
+ */
+ public static final String DOC_EXTENSION = "doc";
+
+ /**
+ * Filename extension for positions.
+ * See chapter: <a href="#Positions">Positions</a>
+ */
+ public static final String POS_EXTENSION = "pos";
+
+ /**
+ * Filename extension for payloads and offsets.
+ * See chapter: <a href="#Payloads">Payloads and Offsets</a>
+ */
+ public static final String PAY_EXTENSION = "pay";
+
+ private final int minTermBlockSize;
+ private final int maxTermBlockSize;
+
+ /**
+ * Fixed packed block size, number of integers encoded in
+ * a single packed block.
+ */
+ // NOTE: must be multiple of 64 because of PackedInts long-aligned encoding/decoding
+ public final static int BLOCK_SIZE = 128;
+
+ /** Creates {@code Lucene50PostingsFormat} with default
+ * settings. */
+ public Lucene50PostingsFormat() {
+ this(BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
+ }
+
+ /** Creates {@code Lucene50PostingsFormat} with custom
+ * values for {@code minBlockSize} and {@code
+ * maxBlockSize} passed to block terms dictionary.
+ * @see BlockTreeTermsWriter#BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int) */
+ public Lucene50PostingsFormat(int minTermBlockSize, int maxTermBlockSize) {
+ super("Lucene50");
+ this.minTermBlockSize = minTermBlockSize;
+ assert minTermBlockSize > 1;
+ this.maxTermBlockSize = maxTermBlockSize;
+ assert minTermBlockSize <= maxTermBlockSize;
+ }
+
+ @Override
+ public String toString() {
+ return getName() + "(blocksize=" + BLOCK_SIZE + ")";
+ }
+
+ @Override
+ public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+ PostingsWriterBase postingsWriter = new Lucene50PostingsWriter(state);
+
+ boolean success = false;
+ try {
+ FieldsConsumer ret = new BlockTreeTermsWriter(state,
+ postingsWriter,
+ minTermBlockSize,
+ maxTermBlockSize);
+ success = true;
+ return ret;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(postingsWriter);
+ }
+ }
+ }
+
+ @Override
+ public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
+ PostingsReaderBase postingsReader = new Lucene50PostingsReader(state);
+ boolean success = false;
+ try {
+ FieldsProducer ret = new BlockTreeTermsReader(postingsReader, state);
+ success = true;
+ return ret;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(postingsReader);
+ }
+ }
+ }
+}