You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/10/20 20:25:54 UTC
svn commit: r1633196 [1/4] - in /lucene/dev/branches/lucene5969:
lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/
lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/
lucene/backward-codecs/src/resources/META-INF/service...
Author: rmuir
Date: Mon Oct 20 18:25:52 2014
New Revision: 1633196
URL: http://svn.apache.org/r1633196
Log:
LUCENE-5969: move old postings back compat to backward-codecs, cleanup PBF related stuff, add segment headers, etc
Added:
lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/ForUtil.java
- copied, changed from r1632459, lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene41/ForUtil.java
lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/IntBlockTermState.java (with props)
lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java
- copied, changed from r1632459, lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java
lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java
- copied, changed from r1632459, lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java
lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/Lucene41SkipReader.java
- copied, changed from r1632459, lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SkipReader.java
lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java
- copied, changed from r1632459, lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java
lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/Lucene41RWPostingsFormat.java (with props)
lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/Lucene41SkipWriter.java
- copied, changed from r1632459, lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SkipWriter.java
lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/TestLucene41ForUtil.java
- copied, changed from r1632459, lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestForUtil.java
lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/TestLucene41PostingsFormat.java
- copied, changed from r1632459, lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat.java
lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/TestLucene41PostingsFormat2.java
- copied, changed from r1632459, lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat2.java
lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/TestLucene41PostingsFormat3.java
- copied, changed from r1632459, lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat3.java
lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/BlockTreeOrdsPostingsFormat.java
- copied, changed from r1632459, lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/Ords41PostingsFormat.java
lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/ForUtil.java (with props)
lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsFormat.java (with props)
lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsReader.java (with props)
lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsWriter.java (with props)
lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipReader.java (with props)
lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipWriter.java (with props)
lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestBlockPostingsFormat.java
- copied, changed from r1632459, lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat.java
lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestBlockPostingsFormat2.java
- copied, changed from r1632459, lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat2.java
lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestBlockPostingsFormat3.java
- copied, changed from r1632459, lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat3.java
lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestForUtil.java
- copied, changed from r1632459, lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestForUtil.java
lucene/dev/branches/lucene5969/lucene/test-framework/src/java/org/apache/lucene/codecs/blockterms/
- copied from r1632459, lucene/dev/branches/lucene5969/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41vargap/
lucene/dev/branches/lucene5969/lucene/test-framework/src/java/org/apache/lucene/codecs/blockterms/LuceneFixedGap.java
- copied, changed from r1632459, lucene/dev/branches/lucene5969/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41ords/Lucene41WithOrds.java
lucene/dev/branches/lucene5969/lucene/test-framework/src/java/org/apache/lucene/codecs/blockterms/LuceneVarGapDocFreqInterval.java
- copied, changed from r1632459, lucene/dev/branches/lucene5969/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41vargap/Lucene41VarGapDocFreqInterval.java
lucene/dev/branches/lucene5969/lucene/test-framework/src/java/org/apache/lucene/codecs/blockterms/LuceneVarGapFixedInterval.java
- copied, changed from r1632459, lucene/dev/branches/lucene5969/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41vargap/Lucene41VarGapFixedInterval.java
Removed:
lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsBaseFormat.java
lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/Ords41PostingsFormat.java
lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/PostingsBaseFormat.java
lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene41/
lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/codecs/lucene41/
lucene/dev/branches/lucene5969/lucene/test-framework/src/java/org/apache/lucene/codecs/blockterms/Lucene41VarGapDocFreqInterval.java
lucene/dev/branches/lucene5969/lucene/test-framework/src/java/org/apache/lucene/codecs/blockterms/Lucene41VarGapFixedInterval.java
lucene/dev/branches/lucene5969/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41ords/
lucene/dev/branches/lucene5969/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41vargap/
Modified:
lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java
lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
lucene/dev/branches/lucene5969/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java
lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene410/Lucene410RWCodec.java
lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene410/TestLucene410DocValuesFormat.java
lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/Lucene42RWCodec.java
lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene45/Lucene45RWCodec.java
lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene46/Lucene46RWCodec.java
lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene49/Lucene49RWCodec.java
lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java
lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java
lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsReader.java
lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java
lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdPostingsFormat.java
lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java
lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java
lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTPostingsFormat.java
lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
lucene/dev/branches/lucene5969/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java
lucene/dev/branches/lucene5969/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
lucene/dev/branches/lucene5969/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java
lucene/dev/branches/lucene5969/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestVarGapDocFreqIntervalPostingsFormat.java
lucene/dev/branches/lucene5969/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestVarGapFixedIntervalPostingsFormat.java
lucene/dev/branches/lucene5969/lucene/codecs/src/test/org/apache/lucene/codecs/blocktreeords/TestOrdsBlockTree.java
lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java
lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java
lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java
lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java
lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/blocktree/Stats.java
lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java
lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene50/package.html
lucene/dev/branches/lucene5969/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50DocValuesFormat.java
lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java
lucene/dev/branches/lucene5969/lucene/misc/src/test/org/apache/lucene/uninverting/TestDocTermOrds.java
lucene/dev/branches/lucene5969/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsFormat.java
lucene/dev/branches/lucene5969/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java
lucene/dev/branches/lucene5969/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java
lucene/dev/branches/lucene5969/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsReader.java
lucene/dev/branches/lucene5969/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsWriter.java
lucene/dev/branches/lucene5969/lucene/test-framework/src/java/org/apache/lucene/codecs/blockterms/package.html
lucene/dev/branches/lucene5969/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
lucene/dev/branches/lucene5969/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
lucene/dev/branches/lucene5969/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
lucene/dev/branches/lucene5969/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
lucene/dev/branches/lucene5969/solr/core/src/test-files/solr/collection1/conf/schema_codec.xml
Modified: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java Mon Oct 20 18:25:52 2014
@@ -51,13 +51,7 @@ public class Lucene40PostingsFormat exte
boolean success = false;
try {
- FieldsProducer ret = new BlockTreeTermsReader(
- state.directory,
- state.fieldInfos,
- state.segmentInfo,
- postings,
- state.context,
- state.segmentSuffix);
+ FieldsProducer ret = new BlockTreeTermsReader(postings, state);
success = true;
return ret;
} finally {
Modified: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java Mon Oct 20 18:25:52 2014
@@ -31,6 +31,7 @@ import org.apache.lucene.index.FieldInfo
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.TermState;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.Directory;
@@ -104,7 +105,7 @@ final class Lucene40PostingsReader exten
}
@Override
- public void init(IndexInput termsIn) throws IOException {
+ public void init(IndexInput termsIn, SegmentReadState state) throws IOException {
// Make sure we are talking to the matching past writer
CodecUtil.checkHeader(termsIn, TERMS_CODEC, VERSION_START, VERSION_CURRENT);
Copied: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/ForUtil.java (from r1632459, lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene41/ForUtil.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/ForUtil.java?p2=lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/ForUtil.java&p1=lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene41/ForUtil.java&r1=1632459&r2=1633196&rev=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene41/ForUtil.java (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/ForUtil.java Mon Oct 20 18:25:52 2014
@@ -30,9 +30,10 @@ import org.apache.lucene.util.packed.Pac
import static org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat.BLOCK_SIZE;
/**
- * Encode all values in normal area with fixed bit width,
- * which is determined by the max value in this block.
+ * Lucene 4.1 postings format.
+ * @deprecated only for reading old 4.x segments
*/
+@Deprecated
final class ForUtil {
/**
Added: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/IntBlockTermState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/IntBlockTermState.java?rev=1633196&view=auto
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/IntBlockTermState.java (added)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/IntBlockTermState.java Mon Oct 20 18:25:52 2014
@@ -0,0 +1,45 @@
+package org.apache.lucene.codecs.lucene41;
+
+import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.index.TermState;
+
+/**
+ * term state for Lucene 4.1 postings format
+ * @deprecated only for reading old 4.x segments
+ */
+@Deprecated
+final class IntBlockTermState extends BlockTermState {
+ long docStartFP = 0;
+ long posStartFP = 0;
+ long payStartFP = 0;
+ long skipOffset = -1;
+ long lastPosBlockOffset = -1;
+ // docid when there is a single pulsed posting, otherwise -1
+ // freq is always implicitly totalTermFreq in this case.
+ int singletonDocID = -1;
+
+ @Override
+ public IntBlockTermState clone() {
+ IntBlockTermState other = new IntBlockTermState();
+ other.copyFrom(this);
+ return other;
+ }
+
+ @Override
+ public void copyFrom(TermState _other) {
+ super.copyFrom(_other);
+ IntBlockTermState other = (IntBlockTermState) _other;
+ docStartFP = other.docStartFP;
+ posStartFP = other.posStartFP;
+ payStartFP = other.payStartFP;
+ lastPosBlockOffset = other.lastPosBlockOffset;
+ skipOffset = other.skipOffset;
+ singletonDocID = other.singletonDocID;
+ }
+
+
+ @Override
+ public String toString() {
+ return super.toString() + " docStartFP=" + docStartFP + " posStartFP=" + posStartFP + " payStartFP=" + payStartFP + " lastPosBlockOffset=" + lastPosBlockOffset + " singletonDocID=" + singletonDocID;
+ }
+}
\ No newline at end of file
Copied: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java (from r1632459, lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java?p2=lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java&p1=lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java&r1=1632459&r2=1633196&rev=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java Mon Oct 20 18:25:52 2014
@@ -1,6 +1,5 @@
package org.apache.lucene.codecs.lucene41;
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -20,345 +19,21 @@ package org.apache.lucene.codecs.lucene4
import java.io.IOException;
-import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
-import org.apache.lucene.codecs.MultiLevelSkipListWriter;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.PostingsReaderBase;
-import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader;
-import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
-import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.packed.PackedInts;
/**
- * Lucene 4.1 postings format, which encodes postings in packed integer blocks
- * for fast decode.
- *
- * <p><b>NOTE</b>: this format is still experimental and
- * subject to change without backwards compatibility.
- *
- * <p>
- * Basic idea:
- * <ul>
- * <li>
- * <b>Packed Blocks and VInt Blocks</b>:
- * <p>In packed blocks, integers are encoded with the same bit width ({@link PackedInts packed format}):
- * the block size (i.e. number of integers inside block) is fixed (currently 128). Additionally blocks
- * that are all the same value are encoded in an optimized way.</p>
- * <p>In VInt blocks, integers are encoded as {@link DataOutput#writeVInt VInt}:
- * the block size is variable.</p>
- * </li>
- *
- * <li>
- * <b>Block structure</b>:
- * <p>When the postings are long enough, Lucene41PostingsFormat will try to encode most integer data
- * as a packed block.</p>
- * <p>Take a term with 259 documents as an example, the first 256 document ids are encoded as two packed
- * blocks, while the remaining 3 are encoded as one VInt block. </p>
- * <p>Different kinds of data are always encoded separately into different packed blocks, but may
- * possibly be interleaved into the same VInt block. </p>
- * <p>This strategy is applied to pairs:
- * &lt;document number, frequency&gt;,
- * &lt;position, payload length&gt;,
- * &lt;position, offset start, offset length&gt;, and
- * &lt;position, payload length, offsetstart, offset length&gt;.</p>
- * </li>
- *
- * <li>
- * <b>Skipdata settings</b>:
- * <p>The structure of skip table is quite similar to previous version of Lucene. Skip interval is the
- * same as block size, and each skip entry points to the beginning of each block. However, for
- * the first block, skip data is omitted.</p>
- * </li>
- *
- * <li>
- * <b>Positions, Payloads, and Offsets</b>:
- * <p>A position is an integer indicating where the term occurs within one document.
- * A payload is a blob of metadata associated with current position.
- * An offset is a pair of integers indicating the tokenized start/end offsets for given term
- * in current position: it is essentially a specialized payload. </p>
- * <p>When payloads and offsets are not omitted, numPositions==numPayloads==numOffsets (assuming a
- * null payload contributes one count). As mentioned in block structure, it is possible to encode
- * these three either combined or separately.
- * <p>In all cases, payloads and offsets are stored together. When encoded as a packed block,
- * position data is separated out as .pos, while payloads and offsets are encoded in .pay (payload
- * metadata will also be stored directly in .pay). When encoded as VInt blocks, all these three are
- * stored interleaved into the .pos (so is payload metadata).</p>
- * <p>With this strategy, the majority of payload and offset data will be outside .pos file.
- * So for queries that require only position data, running on a full index with payloads and offsets,
- * this reduces disk pre-fetches.</p>
- * </li>
- * </ul>
- * </p>
- *
- * <p>
- * Files and detailed format:
- * <ul>
- * <li><tt>.tim</tt>: <a href="#Termdictionary">Term Dictionary</a></li>
- * <li><tt>.tip</tt>: <a href="#Termindex">Term Index</a></li>
- * <li><tt>.doc</tt>: <a href="#Frequencies">Frequencies and Skip Data</a></li>
- * <li><tt>.pos</tt>: <a href="#Positions">Positions</a></li>
- * <li><tt>.pay</tt>: <a href="#Payloads">Payloads and Offsets</a></li>
- * </ul>
- * </p>
- *
- * <a name="Termdictionary" id="Termdictionary"></a>
- * <dl>
- * <dd>
- * <b>Term Dictionary</b>
- *
- * <p>The .tim file contains the list of terms in each
- * field along with per-term statistics (such as docfreq)
- * and pointers to the frequencies, positions, payload and
- * skip data in the .doc, .pos, and .pay files.
- * See {@link BlockTreeTermsWriter} for more details on the format.
- * </p>
- *
- * <p>NOTE: The term dictionary can plug into different postings implementations:
- * the postings writer/reader are actually responsible for encoding
- * and decoding the PostingsHeader and TermMetadata sections described here:</p>
- *
- * <ul>
- * <li>PostingsHeader --&gt; Header, PackedBlockSize</li>
- * <li>TermMetadata --&gt; (DocFPDelta|SingletonDocID), PosFPDelta?, PosVIntBlockFPDelta?, PayFPDelta?,
- * SkipFPDelta?</li>
- * <li>Header, --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
- * <li>PackedBlockSize, SingletonDocID --&gt; {@link DataOutput#writeVInt VInt}</li>
- * <li>DocFPDelta, PosFPDelta, PayFPDelta, PosVIntBlockFPDelta, SkipFPDelta --&gt; {@link DataOutput#writeVLong VLong}</li>
- * <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
- * </ul>
- * <p>Notes:</p>
- * <ul>
- * <li>Header is a {@link CodecUtil#writeHeader CodecHeader} storing the version information
- * for the postings.</li>
- * <li>PackedBlockSize is the fixed block size for packed blocks. In packed block, bit width is
- * determined by the largest integer. Smaller block size result in smaller variance among width
- * of integers hence smaller indexes. Larger block size result in more efficient bulk i/o hence
- * better acceleration. This value should always be a multiple of 64, currently fixed as 128 as
- * a tradeoff. It is also the skip interval used to accelerate {@link DocsEnum#advance(int)}.
- * <li>DocFPDelta determines the position of this term's TermFreqs within the .doc file.
- * In particular, it is the difference of file offset between this term's
- * data and previous term's data (or zero, for the first term in the block).On disk it is
- * stored as the difference from previous value in sequence. </li>
- * <li>PosFPDelta determines the position of this term's TermPositions within the .pos file.
- * While PayFPDelta determines the position of this term's &lt;TermPayloads, TermOffsets?&gt; within
- * the .pay file. Similar to DocFPDelta, it is the difference between two file positions (or
- * neglected, for fields that omit payloads and offsets).</li>
- * <li>PosVIntBlockFPDelta determines the position of this term's last TermPosition in last pos packed
- * block within the .pos file. It is synonym for PayVIntBlockFPDelta or OffsetVIntBlockFPDelta.
- * This is actually used to indicate whether it is necessary to load following
- * payloads and offsets from .pos instead of .pay. Every time a new block of positions are to be
- * loaded, the PostingsReader will use this value to check whether current block is packed format
- * or VInt. When packed format, payloads and offsets are fetched from .pay, otherwise from .pos.
- * (this value is neglected when total number of positions i.e. totalTermFreq is less or equal
- * to PackedBlockSize).
- * <li>SkipFPDelta determines the position of this term's SkipData within the .doc
- * file. In particular, it is the length of the TermFreq data.
- * SkipDelta is only stored if DocFreq is not smaller than SkipMinimum
- * (i.e. 128 in Lucene41PostingsFormat).</li>
- * <li>SingletonDocID is an optimization when a term only appears in one document. In this case, instead
- * of writing a file pointer to the .doc file (DocFPDelta), and then a VIntBlock at that location, the
- * single document ID is written to the term dictionary.</li>
- * </ul>
- * </dd>
- * </dl>
- *
- * <a name="Termindex" id="Termindex"></a>
- * <dl>
- * <dd>
- * <b>Term Index</b>
- * <p>The .tip file contains an index into the term dictionary, so that it can be
- * accessed randomly. See {@link BlockTreeTermsWriter} for more details on the format.</p>
- * </dd>
- * </dl>
- *
- *
- * <a name="Frequencies" id="Frequencies"></a>
- * <dl>
- * <dd>
- * <b>Frequencies and Skip Data</b>
- *
- * <p>The .doc file contains the lists of documents which contain each term, along
- * with the frequency of the term in that document (except when frequencies are
- * omitted: {@link IndexOptions#DOCS_ONLY}). It also saves skip data to the beginning of
- * each packed or VInt block, when the length of document list is larger than packed block size.</p>
- *
- * <ul>
- * <li>docFile(.doc) --&gt; Header, &lt;TermFreqs, SkipData?&gt;<sup>TermCount</sup>, Footer</li>
- * <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
- * <li>TermFreqs --&gt; &lt;PackedBlock&gt; <sup>PackedDocBlockNum</sup>,
- * VIntBlock? </li>
- * <li>PackedBlock --&gt; PackedDocDeltaBlock, PackedFreqBlock?
- * <li>VIntBlock --&gt; &lt;DocDelta[, Freq?]&gt;<sup>DocFreq-PackedBlockSize*PackedDocBlockNum</sup>
- * <li>SkipData --&gt; &lt;&lt;SkipLevelLength, SkipLevel&gt;
- * <sup>NumSkipLevels-1</sup>, SkipLevel&gt;, SkipDatum?</li>
- * <li>SkipLevel --&gt; &lt;SkipDatum&gt; <sup>TrimmedDocFreq/(PackedBlockSize^(Level + 1))</sup></li>
- * <li>SkipDatum --&gt; DocSkip, DocFPSkip, &lt;PosFPSkip, PosBlockOffset, PayLength?,
- * PayFPSkip?&gt;?, SkipChildLevelPointer?</li>
- * <li>PackedDocDeltaBlock, PackedFreqBlock --&gt; {@link PackedInts PackedInts}</li>
- * <li>DocDelta, Freq, DocSkip, DocFPSkip, PosFPSkip, PosBlockOffset, PayByteUpto, PayFPSkip
- * --&gt;
- * {@link DataOutput#writeVInt VInt}</li>
- * <li>SkipChildLevelPointer --&gt; {@link DataOutput#writeVLong VLong}</li>
- * <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
- * </ul>
- * <p>Notes:</p>
- * <ul>
- * <li>PackedDocDeltaBlock is theoretically generated from two steps:
- * <ol>
- * <li>Calculate the difference between each document number and previous one,
- * and get a d-gaps list (for the first document, use absolute value); </li>
- * <li>For those d-gaps from first one to PackedDocBlockNum*PackedBlockSize<sup>th</sup>,
- * separately encode as packed blocks.</li>
- * </ol>
- * If frequencies are not omitted, PackedFreqBlock will be generated without d-gap step.
- * </li>
- * <li>VIntBlock stores remaining d-gaps (along with frequencies when possible) with a format
- * that encodes DocDelta and Freq:
- * <p>DocDelta: if frequencies are indexed, this determines both the document
- * number and the frequency. In particular, DocDelta/2 is the difference between
- * this document number and the previous document number (or zero when this is the
- * first document in a TermFreqs). When DocDelta is odd, the frequency is one.
- * When DocDelta is even, the frequency is read as another VInt. If frequencies
- * are omitted, DocDelta contains the gap (not multiplied by 2) between document
- * numbers and no frequency information is stored.</p>
- * <p>For example, the TermFreqs for a term which occurs once in document seven
- * and three times in document eleven, with frequencies indexed, would be the
- * following sequence of VInts:</p>
- * <p>15, 8, 3</p>
- * <p>If frequencies were omitted ({@link IndexOptions#DOCS_ONLY}) it would be this
- * sequence of VInts instead:</p>
- * <p>7,4</p>
- * </li>
- * <li>PackedDocBlockNum is the number of packed blocks for current term's docids or frequencies.
- * In particular, PackedDocBlockNum = floor(DocFreq/PackedBlockSize) </li>
- * <li>TrimmedDocFreq = DocFreq % PackedBlockSize == 0 ? DocFreq - 1 : DocFreq.
- * We use this trick since the definition of skip entry is a little different from base interface.
- * In {@link MultiLevelSkipListWriter}, skip data is assumed to be saved for
- * skipInterval<sup>th</sup>, 2*skipInterval<sup>th</sup> ... posting in the list. However,
- * in Lucene41PostingsFormat, the skip data is saved for skipInterval+1<sup>th</sup>,
- * 2*skipInterval+1<sup>th</sup> ... posting (skipInterval==PackedBlockSize in this case).
- * When DocFreq is multiple of PackedBlockSize, MultiLevelSkipListWriter will expect one
- * more skip data than Lucene41SkipWriter. </li>
- * <li>SkipDatum is the metadata of one skip entry.
- * For the first block (no matter packed or VInt), it is omitted.</li>
- * <li>DocSkip records the document number of every PackedBlockSize<sup>th</sup> document number in
- * the postings (i.e. last document number in each packed block). On disk it is stored as the
- * difference from previous value in the sequence. </li>
- * <li>DocFPSkip records the file offsets of each block (excluding the first), i.e. of the postings at
- * PackedBlockSize+1<sup>th</sup>, 2*PackedBlockSize+1<sup>th</sup> ... , in DocFile.
- * The file offsets are relative to the start of current term's TermFreqs.
- * On disk it is also stored as the difference from previous SkipDatum in the sequence.</li>
- * <li>Since positions and payloads are also block encoded, the skip should skip to related block first,
- * then fetch the values according to in-block offset. PosFPSkip and PayFPSkip record the file
- * offsets of related block in .pos and .pay, respectively. While PosBlockOffset indicates
- * which value to fetch inside the related block (PayBlockOffset is unnecessary since it is always
- * equal to PosBlockOffset). Same as DocFPSkip, the file offsets are relative to the start of
- * current term's TermFreqs, and stored as a difference sequence.</li>
- * <li>PayByteUpto indicates the start offset of the current payload. It is equivalent to
- * the sum of the payload lengths in the current block up to PosBlockOffset</li>
- * </ul>
- * </dd>
- * </dl>
- *
- * <a name="Positions" id="Positions"></a>
- * <dl>
- * <dd>
- * <b>Positions</b>
- * <p>The .pos file contains the lists of positions that each term occurs at within documents. It also
- * sometimes stores part of payloads and offsets for speedup.</p>
- * <ul>
- * <li>PosFile(.pos) --> Header, <TermPositions> <sup>TermCount</sup>, Footer</li>
- * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
- * <li>TermPositions --> <PackedPosDeltaBlock> <sup>PackedPosBlockNum</sup>,
- * VIntBlock? </li>
- * <li>VIntBlock --> <PositionDelta[, PayloadLength?], PayloadData?,
- * OffsetDelta?, OffsetLength?><sup>PosVIntCount</sup>
- * <li>PackedPosDeltaBlock --> {@link PackedInts PackedInts}</li>
- * <li>PositionDelta, OffsetDelta, OffsetLength -->
- * {@link DataOutput#writeVInt VInt}</li>
- * <li>PayloadData --> {@link DataOutput#writeByte byte}<sup>PayLength</sup></li>
- * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
- * </ul>
- * <p>Notes:</p>
- * <ul>
- * <li>TermPositions are ordered by term (terms are implicit, from the term dictionary), and position
- * values for each term document pair are incremental, and ordered by document number.</li>
- * <li>PackedPosBlockNum is the number of packed blocks for current term's positions, payloads or offsets.
- * In particular, PackedPosBlockNum = floor(totalTermFreq/PackedBlockSize) </li>
- * <li>PosVIntCount is the number of positions encoded as VInt format. In particular,
- * PosVIntCount = totalTermFreq - PackedPosBlockNum*PackedBlockSize</li>
- * <li>The procedure how PackedPosDeltaBlock is generated is the same as PackedDocDeltaBlock
- * in chapter <a href="#Frequencies">Frequencies and Skip Data</a>.</li>
- * <li>PositionDelta is, if payloads are disabled for the term's field, the
- * difference between the position of the current occurrence in the document and
- * the previous occurrence (or zero, if this is the first occurrence in this
- * document). If payloads are enabled for the term's field, then PositionDelta/2
- * is the difference between the current and the previous position. If payloads
- * are enabled and PositionDelta is odd, then PayloadLength is stored, indicating
- * the length of the payload at the current term position.</li>
- * <li>For example, the TermPositions for a term which occurs as the fourth term in
- * one document, and as the fifth and ninth term in a subsequent document, would
- * be the following sequence of VInts (payloads disabled):
- * <p>4, 5, 4</p></li>
- * <li>PayloadData is metadata associated with the current term position. If
- * PayloadLength is stored at the current position, then it indicates the length
- * of this payload. If PayloadLength is not stored, then this payload has the same
- * length as the payload at the previous position.</li>
- * <li>OffsetDelta/2 is the difference between this position's startOffset from the
- * previous occurrence (or zero, if this is the first occurrence in this document).
- * If OffsetDelta is odd, then the length (endOffset-startOffset) differs from the
- * previous occurrence and an OffsetLength follows. Offset data is only written for
- * {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}.</li>
- * </ul>
- * </dd>
- * </dl>
- *
- * <a name="Payloads" id="Payloads"></a>
- * <dl>
- * <dd>
- * <b>Payloads and Offsets</b>
- * <p>The .pay file will store payloads and offsets associated with certain term-document positions.
- * Some payloads and offsets will be separated out into .pos file, for performance reasons.</p>
- * <ul>
- * <li>PayFile(.pay): --> Header, <TermPayloads, TermOffsets?> <sup>TermCount</sup>, Footer</li>
- * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
- * <li>TermPayloads --> <PackedPayLengthBlock, SumPayLength, PayData> <sup>PackedPayBlockNum</sup>
- * <li>TermOffsets --> <PackedOffsetStartDeltaBlock, PackedOffsetLengthBlock> <sup>PackedPayBlockNum</sup>
- * <li>PackedPayLengthBlock, PackedOffsetStartDeltaBlock, PackedOffsetLengthBlock --> {@link PackedInts PackedInts}</li>
- * <li>SumPayLength --> {@link DataOutput#writeVInt VInt}</li>
- * <li>PayData --> {@link DataOutput#writeByte byte}<sup>SumPayLength</sup></li>
- * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
- * </ul>
- * <p>Notes:</p>
- * <ul>
- * <li>The order of TermPayloads/TermOffsets will be the same as TermPositions, note that part of
- * payload/offsets are stored in .pos.</li>
- * <li>The procedure how PackedPayLengthBlock and PackedOffsetLengthBlock are generated is the
- * same as PackedFreqBlock in chapter <a href="#Frequencies">Frequencies and Skip Data</a>.
- * PackedOffsetStartDeltaBlock, by contrast, follows the same procedure as PackedDocDeltaBlock.</li>
- * <li>PackedPayBlockNum is always equal to PackedPosBlockNum, for the same term. It is also synonym
- * for PackedOffsetBlockNum.</li>
- * <li>SumPayLength is the total length of payloads written within one block, should be the sum
- * of PayLengths in one packed block.</li>
- * <li>PayLength in PackedPayLengthBlock is the length of each payload associated with the current
- * position.</li>
- * </ul>
- * </dd>
- * </dl>
- * </p>
- *
- * @lucene.experimental
+ * Lucene 4.1 postings format.
+ * @deprecated only for reading old 4.x segments
*/
-
-public final class Lucene41PostingsFormat extends PostingsFormat {
+@Deprecated
+public class Lucene41PostingsFormat extends PostingsFormat {
/**
* Filename extension for document number, frequencies, and skip data.
* See chapter: <a href="#Frequencies">Frequencies and Skip Data</a>
@@ -376,9 +51,23 @@ public final class Lucene41PostingsForma
* See chapter: <a href="#Payloads">Payloads and Offsets</a>
*/
public static final String PAY_EXTENSION = "pay";
+
+ /**
+ * Expert: The maximum number of skip levels. Smaller values result in
+ * slightly smaller indexes, but slower skipping in big posting lists.
+ */
+ static final int maxSkipLevels = 10;
- private final int minTermBlockSize;
- private final int maxTermBlockSize;
+ final static String TERMS_CODEC = "Lucene41PostingsWriterTerms";
+ final static String DOC_CODEC = "Lucene41PostingsWriterDoc";
+ final static String POS_CODEC = "Lucene41PostingsWriterPos";
+ final static String PAY_CODEC = "Lucene41PostingsWriterPay";
+
+ // Increment version to change it
+ final static int VERSION_START = 0;
+ final static int VERSION_META_ARRAY = 1;
+ final static int VERSION_CHECKSUM = 2;
+ final static int VERSION_CURRENT = VERSION_CHECKSUM;
/**
* Fixed packed block size, number of integers encoded in
@@ -390,19 +79,7 @@ public final class Lucene41PostingsForma
/** Creates {@code Lucene41PostingsFormat} with default
* settings. */
public Lucene41PostingsFormat() {
- this(BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
- }
-
- /** Creates {@code Lucene41PostingsFormat} with custom
- * values for {@code minBlockSize} and {@code
- * maxBlockSize} passed to block terms dictionary.
- * @see BlockTreeTermsWriter#BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int) */
- public Lucene41PostingsFormat(int minTermBlockSize, int maxTermBlockSize) {
super("Lucene41");
- this.minTermBlockSize = minTermBlockSize;
- assert minTermBlockSize > 1;
- this.maxTermBlockSize = maxTermBlockSize;
- assert minTermBlockSize <= maxTermBlockSize;
}
@Override
@@ -412,25 +89,11 @@ public final class Lucene41PostingsForma
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
- PostingsWriterBase postingsWriter = new Lucene41PostingsWriter(state);
-
- boolean success = false;
- try {
- FieldsConsumer ret = new BlockTreeTermsWriter(state,
- postingsWriter,
- minTermBlockSize,
- maxTermBlockSize);
- success = true;
- return ret;
- } finally {
- if (!success) {
- IOUtils.closeWhileHandlingException(postingsWriter);
- }
- }
+ throw new UnsupportedOperationException("this codec can only be used for reading");
}
@Override
- public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
+ public final FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
PostingsReaderBase postingsReader = new Lucene41PostingsReader(state.directory,
state.fieldInfos,
state.segmentInfo,
@@ -438,12 +101,7 @@ public final class Lucene41PostingsForma
state.segmentSuffix);
boolean success = false;
try {
- FieldsProducer ret = new BlockTreeTermsReader(state.directory,
- state.fieldInfos,
- state.segmentInfo,
- postingsReader,
- state.context,
- state.segmentSuffix);
+ FieldsProducer ret = new BlockTreeTermsReader(postingsReader, state);
success = true;
return ret;
} finally {
Copied: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java (from r1632459, lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java?p2=lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java&p1=lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java&r1=1632459&r2=1633196&rev=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java Mon Oct 20 18:25:52 2014
@@ -20,7 +20,6 @@ package org.apache.lucene.codecs.lucene4
import static org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat.BLOCK_SIZE;
import static org.apache.lucene.codecs.lucene41.ForUtil.MAX_DATA_SIZE;
import static org.apache.lucene.codecs.lucene41.ForUtil.MAX_ENCODED_SIZE;
-import static org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter.IntBlockTermState;
import java.io.IOException;
import java.util.Arrays;
@@ -32,6 +31,7 @@ import org.apache.lucene.codecs.Postings
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
@@ -48,12 +48,10 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
/**
- * Concrete class that reads docId(maybe frq,pos,offset,payloads) list
- * with postings format.
- *
- * @see Lucene41SkipReader for details
- * @lucene.experimental
+ * Lucene 4.1 postings format.
+ * @deprecated only for reading old 4.x segments
*/
+@Deprecated
public final class Lucene41PostingsReader extends PostingsReaderBase {
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(Lucene41PostingsReader.class);
@@ -77,12 +75,12 @@ public final class Lucene41PostingsReade
docIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene41PostingsFormat.DOC_EXTENSION),
ioContext);
version = CodecUtil.checkHeader(docIn,
- Lucene41PostingsWriter.DOC_CODEC,
- Lucene41PostingsWriter.VERSION_START,
- Lucene41PostingsWriter.VERSION_CURRENT);
+ Lucene41PostingsFormat.DOC_CODEC,
+ Lucene41PostingsFormat.VERSION_START,
+ Lucene41PostingsFormat.VERSION_CURRENT);
forUtil = new ForUtil(docIn);
- if (version >= Lucene41PostingsWriter.VERSION_CHECKSUM) {
+ if (version >= Lucene41PostingsFormat.VERSION_CHECKSUM) {
// NOTE: data file is too costly to verify checksum against all the bytes on open,
// but for now we at least verify proper structure of the checksum footer: which looks
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
@@ -93,9 +91,9 @@ public final class Lucene41PostingsReade
if (fieldInfos.hasProx()) {
posIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene41PostingsFormat.POS_EXTENSION),
ioContext);
- CodecUtil.checkHeader(posIn, Lucene41PostingsWriter.POS_CODEC, version, version);
+ CodecUtil.checkHeader(posIn, Lucene41PostingsFormat.POS_CODEC, version, version);
- if (version >= Lucene41PostingsWriter.VERSION_CHECKSUM) {
+ if (version >= Lucene41PostingsFormat.VERSION_CHECKSUM) {
// NOTE: data file is too costly to verify checksum against all the bytes on open,
// but for now we at least verify proper structure of the checksum footer: which looks
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
@@ -106,9 +104,9 @@ public final class Lucene41PostingsReade
if (fieldInfos.hasPayloads() || fieldInfos.hasOffsets()) {
payIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene41PostingsFormat.PAY_EXTENSION),
ioContext);
- CodecUtil.checkHeader(payIn, Lucene41PostingsWriter.PAY_CODEC, version, version);
+ CodecUtil.checkHeader(payIn, Lucene41PostingsFormat.PAY_CODEC, version, version);
- if (version >= Lucene41PostingsWriter.VERSION_CHECKSUM) {
+ if (version >= Lucene41PostingsFormat.VERSION_CHECKSUM) {
// NOTE: data file is too costly to verify checksum against all the bytes on open,
// but for now we at least verify proper structure of the checksum footer: which looks
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
@@ -130,12 +128,12 @@ public final class Lucene41PostingsReade
}
@Override
- public void init(IndexInput termsIn) throws IOException {
+ public void init(IndexInput termsIn, SegmentReadState state) throws IOException {
// Make sure we are talking to the matching postings writer
CodecUtil.checkHeader(termsIn,
- Lucene41PostingsWriter.TERMS_CODEC,
- Lucene41PostingsWriter.VERSION_START,
- Lucene41PostingsWriter.VERSION_CURRENT);
+ Lucene41PostingsFormat.TERMS_CODEC,
+ Lucene41PostingsFormat.VERSION_START,
+ Lucene41PostingsFormat.VERSION_CURRENT);
final int indexBlockSize = termsIn.readVInt();
if (indexBlockSize != BLOCK_SIZE) {
throw new IllegalStateException("index-time BLOCK_SIZE (" + indexBlockSize + ") != read-time BLOCK_SIZE (" + BLOCK_SIZE + ")");
@@ -187,7 +185,7 @@ public final class Lucene41PostingsReade
termState.posStartFP = 0;
termState.payStartFP = 0;
}
- if (version < Lucene41PostingsWriter.VERSION_META_ARRAY) { // backward compatibility
+ if (version < Lucene41PostingsFormat.VERSION_META_ARRAY) { // backward compatibility
_decodeTerm(in, fieldInfo, termState);
return;
}
@@ -488,7 +486,7 @@ public final class Lucene41PostingsReade
if (skipper == null) {
// Lazy init: first time this enum has ever been used for skipping
skipper = new Lucene41SkipReader(docIn.clone(),
- Lucene41PostingsWriter.maxSkipLevels,
+ Lucene41PostingsFormat.maxSkipLevels,
BLOCK_SIZE,
indexHasPos,
indexHasOffsets,
@@ -821,7 +819,7 @@ public final class Lucene41PostingsReade
// System.out.println(" create skipper");
// }
skipper = new Lucene41SkipReader(docIn.clone(),
- Lucene41PostingsWriter.maxSkipLevels,
+ Lucene41PostingsFormat.maxSkipLevels,
BLOCK_SIZE,
true,
indexHasOffsets,
@@ -1347,7 +1345,7 @@ public final class Lucene41PostingsReade
// System.out.println(" create skipper");
// }
skipper = new Lucene41SkipReader(docIn.clone(),
- Lucene41PostingsWriter.maxSkipLevels,
+ Lucene41PostingsFormat.maxSkipLevels,
BLOCK_SIZE,
true,
indexHasOffsets,
@@ -1590,7 +1588,7 @@ public final class Lucene41PostingsReade
@Override
public void checkIntegrity() throws IOException {
- if (version >= Lucene41PostingsWriter.VERSION_CHECKSUM) {
+ if (version >= Lucene41PostingsFormat.VERSION_CHECKSUM) {
if (docIn != null) {
CodecUtil.checksumEntireFile(docIn);
}
Copied: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/Lucene41SkipReader.java (from r1632459, lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SkipReader.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/Lucene41SkipReader.java?p2=lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/Lucene41SkipReader.java&p1=lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SkipReader.java&r1=1632459&r2=1633196&rev=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SkipReader.java (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/Lucene41SkipReader.java Mon Oct 20 18:25:52 2014
@@ -24,32 +24,10 @@ import org.apache.lucene.codecs.MultiLev
import org.apache.lucene.store.IndexInput;
/**
- * Implements the skip list reader for block postings format
- * that stores positions and payloads.
- *
- * Although this skipper uses MultiLevelSkipListReader as an interface,
- * its definition of skip position will be a little different.
- *
- * For example, when skipInterval = blockSize = 3, df = 2*skipInterval = 6,
- *
- * 0 1 2 3 4 5
- * d d d d d d (posting list)
- * ^ ^ (skip point in MultiLevelSkipListWriter)
- * ^ (skip point in Lucene41SkipWriter)
- *
- * In this case, MultiLevelSkipListReader will use the last document as a skip point,
- * while Lucene41SkipReader should assume no skip point will come.
- *
- * If we use the interface directly in Lucene41SkipReader, it may naively try to read
- * another skip datum after the only skip point is loaded.
- *
- * To illustrate this, suppose we call skipTo(d[5]): since skip point d[3] has a smaller docId
- * and numSkipped+blockSize == df, the MultiLevelSkipListReader will assume the skip list
- * isn't exhausted yet, and try to load a non-existent skip point.
- *
- * Therefore, we'll trim df before passing it to the interface. see trim(int)
- *
+ * Lucene 4.1 skiplist format.
+ * @deprecated only for reading old 4.x segments
*/
+@Deprecated
final class Lucene41SkipReader extends MultiLevelSkipListReader {
// private boolean DEBUG = Lucene41PostingsReader.DEBUG;
private final int blockSize;
Modified: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat Mon Oct 20 18:25:52 2014
@@ -14,3 +14,4 @@
# limitations under the License.
org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat
+org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat
Modified: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java Mon Oct 20 18:25:52 2014
@@ -129,7 +129,7 @@ final class Lucene40PostingsWriter exten
}
@Override
- public void init(IndexOutput termsOut) throws IOException {
+ public void init(IndexOutput termsOut, SegmentWriteState state) throws IOException {
CodecUtil.writeHeader(termsOut, Lucene40PostingsReader.TERMS_CODEC, Lucene40PostingsReader.VERSION_CURRENT);
termsOut.writeInt(skipInterval); // write skipInterval
termsOut.writeInt(maxSkipLevels); // write maxSkipLevels
Copied: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java (from r1632459, lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java?p2=lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java&p1=lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java&r1=1632459&r2=1633196&rev=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java Mon Oct 20 18:25:52 2014
@@ -26,7 +26,6 @@ import org.apache.lucene.index.CorruptIn
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
-import org.apache.lucene.index.TermState;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
@@ -38,35 +37,13 @@ import static org.apache.lucene.codecs.l
import static org.apache.lucene.codecs.lucene41.ForUtil.MAX_ENCODED_SIZE;
import static org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat.BLOCK_SIZE;
-
/**
- * Concrete class that writes docId(maybe frq,pos,offset,payloads) list
- * with postings format.
- *
- * Postings list for each term will be stored separately.
- *
- * @see Lucene41SkipWriter for details about skipping setting and postings layout.
- * @lucene.experimental
+ * Writes 4.1 postings for testing
+ * @deprecated for test purposes only
*/
+@Deprecated
public final class Lucene41PostingsWriter extends PushPostingsWriterBase {
- /**
- * Expert: The maximum number of skip levels. Smaller values result in
- * slightly smaller indexes, but slower skipping in big posting lists.
- */
- static final int maxSkipLevels = 10;
-
- final static String TERMS_CODEC = "Lucene41PostingsWriterTerms";
- final static String DOC_CODEC = "Lucene41PostingsWriterDoc";
- final static String POS_CODEC = "Lucene41PostingsWriterPos";
- final static String PAY_CODEC = "Lucene41PostingsWriterPay";
-
- // Increment version to change it
- final static int VERSION_START = 0;
- final static int VERSION_META_ARRAY = 1;
- final static int VERSION_CHECKSUM = 2;
- final static int VERSION_CURRENT = VERSION_CHECKSUM;
-
IndexOutput docOut;
IndexOutput posOut;
IndexOutput payOut;
@@ -119,13 +96,13 @@ public final class Lucene41PostingsWrite
IndexOutput payOut = null;
boolean success = false;
try {
- CodecUtil.writeHeader(docOut, DOC_CODEC, VERSION_CURRENT);
+ CodecUtil.writeHeader(docOut, Lucene41PostingsFormat.DOC_CODEC, Lucene41PostingsFormat.VERSION_CURRENT);
forUtil = new ForUtil(acceptableOverheadRatio, docOut);
if (state.fieldInfos.hasProx()) {
posDeltaBuffer = new int[MAX_DATA_SIZE];
posOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene41PostingsFormat.POS_EXTENSION),
state.context);
- CodecUtil.writeHeader(posOut, POS_CODEC, VERSION_CURRENT);
+ CodecUtil.writeHeader(posOut, Lucene41PostingsFormat.POS_CODEC, Lucene41PostingsFormat.VERSION_CURRENT);
if (state.fieldInfos.hasPayloads()) {
payloadBytes = new byte[128];
@@ -146,7 +123,7 @@ public final class Lucene41PostingsWrite
if (state.fieldInfos.hasPayloads() || state.fieldInfos.hasOffsets()) {
payOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene41PostingsFormat.PAY_EXTENSION),
state.context);
- CodecUtil.writeHeader(payOut, PAY_CODEC, VERSION_CURRENT);
+ CodecUtil.writeHeader(payOut, Lucene41PostingsFormat.PAY_CODEC, Lucene41PostingsFormat.VERSION_CURRENT);
}
} else {
posDeltaBuffer = null;
@@ -168,7 +145,7 @@ public final class Lucene41PostingsWrite
freqBuffer = new int[MAX_DATA_SIZE];
// TODO: should we try skipping every 2/4 blocks...?
- skipWriter = new Lucene41SkipWriter(maxSkipLevels,
+ skipWriter = new Lucene41SkipWriter(Lucene41PostingsFormat.maxSkipLevels,
BLOCK_SIZE,
state.segmentInfo.getDocCount(),
docOut,
@@ -183,50 +160,14 @@ public final class Lucene41PostingsWrite
this(state, PackedInts.COMPACT);
}
- final static class IntBlockTermState extends BlockTermState {
- long docStartFP = 0;
- long posStartFP = 0;
- long payStartFP = 0;
- long skipOffset = -1;
- long lastPosBlockOffset = -1;
- // docid when there is a single pulsed posting, otherwise -1
- // freq is always implicitly totalTermFreq in this case.
- int singletonDocID = -1;
-
- @Override
- public IntBlockTermState clone() {
- IntBlockTermState other = new IntBlockTermState();
- other.copyFrom(this);
- return other;
- }
-
- @Override
- public void copyFrom(TermState _other) {
- super.copyFrom(_other);
- IntBlockTermState other = (IntBlockTermState) _other;
- docStartFP = other.docStartFP;
- posStartFP = other.posStartFP;
- payStartFP = other.payStartFP;
- lastPosBlockOffset = other.lastPosBlockOffset;
- skipOffset = other.skipOffset;
- singletonDocID = other.singletonDocID;
- }
-
-
- @Override
- public String toString() {
- return super.toString() + " docStartFP=" + docStartFP + " posStartFP=" + posStartFP + " payStartFP=" + payStartFP + " lastPosBlockOffset=" + lastPosBlockOffset + " singletonDocID=" + singletonDocID;
- }
- }
-
@Override
public IntBlockTermState newTermState() {
return new IntBlockTermState();
}
@Override
- public void init(IndexOutput termsOut) throws IOException {
- CodecUtil.writeHeader(termsOut, TERMS_CODEC, VERSION_CURRENT);
+ public void init(IndexOutput termsOut, SegmentWriteState state) throws IOException {
+ CodecUtil.writeHeader(termsOut, Lucene41PostingsFormat.TERMS_CODEC, Lucene41PostingsFormat.VERSION_CURRENT);
termsOut.writeVInt(BLOCK_SIZE);
}
Modified: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java Mon Oct 20 18:25:52 2014
@@ -3,6 +3,7 @@ package org.apache.lucene.codecs.lucene4
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.NormsFormat;
+import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
@@ -40,6 +41,12 @@ public final class Lucene41RWCodec exten
private final DocValuesFormat docValues = new Lucene40RWDocValuesFormat();
private final NormsFormat norms = new Lucene40RWNormsFormat();
private final TermVectorsFormat vectors = new Lucene40RWTermVectorsFormat();
+ private final PostingsFormat postings = new Lucene41RWPostingsFormat();
+
+ @Override
+ public PostingsFormat getPostingsFormatForField(String field) {
+ return postings;
+ }
@Override
public FieldInfosFormat fieldInfosFormat() {
Added: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/Lucene41RWPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/Lucene41RWPostingsFormat.java?rev=1633196&view=auto
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/Lucene41RWPostingsFormat.java (added)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/Lucene41RWPostingsFormat.java Mon Oct 20 18:25:52 2014
@@ -0,0 +1,56 @@
+package org.apache.lucene.codecs.lucene41;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.PostingsWriterBase;
+import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * Read-write version of 4.1 postings format for testing
+ * @deprecated for test purposes only
+ */
+@Deprecated
+public class Lucene41RWPostingsFormat extends Lucene41PostingsFormat {
+
+ static final int MIN_BLOCK_SIZE = 25;
+ static final int MAX_BLOCK_SIZE = 48;
+
+ @Override
+ public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+ PostingsWriterBase postingsWriter = new Lucene41PostingsWriter(state);
+
+ boolean success = false;
+ try {
+ FieldsConsumer ret = new BlockTreeTermsWriter(state,
+ postingsWriter,
+ MIN_BLOCK_SIZE,
+ MAX_BLOCK_SIZE);
+ success = true;
+ return ret;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(postingsWriter);
+ }
+ }
+ }
+}
Copied: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/Lucene41SkipWriter.java (from r1632459, lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SkipWriter.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/Lucene41SkipWriter.java?p2=lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/Lucene41SkipWriter.java&p1=lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SkipWriter.java&r1=1632459&r2=1633196&rev=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SkipWriter.java (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/Lucene41SkipWriter.java Mon Oct 20 18:25:52 2014
@@ -24,25 +24,10 @@ import org.apache.lucene.store.IndexOutp
import org.apache.lucene.codecs.MultiLevelSkipListWriter;
/**
- * Write skip lists with multiple levels, and support skip within block ints.
- *
- * Assume that docFreq = 28, skipInterval = blockSize = 12
- *
- * | block#0 | | block#1 | |vInts|
- * d d d d d d d d d d d d d d d d d d d d d d d d d d d d (posting list)
- * ^ ^ (level 0 skip point)
- *
- * Note that skipWriter will ignore first document in block#0, since
- * it is useless as a skip point. Also, we'll never skip into the vInts
- * block, only record skip data at the start its start point(if it exist).
- *
- * For each skip point, we will record:
- * 1. docID in former position, i.e. for position 12, record docID[11], etc.
- * 2. its related file points(position, payload),
- * 3. related numbers or uptos(position, payload).
- * 4. start offset.
- *
+ * Writes 4.1 skiplists for testing
+ * @deprecated for test purposes only
*/
+@Deprecated
final class Lucene41SkipWriter extends MultiLevelSkipListWriter {
// private boolean DEBUG = Lucene41PostingsReader.DEBUG;
Copied: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/TestLucene41ForUtil.java (from r1632459, lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestForUtil.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/TestLucene41ForUtil.java?p2=lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/TestLucene41ForUtil.java&p1=lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestForUtil.java&r1=1632459&r2=1633196&rev=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestForUtil.java (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/TestLucene41ForUtil.java Mon Oct 20 18:25:52 2014
@@ -34,7 +34,7 @@ import org.apache.lucene.util.packed.Pac
import com.carrotsearch.randomizedtesting.generators.RandomInts;
-public class TestForUtil extends LuceneTestCase {
+public class TestLucene41ForUtil extends LuceneTestCase {
public void testEncodeDecode() throws IOException {
final int iterations = RandomInts.randomIntBetween(random(), 1, 1000);
Copied: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/TestLucene41PostingsFormat.java (from r1632459, lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/TestLucene41PostingsFormat.java?p2=lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/TestLucene41PostingsFormat.java&p1=lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat.java&r1=1632459&r2=1633196&rev=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat.java (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/TestLucene41PostingsFormat.java Mon Oct 20 18:25:52 2014
@@ -28,13 +28,12 @@ import org.apache.lucene.index.Directory
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.TestUtil;
/**
* Tests BlockPostingsFormat
*/
-public class TestBlockPostingsFormat extends BasePostingsFormatTestCase {
- private final Codec codec = TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat());
+public class TestLucene41PostingsFormat extends BasePostingsFormatTestCase {
+ private final Codec codec = new Lucene41RWCodec();
@Override
protected Codec getCodec() {
Copied: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/TestLucene41PostingsFormat2.java (from r1632459, lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat2.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/TestLucene41PostingsFormat2.java?p2=lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/TestLucene41PostingsFormat2.java&p1=lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat2.java&r1=1632459&r2=1633196&rev=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat2.java (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/TestLucene41PostingsFormat2.java Mon Oct 20 18:25:52 2014
@@ -36,7 +36,7 @@ import org.apache.lucene.util.TestUtil;
* Tests special cases of BlockPostingsFormat
*/
-public class TestBlockPostingsFormat2 extends LuceneTestCase {
+public class TestLucene41PostingsFormat2 extends LuceneTestCase {
Directory dir;
RandomIndexWriter iw;
@@ -45,7 +45,7 @@ public class TestBlockPostingsFormat2 ex
super.setUp();
dir = newFSDirectory(createTempDir("testDFBlockSize"));
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
- iwc.setCodec(TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat()));
+ iwc.setCodec(new Lucene41RWCodec());
iw = new RandomIndexWriter(random(), dir, iwc);
iw.setDoRandomForceMerge(false); // we will ourselves
}
@@ -55,7 +55,7 @@ public class TestBlockPostingsFormat2 ex
iw.close();
TestUtil.checkIndex(dir); // for some extra coverage, checkIndex before we forceMerge
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
- iwc.setCodec(TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat()));
+ iwc.setCodec(new Lucene41RWCodec());
iwc.setOpenMode(OpenMode.APPEND);
IndexWriter iw = new IndexWriter(dir, iwc);
iw.forceMerge(1);
Copied: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/TestLucene41PostingsFormat3.java (from r1632459, lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat3.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/TestLucene41PostingsFormat3.java?p2=lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/TestLucene41PostingsFormat3.java&p1=lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat3.java&r1=1632459&r2=1633196&rev=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat3.java (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene41/TestLucene41PostingsFormat3.java Mon Oct 20 18:25:52 2014
@@ -60,7 +60,7 @@ import org.apache.lucene.util.automaton.
/**
* Tests partial enumeration (only pulling a subset of the indexed data)
*/
-public class TestBlockPostingsFormat3 extends LuceneTestCase {
+public class TestLucene41PostingsFormat3 extends LuceneTestCase {
static final int MAXDOC = Lucene41PostingsFormat.BLOCK_SIZE * 20;
// creates 8 fields with different options and does "duels" of fields against each other
@@ -82,7 +82,7 @@ public class TestBlockPostingsFormat3 ex
}
};
IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
- iwc.setCodec(TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat()));
+ iwc.setCodec(new Lucene41RWCodec());
// TODO we could actually add more fields implemented with different PFs
// or, just put this test into the usual rotation?
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
@@ -137,7 +137,7 @@ public class TestBlockPostingsFormat3 ex
verify(dir);
TestUtil.checkIndex(dir); // for some extra coverage, checkIndex before we forceMerge
iwc = newIndexWriterConfig(analyzer);
- iwc.setCodec(TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat()));
+ iwc.setCodec(new Lucene41RWCodec());
iwc.setOpenMode(OpenMode.APPEND);
IndexWriter iw2 = new IndexWriter(dir, iwc);
iw2.forceMerge(1);
Modified: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene410/Lucene410RWCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene410/Lucene410RWCodec.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene410/Lucene410RWCodec.java (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene410/Lucene410RWCodec.java Mon Oct 20 18:25:52 2014
@@ -19,9 +19,11 @@ package org.apache.lucene.codecs.lucene4
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.NormsFormat;
+import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
+import org.apache.lucene.codecs.lucene41.Lucene41RWPostingsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41RWStoredFieldsFormat;
import org.apache.lucene.codecs.lucene42.Lucene42RWTermVectorsFormat;
import org.apache.lucene.codecs.lucene46.Lucene46RWSegmentInfoFormat;
@@ -34,6 +36,13 @@ import org.apache.lucene.codecs.lucene49
@Deprecated
public final class Lucene410RWCodec extends Lucene410Codec {
+ private final PostingsFormat postings = new Lucene41RWPostingsFormat();
+
+ @Override
+ public PostingsFormat getPostingsFormatForField(String field) {
+ return postings;
+ }
+
private static final DocValuesFormat docValues = new Lucene410RWDocValuesFormat();
@Override
Modified: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene410/TestLucene410DocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene410/TestLucene410DocValuesFormat.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene410/TestLucene410DocValuesFormat.java (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene410/TestLucene410DocValuesFormat.java Mon Oct 20 18:25:52 2014
@@ -26,8 +26,6 @@ import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.asserting.AssertingCodec;
-import org.apache.lucene.codecs.blocktreeords.Ords41PostingsFormat;
-import org.apache.lucene.codecs.lucene41ords.Lucene41WithOrds;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedSetDocValuesField;
@@ -121,17 +119,7 @@ public class TestLucene410DocValuesForma
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
conf.setMergeScheduler(new SerialMergeScheduler());
// set to duel against a codec which has ordinals:
- final PostingsFormat pf;
- switch (random().nextInt(2)) {
- case 0: pf = new Lucene41WithOrds();
- break;
- case 1: pf = new Ords41PostingsFormat();
- break;
- // TODO: these don't actually support ords!
- //case 2: pf = new FSTOrdPostingsFormat();
- // break;
- default: throw new AssertionError();
- }
+ final PostingsFormat pf = TestUtil.getPostingsFormatWithOrds(random());
final DocValuesFormat dv = new Lucene410RWDocValuesFormat();
conf.setCodec(new AssertingCodec() {
@Override
Modified: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/Lucene42RWCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/Lucene42RWCodec.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/Lucene42RWCodec.java (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene42/Lucene42RWCodec.java Mon Oct 20 18:25:52 2014
@@ -20,10 +20,12 @@ package org.apache.lucene.codecs.lucene4
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.NormsFormat;
+import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40RWSegmentInfoFormat;
+import org.apache.lucene.codecs.lucene41.Lucene41RWPostingsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41RWStoredFieldsFormat;
/**
@@ -37,6 +39,13 @@ public final class Lucene42RWCodec exten
private static final NormsFormat norms = new Lucene42RWNormsFormat();
private static final StoredFieldsFormat storedFields = new Lucene41RWStoredFieldsFormat();
private static final FieldInfosFormat fieldInfosFormat = new Lucene42RWFieldInfosFormat();
+
+ private final PostingsFormat postings = new Lucene41RWPostingsFormat();
+
+ @Override
+ public PostingsFormat getPostingsFormatForField(String field) {
+ return postings;
+ }
@Override
public DocValuesFormat getDocValuesFormatForField(String field) {
Modified: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene45/Lucene45RWCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene45/Lucene45RWCodec.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene45/Lucene45RWCodec.java (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene45/Lucene45RWCodec.java Mon Oct 20 18:25:52 2014
@@ -20,10 +20,12 @@ package org.apache.lucene.codecs.lucene4
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.NormsFormat;
+import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40RWSegmentInfoFormat;
+import org.apache.lucene.codecs.lucene41.Lucene41RWPostingsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41RWStoredFieldsFormat;
import org.apache.lucene.codecs.lucene42.Lucene42RWFieldInfosFormat;
import org.apache.lucene.codecs.lucene42.Lucene42RWNormsFormat;
@@ -36,6 +38,13 @@ import org.apache.lucene.codecs.lucene42
@Deprecated
public final class Lucene45RWCodec extends Lucene45Codec {
+ private final PostingsFormat postings = new Lucene41RWPostingsFormat();
+
+ @Override
+ public PostingsFormat getPostingsFormatForField(String field) {
+ return postings;
+ }
+
private static final FieldInfosFormat fieldInfosFormat = new Lucene42RWFieldInfosFormat();
@Override
Modified: lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene46/Lucene46RWCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene46/Lucene46RWCodec.java?rev=1633196&r1=1633195&r2=1633196&view=diff
==============================================================================
--- lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene46/Lucene46RWCodec.java (original)
+++ lucene/dev/branches/lucene5969/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene46/Lucene46RWCodec.java Mon Oct 20 18:25:52 2014
@@ -19,9 +19,11 @@ package org.apache.lucene.codecs.lucene4
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.NormsFormat;
+import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
+import org.apache.lucene.codecs.lucene41.Lucene41RWPostingsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41RWStoredFieldsFormat;
import org.apache.lucene.codecs.lucene42.Lucene42RWNormsFormat;
import org.apache.lucene.codecs.lucene42.Lucene42RWTermVectorsFormat;
@@ -34,6 +36,13 @@ import org.apache.lucene.codecs.lucene45
@Deprecated
public final class Lucene46RWCodec extends Lucene46Codec {
+ private final PostingsFormat postings = new Lucene41RWPostingsFormat();
+
+ @Override
+ public PostingsFormat getPostingsFormatForField(String field) {
+ return postings;
+ }
+
private static final DocValuesFormat docValues = new Lucene45RWDocValuesFormat();
@Override