You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ha...@apache.org on 2013/08/15 15:15:46 UTC
svn commit: r1514253 - in /lucene/dev/branches/lucene3069/lucene:
codecs/src/java/org/apache/lucene/codecs/temp/
codecs/src/resources/META-INF/services/
core/src/java/org/apache/lucene/codecs/temp/
core/src/resources/META-INF/services/
Author: han
Date: Thu Aug 15 13:15:46 2013
New Revision: 1514253
URL: http://svn.apache.org/r1514253
Log:
LUCENE-3069: API refactoring on BlockTerms dict
Added:
lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/
lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempBlockPostingsFormat.java
- copied, changed from r1514245, lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41ords/Lucene41WithOrds.java
lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempBlockTermsReader.java
- copied, changed from r1514245, lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java
lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempBlockTermsWriter.java
- copied, changed from r1514245, lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java
lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTreePostingsFormat.java
- copied, changed from r1514245, lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockPostingsFormat.java
lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTreeTermsReader.java
- copied, changed from r1514245, lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTermsReader.java
lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTreeTermsWriter.java
- copied, changed from r1514245, lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTermsWriter.java
Removed:
lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockPostingsFormat.java
lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTermsReader.java
lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTermsWriter.java
Modified:
lucene/dev/branches/lucene3069/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempPostingsReader.java
lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempPostingsWriter.java
lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempTermState.java
lucene/dev/branches/lucene3069/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
Copied: lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempBlockPostingsFormat.java (from r1514245, lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41ords/Lucene41WithOrds.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempBlockPostingsFormat.java?p2=lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempBlockPostingsFormat.java&p1=lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41ords/Lucene41WithOrds.java&r1=1514245&r2=1514253&rev=1514253&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41ords/Lucene41WithOrds.java (original)
+++ lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempBlockPostingsFormat.java Thu Aug 15 13:15:46 2013
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.lucene41ords;
+package org.apache.lucene.codecs.temp;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -22,8 +22,8 @@ import java.io.IOException;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.PostingsReaderBase;
-import org.apache.lucene.codecs.PostingsWriterBase;
+import org.apache.lucene.codecs.TempPostingsReaderBase;
+import org.apache.lucene.codecs.TempPostingsWriterBase;
import org.apache.lucene.codecs.blockterms.BlockTermsReader;
import org.apache.lucene.codecs.blockterms.BlockTermsWriter;
import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexReader;
@@ -44,21 +44,21 @@ import org.apache.lucene.util.BytesRef;
* Customized version of {@link Lucene41PostingsFormat} that uses
* {@link FixedGapTermsIndexWriter}.
*/
-public final class Lucene41WithOrds extends PostingsFormat {
+public final class TempBlockPostingsFormat extends PostingsFormat {
final int termIndexInterval;
- public Lucene41WithOrds() {
+ public TempBlockPostingsFormat() {
this(FixedGapTermsIndexWriter.DEFAULT_TERM_INDEX_INTERVAL);
}
- public Lucene41WithOrds(int termIndexInterval) {
- super("Lucene41WithOrds");
+ public TempBlockPostingsFormat(int termIndexInterval) {
+ super("TempBlock");
this.termIndexInterval = termIndexInterval;
}
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
- PostingsWriterBase docs = new Lucene41PostingsWriter(state);
+ TempPostingsWriterBase docs = new TempPostingsWriter(state);
// TODO: should we make the terms index more easily
// pluggable? Ie so that this codec would record which
@@ -79,7 +79,7 @@ public final class Lucene41WithOrds exte
try {
// Must use BlockTermsWriter (not BlockTree) because
// BlockTree doesn't support ords (yet)...
- FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, docs);
+ FieldsConsumer ret = new TempBlockTermsWriter(indexWriter, state, docs);
success = true;
return ret;
} finally {
@@ -95,7 +95,7 @@ public final class Lucene41WithOrds exte
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
- PostingsReaderBase postings = new Lucene41PostingsReader(state.directory, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix);
+ TempPostingsReaderBase postings = new TempPostingsReader(state.directory, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix);
TermsIndexReaderBase indexReader;
boolean success = false;
@@ -114,7 +114,7 @@ public final class Lucene41WithOrds exte
success = false;
try {
- FieldsProducer ret = new BlockTermsReader(indexReader,
+ FieldsProducer ret = new TempBlockTermsReader(indexReader,
state.directory,
state.fieldInfos,
state.segmentInfo,
Copied: lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempBlockTermsReader.java (from r1514245, lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempBlockTermsReader.java?p2=lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempBlockTermsReader.java&p1=lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java&r1=1514245&r2=1514253&rev=1514253&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempBlockTermsReader.java Thu Aug 15 13:15:46 2013
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.blockterms;
+package org.apache.lucene.codecs.temp;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -22,11 +22,11 @@ import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.TreeMap;
+import java.util.Arrays;
-import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsProducer;
-import org.apache.lucene.codecs.PostingsReaderBase;
+import org.apache.lucene.codecs.TempPostingsReaderBase;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
@@ -46,10 +46,11 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.DoubleBarrelLRUCache;
+import org.apache.lucene.codecs.blockterms.*;
/** Handles a terms dict, but decouples all details of
* doc/freqs/positions reading to an instance of {@link
- * PostingsReaderBase}. This class is reusable for
+ * TempPostingsReaderBase}. This class is reusable for
* codecs that use a different format for
* docs/freqs/positions (though codecs are also free to
* make their own terms dict impl).
@@ -59,13 +60,13 @@ import org.apache.lucene.util.DoubleBarr
* implementation of the terms dict index.
* @lucene.experimental */
-public class BlockTermsReader extends FieldsProducer {
+public class TempBlockTermsReader extends FieldsProducer {
// Open input to the main terms dict file (_X.tis)
private final IndexInput in;
// Reads the terms dict entries, to gather state to
// produce DocsEnum on demand
- private final PostingsReaderBase postingsReader;
+ private final TempPostingsReaderBase postingsReader;
private final TreeMap<String,FieldReader> fields = new TreeMap<String,FieldReader>();
@@ -109,14 +110,14 @@ public class BlockTermsReader extends Fi
// private String segment;
- public BlockTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, SegmentInfo info, PostingsReaderBase postingsReader, IOContext context,
+ public TempBlockTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, SegmentInfo info, TempPostingsReaderBase postingsReader, IOContext context,
String segmentSuffix)
throws IOException {
this.postingsReader = postingsReader;
// this.segment = segment;
- in = dir.openInput(IndexFileNames.segmentFileName(info.name, segmentSuffix, BlockTermsWriter.TERMS_EXTENSION),
+ in = dir.openInput(IndexFileNames.segmentFileName(info.name, segmentSuffix, TempBlockTermsWriter.TERMS_EXTENSION),
context);
boolean success = false;
@@ -142,6 +143,7 @@ public class BlockTermsReader extends Fi
final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
final long sumDocFreq = in.readVLong();
final int docCount = in.readVInt();
+ final int longsSize = in.readVInt();
if (docCount < 0 || docCount > info.getDocCount()) { // #docs with field must be <= #docs
throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
}
@@ -151,7 +153,7 @@ public class BlockTermsReader extends Fi
if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + in + ")");
}
- FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount));
+ FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount, longsSize));
if (previous != null) {
throw new CorruptIndexException("duplicate fields: " + fieldInfo.name + " (resource=" + in + ")");
}
@@ -167,17 +169,17 @@ public class BlockTermsReader extends Fi
}
private int readHeader(IndexInput input) throws IOException {
- int version = CodecUtil.checkHeader(input, BlockTermsWriter.CODEC_NAME,
- BlockTermsWriter.VERSION_START,
- BlockTermsWriter.VERSION_CURRENT);
- if (version < BlockTermsWriter.VERSION_APPEND_ONLY) {
+ int version = CodecUtil.checkHeader(input, TempBlockTermsWriter.CODEC_NAME,
+ TempBlockTermsWriter.VERSION_START,
+ TempBlockTermsWriter.VERSION_CURRENT);
+ if (version < TempBlockTermsWriter.VERSION_APPEND_ONLY) {
dirOffset = input.readLong();
}
return version;
}
private void seekDir(IndexInput input, long dirOffset) throws IOException {
- if (version >= BlockTermsWriter.VERSION_APPEND_ONLY) {
+ if (version >= TempBlockTermsWriter.VERSION_APPEND_ONLY) {
input.seek(input.length() - 8);
dirOffset = input.readLong();
}
@@ -230,8 +232,9 @@ public class BlockTermsReader extends Fi
final long sumTotalTermFreq;
final long sumDocFreq;
final int docCount;
+ final int longsSize;
- FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount) {
+ FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
this.numTerms = numTerms;
@@ -239,6 +242,7 @@ public class BlockTermsReader extends Fi
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
+ this.longsSize = longsSize;
}
@Override
@@ -289,7 +293,7 @@ public class BlockTermsReader extends Fi
// Iterates through terms in this field
private final class SegmentTermsEnum extends TermsEnum {
private final IndexInput in;
- private final BlockTermState state;
+ private final TempTermState state;
private final boolean doOrd;
private final FieldAndTerm fieldTerm = new FieldAndTerm();
private final TermsIndexReaderBase.FieldIndexEnum indexEnum;
@@ -326,8 +330,13 @@ public class BlockTermsReader extends Fi
private final ByteArrayDataInput freqReader = new ByteArrayDataInput();
private int metaDataUpto;
+ private long[] longs;
+ private byte[] bytes;
+ private ByteArrayDataInput bytesReader;
+
+
public SegmentTermsEnum() throws IOException {
- in = BlockTermsReader.this.in.clone();
+ in = TempBlockTermsReader.this.in.clone();
in.seek(termsStartPointer);
indexEnum = indexReader.getFieldEnum(fieldInfo);
doOrd = indexReader.supportsOrd();
@@ -339,6 +348,7 @@ public class BlockTermsReader extends Fi
termSuffixes = new byte[128];
docFreqBytes = new byte[64];
//System.out.println("BTR.enum init this=" + this + " postingsReader=" + postingsReader);
+ longs = new long[longsSize];
}
@Override
@@ -359,7 +369,7 @@ public class BlockTermsReader extends Fi
throw new IllegalStateException("terms index was not loaded");
}
- //System.out.println("BTR.seek seg=" + segment + " target=" + fieldInfo.name + ":" + target.utf8ToString() + " " + target + " current=" + term().utf8ToString() + " " + term() + " indexIsCurrent=" + indexIsCurrent + " didIndexNext=" + didIndexNext + " seekPending=" + seekPending + " divisor=" + indexReader.getDivisor() + " this=" + this);
+ //System.out.println("BTR.seek seg=" + segment + " target=" + fieldInfo.name + ":" + target.utf8ToString() + " " + target + " current=" + term().utf8ToString() + " " + term() + " useCache=" + useCache + " indexIsCurrent=" + indexIsCurrent + " didIndexNext=" + didIndexNext + " seekPending=" + seekPending + " divisor=" + indexReader.getDivisor() + " this=" + this);
if (didIndexNext) {
if (nextIndexTerm == null) {
//System.out.println(" nextIndexTerm=null");
@@ -415,7 +425,7 @@ public class BlockTermsReader extends Fi
assert result;
indexIsCurrent = true;
- didIndexNext = false;
+ didIndexNext = false;
if (doOrd) {
state.ord = indexEnum.ord()-1;
@@ -686,8 +696,8 @@ public class BlockTermsReader extends Fi
@Override
public void seekExact(BytesRef target, TermState otherState) {
//System.out.println("BTR.seekExact termState target=" + target.utf8ToString() + " " + target + " this=" + this);
- assert otherState != null && otherState instanceof BlockTermState;
- assert !doOrd || ((BlockTermState) otherState).ord < numTerms;
+ assert otherState != null && otherState instanceof TempTermState;
+ assert !doOrd || ((TempTermState) otherState).ord < numTerms;
state.copyFrom(otherState);
seekPending = true;
indexIsCurrent = false;
@@ -789,11 +799,20 @@ public class BlockTermsReader extends Fi
//System.out.println(" freq bytes len=" + len);
in.readBytes(docFreqBytes, 0, len);
freqReader.reset(docFreqBytes, 0, len);
- metaDataUpto = 0;
- state.termBlockOrd = 0;
+ // metadata
+ len = in.readVInt();
+ if (bytes == null) {
+ bytes = new byte[ArrayUtil.oversize(len, 1)];
+ bytesReader = new ByteArrayDataInput();
+ } else if (bytes.length < len) {
+ bytes = new byte[ArrayUtil.oversize(len, 1)];
+ }
+ in.readBytes(bytes, 0, len);
+ bytesReader.reset(bytes, 0, len);
- postingsReader.readTermsBlock(in, fieldInfo, state);
+ metaDataUpto = 0;
+ state.termBlockOrd = 0;
indexIsCurrent = false;
//System.out.println(" indexIsCurrent=" + indexIsCurrent);
@@ -814,6 +833,9 @@ public class BlockTermsReader extends Fi
// We must set/incr state.termCount because
// postings impl can look at this
state.termBlockOrd = metaDataUpto;
+ if (metaDataUpto == 0) {
+ Arrays.fill(longs, 0);
+ }
// TODO: better API would be "jump straight to term=N"???
while (metaDataUpto < limit) {
//System.out.println(" decode mdUpto=" + metaDataUpto);
@@ -825,14 +847,19 @@ public class BlockTermsReader extends Fi
// TODO: if docFreq were bulk decoded we could
// just skipN here:
+
+ // docFreq, totalTermFreq
state.docFreq = freqReader.readVInt();
//System.out.println(" dF=" + state.docFreq);
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
state.totalTermFreq = state.docFreq + freqReader.readVLong();
//System.out.println(" totTF=" + state.totalTermFreq);
}
-
- postingsReader.nextTerm(fieldInfo, state);
+ // metadata
+ for (int i = 0; i < longs.length; i++) {
+ longs[i] += bytesReader.readVLong();
+ }
+ postingsReader.decodeTerm(longs, bytesReader, fieldInfo, state);
metaDataUpto++;
state.termBlockOrd++;
}
Copied: lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempBlockTermsWriter.java (from r1514245, lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempBlockTermsWriter.java?p2=lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempBlockTermsWriter.java&p1=lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java&r1=1514245&r2=1514253&rev=1514253&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java (original)
+++ lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/temp/TempBlockTermsWriter.java Thu Aug 15 13:15:46 2013
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.blockterms;
+package org.apache.lucene.codecs.temp;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -19,13 +19,14 @@ package org.apache.lucene.codecs.blockte
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.PostingsConsumer;
-import org.apache.lucene.codecs.PostingsWriterBase;
+import org.apache.lucene.codecs.TempPostingsWriterBase;
import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.index.FieldInfo;
@@ -39,6 +40,7 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.codecs.blockterms.*;
// TODO: currently we encode all terms between two indexed
// terms as a block; but, we could decouple the two, ie
@@ -52,7 +54,7 @@ import org.apache.lucene.util.RamUsageEs
* @lucene.experimental
*/
-public class BlockTermsWriter extends FieldsConsumer {
+public class TempBlockTermsWriter extends FieldsConsumer {
final static String CODEC_NAME = "BLOCK_TERMS_DICT";
@@ -65,7 +67,7 @@ public class BlockTermsWriter extends Fi
static final String TERMS_EXTENSION = "tib";
protected final IndexOutput out;
- final PostingsWriterBase postingsWriter;
+ final TempPostingsWriterBase postingsWriter;
final FieldInfos fieldInfos;
FieldInfo currentField;
private final TermsIndexWriterBase termsIndexWriter;
@@ -77,8 +79,9 @@ public class BlockTermsWriter extends Fi
public final long sumTotalTermFreq;
public final long sumDocFreq;
public final int docCount;
+ public final int longsSize;
- public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount) {
+ public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
this.termsStartPointer = termsStartPointer;
@@ -86,6 +89,7 @@ public class BlockTermsWriter extends Fi
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
+ this.longsSize = longsSize;
}
}
@@ -93,8 +97,8 @@ public class BlockTermsWriter extends Fi
// private final String segment;
- public BlockTermsWriter(TermsIndexWriterBase termsIndexWriter,
- SegmentWriteState state, PostingsWriterBase postingsWriter)
+ public TempBlockTermsWriter(TermsIndexWriterBase termsIndexWriter,
+ SegmentWriteState state, TempPostingsWriterBase postingsWriter)
throws IOException {
final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_EXTENSION);
this.termsIndexWriter = termsIndexWriter;
@@ -148,6 +152,7 @@ public class BlockTermsWriter extends Fi
}
out.writeVLong(field.sumDocFreq);
out.writeVInt(field.docCount);
+ out.writeVInt(field.longsSize);
}
writeTrailer(dirStart);
} finally {
@@ -162,17 +167,20 @@ public class BlockTermsWriter extends Fi
private static class TermEntry {
public final BytesRef term = new BytesRef();
public TermStats stats;
+ public long[] longs;
+ public byte[] bytes;
}
class TermsWriter extends TermsConsumer {
private final FieldInfo fieldInfo;
- private final PostingsWriterBase postingsWriter;
+ private final TempPostingsWriterBase postingsWriter;
private final long termsStartPointer;
private long numTerms;
private final TermsIndexWriterBase.FieldWriter fieldIndexWriter;
long sumTotalTermFreq;
long sumDocFreq;
int docCount;
+ int longsSize;
private TermEntry[] pendingTerms;
@@ -181,7 +189,7 @@ public class BlockTermsWriter extends Fi
TermsWriter(
TermsIndexWriterBase.FieldWriter fieldIndexWriter,
FieldInfo fieldInfo,
- PostingsWriterBase postingsWriter)
+ TempPostingsWriterBase postingsWriter)
{
this.fieldInfo = fieldInfo;
this.fieldIndexWriter = fieldIndexWriter;
@@ -190,8 +198,8 @@ public class BlockTermsWriter extends Fi
pendingTerms[i] = new TermEntry();
}
termsStartPointer = out.getFilePointer();
- postingsWriter.setField(fieldInfo);
this.postingsWriter = postingsWriter;
+ this.longsSize = postingsWriter.setField(fieldInfo);
}
@Override
@@ -238,10 +246,13 @@ public class BlockTermsWriter extends Fi
final TermEntry te = pendingTerms[pendingCount];
te.term.copyBytes(text);
te.stats = stats;
+ te.longs = new long[longsSize];
+ postingsWriter.finishTerm(te.longs, bytesWriter, stats);
+ te.bytes = new byte[(int) bytesWriter.getFilePointer()];
+ bytesWriter.writeTo(te.bytes, 0);
+ bytesWriter.reset();
pendingCount++;
-
- postingsWriter.finishTerm(stats);
numTerms++;
}
@@ -264,7 +275,8 @@ public class BlockTermsWriter extends Fi
termsStartPointer,
sumTotalTermFreq,
sumDocFreq,
- docCount));
+ docCount,
+ longsSize));
}
}
@@ -325,12 +337,26 @@ public class BlockTermsWriter extends Fi
bytesWriter.writeVLong(stats.totalTermFreq-stats.docFreq);
}
}
+ out.writeVInt((int) bytesWriter.getFilePointer());
+ bytesWriter.writeTo(out);
+ bytesWriter.reset();
+ // 4th pass: write the metadata
+ long[] lastLongs = new long[longsSize];
+ Arrays.fill(lastLongs, 0);
+ for(int termCount=0;termCount<pendingCount;termCount++) {
+ final long[] longs = pendingTerms[termCount].longs;
+ final byte[] bytes = pendingTerms[termCount].bytes;
+ for (int i = 0; i < longsSize; i++) {
+ bytesWriter.writeVLong(longs[i] - lastLongs[i]);
+ }
+ lastLongs = longs;
+ bytesWriter.writeBytes(bytes, 0, bytes.length);
+ }
out.writeVInt((int) bytesWriter.getFilePointer());
bytesWriter.writeTo(out);
bytesWriter.reset();
- postingsWriter.flushTermsBlock(pendingCount, pendingCount);
lastPrevTerm.copyBytes(pendingTerms[pendingCount-1].term);
pendingCount = 0;
}
Modified: lucene/dev/branches/lucene3069/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat?rev=1514253&r1=1514252&r2=1514253&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat (original)
+++ lucene/dev/branches/lucene3069/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat Thu Aug 15 13:15:46 2013
@@ -18,3 +18,4 @@ org.apache.lucene.codecs.simpletext.Simp
org.apache.lucene.codecs.memory.MemoryPostingsFormat
org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat
org.apache.lucene.codecs.memory.DirectPostingsFormat
+org.apache.lucene.codecs.temp.TempBlockPostingsFormat
Copied: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTreePostingsFormat.java (from r1514245, lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockPostingsFormat.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTreePostingsFormat.java?p2=lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTreePostingsFormat.java&p1=lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockPostingsFormat.java&r1=1514245&r2=1514253&rev=1514253&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockPostingsFormat.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTreePostingsFormat.java Thu Aug 15 13:15:46 2013
@@ -56,7 +56,7 @@ import org.apache.lucene.util.packed.Pac
*
* <li>
* <b>Block structure</b>:
- * <p>When the postings are long enough, TempBlockPostingsFormat will try to encode most integer data
+ * <p>When the postings are long enough, TempBlockTreePostingsFormat will try to encode most integer data
* as a packed block.</p>
* <p>Take a term with 259 documents as an example, the first 256 document ids are encoded as two packed
* blocks, while the remaining 3 are encoded as one VInt block. </p>
@@ -116,7 +116,7 @@ import org.apache.lucene.util.packed.Pac
* field along with per-term statistics (such as docfreq)
* and pointers to the frequencies, positions, payload and
* skip data in the .doc, .pos, and .pay files.
- * See {@link TempBlockTermsWriter} for more details on the format.
+ * See {@link TempBlockTreeTermsWriter} for more details on the format.
* </p>
*
* <p>NOTE: The term dictionary can plug into different postings implementations:
@@ -159,7 +159,7 @@ import org.apache.lucene.util.packed.Pac
* <li>SkipFPDelta determines the position of this term's SkipData within the .doc
* file. In particular, it is the length of the TermFreq data.
* SkipDelta is only stored if DocFreq is not smaller than SkipMinimum
- * (i.e. 8 in TempBlockPostingsFormat).</li>
+ * (i.e. 8 in TempBlockTreePostingsFormat).</li>
* <li>SingletonDocID is an optimization when a term only appears in one document. In this case, instead
* of writing a file pointer to the .doc file (DocFPDelta), and then a VIntBlock at that location, the
* single document ID is written to the term dictionary.</li>
@@ -172,7 +172,7 @@ import org.apache.lucene.util.packed.Pac
* <dd>
* <b>Term Index</b>
* <p>The .tip file contains an index into the term dictionary, so that it can be
- * accessed randomly. See {@link TempBlockTermsWriter} for more details on the format.</p>
+ * accessed randomly. See {@link TempBlockTreeTermsWriter} for more details on the format.</p>
* </dd>
* </dl>
*
@@ -239,7 +239,7 @@ import org.apache.lucene.util.packed.Pac
* We use this trick since the definition of skip entry is a little different from base interface.
* In {@link MultiLevelSkipListWriter}, skip data is assumed to be saved for
* skipInterval<sup>th</sup>, 2*skipInterval<sup>th</sup> ... posting in the list. However,
- * in TempBlockPostingsFormat, the skip data is saved for skipInterval+1<sup>th</sup>,
+ * in TempBlockTreePostingsFormat, the skip data is saved for skipInterval+1<sup>th</sup>,
* 2*skipInterval+1<sup>th</sup> ... posting (skipInterval==PackedBlockSize in this case).
* When DocFreq is multiple of PackedBlockSize, MultiLevelSkipListWriter will expect one
* more skip data than TempSkipWriter. </li>
@@ -352,7 +352,7 @@ import org.apache.lucene.util.packed.Pac
* @lucene.experimental
*/
-public final class TempBlockPostingsFormat extends PostingsFormat {
+public final class TempBlockTreePostingsFormat extends PostingsFormat {
/**
* Filename extension for document number, frequencies, and skip data.
* See chapter: <a href="#Frequencies">Frequencies and Skip Data</a>
@@ -381,18 +381,18 @@ public final class TempBlockPostingsForm
// NOTE: must be multiple of 64 because of PackedInts long-aligned encoding/decoding
public final static int BLOCK_SIZE = 128;
- /** Creates {@code TempBlockPostingsFormat} with default
+ /** Creates {@code TempBlockTreePostingsFormat} with default
* settings. */
- public TempBlockPostingsFormat() {
- this(TempBlockTermsWriter.DEFAULT_MIN_BLOCK_SIZE, TempBlockTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
+ public TempBlockTreePostingsFormat() {
+ this(TempBlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, TempBlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
}
- /** Creates {@code TempBlockPostingsFormat} with custom
+ /** Creates {@code TempBlockTreePostingsFormat} with custom
* values for {@code minBlockSize} and {@code
* maxBlockSize} passed to block terms dictionary.
- * @see TempBlockTermsWriter#TempBlockTermsWriter(SegmentWriteState,TempPostingsWriterBase,int,int) */
- public TempBlockPostingsFormat(int minTermBlockSize, int maxTermBlockSize) {
- super("TempBlock");
+ * @see TempBlockTreeTermsWriter#TempBlockTreeTermsWriter(SegmentWriteState,TempPostingsWriterBase,int,int) */
+ public TempBlockTreePostingsFormat(int minTermBlockSize, int maxTermBlockSize) {
+ super("TempBlockTree");
this.minTermBlockSize = minTermBlockSize;
assert minTermBlockSize > 1;
this.maxTermBlockSize = maxTermBlockSize;
@@ -410,7 +410,7 @@ public final class TempBlockPostingsForm
boolean success = false;
try {
- FieldsConsumer ret = new TempBlockTermsWriter(state,
+ FieldsConsumer ret = new TempBlockTreeTermsWriter(state,
postingsWriter,
minTermBlockSize,
maxTermBlockSize);
@@ -432,7 +432,7 @@ public final class TempBlockPostingsForm
state.segmentSuffix);
boolean success = false;
try {
- FieldsProducer ret = new TempBlockTermsReader(state.directory,
+ FieldsProducer ret = new TempBlockTreeTermsReader(state.directory,
state.fieldInfos,
state.segmentInfo,
postingsReader,
Copied: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTreeTermsReader.java (from r1514245, lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTermsReader.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTreeTermsReader.java?p2=lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTreeTermsReader.java&p1=lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTermsReader.java&r1=1514245&r2=1514253&rev=1514253&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTermsReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTreeTermsReader.java Thu Aug 15 13:15:46 2013
@@ -72,9 +72,9 @@ import org.apache.lucene.codecs.Postings
* does not support a pluggable terms index
* implementation).
*
- * <p><b>NOTE</b>: this terms dictionary does not support
- * index divisor when opening an IndexReader. Instead, you
- * can change the min/maxItemsPerBlock during indexing.</p>
+ * <p><b>NOTE</b>: this terms dictionary supports
+ * min/maxItemsPerBlock during indexing to control how
+ * much memory the terms index uses.</p>
*
* <p>The data structure used by this implementation is very
* similar to a burst trie
@@ -86,17 +86,17 @@ import org.apache.lucene.codecs.Postings
* option to see summary statistics on the blocks in the
* dictionary.
*
- * See {@link TempBlockTermsWriter}.
+ * See {@link TempBlockTreeTermsWriter}.
*
* @lucene.experimental
*/
-public class TempBlockTermsReader extends FieldsProducer {
+public class TempBlockTreeTermsReader extends FieldsProducer {
// Open input to the main terms dict file (_X.tib)
private final IndexInput in;
- //private static final boolean DEBUG = TempBlockTermsWriter.DEBUG;
+ //private static final boolean DEBUG = TempBlockTreeTermsWriter.DEBUG;
// Reads the terms dict entries, to gather state to
// produce DocsEnum on demand
@@ -115,7 +115,7 @@ public class TempBlockTermsReader extend
private final int version;
/** Sole constructor. */
- public TempBlockTermsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo info,
+ public TempBlockTreeTermsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo info,
TempPostingsReaderBase postingsReader, IOContext ioContext,
String segmentSuffix)
throws IOException {
@@ -123,7 +123,7 @@ public class TempBlockTermsReader extend
this.postingsReader = postingsReader;
this.segment = info.name;
- in = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, TempBlockTermsWriter.TERMS_EXTENSION),
+ in = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, TempBlockTreeTermsWriter.TERMS_EXTENSION),
ioContext);
boolean success = false;
@@ -131,7 +131,7 @@ public class TempBlockTermsReader extend
try {
version = readHeader(in);
- indexIn = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, TempBlockTermsWriter.TERMS_INDEX_EXTENSION),
+ indexIn = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, TempBlockTreeTermsWriter.TERMS_INDEX_EXTENSION),
ioContext);
int indexVersion = readIndexHeader(indexIn);
if (indexVersion != version) {
@@ -192,10 +192,10 @@ public class TempBlockTermsReader extend
/** Reads terms file header. */
private int readHeader(IndexInput input) throws IOException {
- int version = CodecUtil.checkHeader(input, TempBlockTermsWriter.TERMS_CODEC_NAME,
- TempBlockTermsWriter.TERMS_VERSION_START,
- TempBlockTermsWriter.TERMS_VERSION_CURRENT);
- if (version < TempBlockTermsWriter.TERMS_VERSION_APPEND_ONLY) {
+ int version = CodecUtil.checkHeader(input, TempBlockTreeTermsWriter.TERMS_CODEC_NAME,
+ TempBlockTreeTermsWriter.TERMS_VERSION_START,
+ TempBlockTreeTermsWriter.TERMS_VERSION_CURRENT);
+ if (version < TempBlockTreeTermsWriter.TERMS_VERSION_APPEND_ONLY) {
dirOffset = input.readLong();
}
return version;
@@ -203,10 +203,10 @@ public class TempBlockTermsReader extend
/** Reads index file header. */
private int readIndexHeader(IndexInput input) throws IOException {
- int version = CodecUtil.checkHeader(input, TempBlockTermsWriter.TERMS_INDEX_CODEC_NAME,
- TempBlockTermsWriter.TERMS_INDEX_VERSION_START,
- TempBlockTermsWriter.TERMS_INDEX_VERSION_CURRENT);
- if (version < TempBlockTermsWriter.TERMS_INDEX_VERSION_APPEND_ONLY) {
+ int version = CodecUtil.checkHeader(input, TempBlockTreeTermsWriter.TERMS_INDEX_CODEC_NAME,
+ TempBlockTreeTermsWriter.TERMS_INDEX_VERSION_START,
+ TempBlockTreeTermsWriter.TERMS_INDEX_VERSION_CURRENT);
+ if (version < TempBlockTreeTermsWriter.TERMS_INDEX_VERSION_APPEND_ONLY) {
indexDirOffset = input.readLong();
}
return version;
@@ -215,7 +215,7 @@ public class TempBlockTermsReader extend
/** Seek {@code input} to the directory offset. */
private void seekDir(IndexInput input, long dirOffset)
throws IOException {
- if (version >= TempBlockTermsWriter.TERMS_INDEX_VERSION_APPEND_ONLY) {
+ if (version >= TempBlockTreeTermsWriter.TERMS_INDEX_VERSION_APPEND_ONLY) {
input.seek(input.length() - 8);
dirOffset = input.readLong();
}
@@ -462,7 +462,7 @@ public class TempBlockTermsReader extend
FieldReader(FieldInfo fieldInfo, long numTerms, BytesRef rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount, long indexStartFP, int longsSize, IndexInput indexIn) throws IOException {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
- //DEBUG = TempBlockTermsReader.DEBUG && fieldInfo.name.equals("id");
+ //DEBUG = TempBlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id");
this.numTerms = numTerms;
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
@@ -474,7 +474,7 @@ public class TempBlockTermsReader extend
// System.out.println("BTTR: seg=" + segment + " field=" + fieldInfo.name + " rootBlockCode=" + rootCode + " divisor=" + indexDivisor);
// }
- rootBlockFP = (new ByteArrayDataInput(rootCode.bytes, rootCode.offset, rootCode.length)).readVLong() >>> TempBlockTermsWriter.OUTPUT_FLAGS_NUM_BITS;
+ rootBlockFP = (new ByteArrayDataInput(rootCode.bytes, rootCode.offset, rootCode.length)).readVLong() >>> TempBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS;
if (indexIn != null) {
final IndexInput clone = indexIn.clone();
@@ -684,7 +684,7 @@ public class TempBlockTermsReader extend
// Skip first long -- has redundant fp, hasTerms
// flag, isFloor flag
final long code = floorDataReader.readVLong();
- if ((code & TempBlockTermsWriter.OUTPUT_FLAG_IS_FLOOR) != 0) {
+ if ((code & TempBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR) != 0) {
numFollowFloorBlocks = floorDataReader.readVInt();
nextFloorLabel = floorDataReader.readByte() & 0xff;
// if (DEBUG) System.out.println(" numFollowFloorBlocks=" + numFollowFloorBlocks + " nextFloorLabel=" + nextFloorLabel);
@@ -844,7 +844,7 @@ public class TempBlockTermsReader extend
// }
runAutomaton = compiled.runAutomaton;
compiledAutomaton = compiled;
- in = TempBlockTermsReader.this.in.clone();
+ in = TempBlockTreeTermsReader.this.in.clone();
stack = new Frame[5];
for(int idx=0;idx<stack.length;idx++) {
stack[idx] = new Frame(idx);
@@ -1334,7 +1334,7 @@ public class TempBlockTermsReader extend
// Not private to avoid synthetic access$NNN methods
void initIndexInput() {
if (this.in == null) {
- this.in = TempBlockTermsReader.this.in.clone();
+ this.in = TempBlockTreeTermsReader.this.in.clone();
}
}
@@ -1464,11 +1464,11 @@ public class TempBlockTermsReader extend
Frame pushFrame(FST.Arc<BytesRef> arc, BytesRef frameData, int length) throws IOException {
scratchReader.reset(frameData.bytes, frameData.offset, frameData.length);
final long code = scratchReader.readVLong();
- final long fpSeek = code >>> TempBlockTermsWriter.OUTPUT_FLAGS_NUM_BITS;
+ final long fpSeek = code >>> TempBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS;
final Frame f = getFrame(1+currentFrame.ord);
- f.hasTerms = (code & TempBlockTermsWriter.OUTPUT_FLAG_HAS_TERMS) != 0;
+ f.hasTerms = (code & TempBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS) != 0;
f.hasTermsOrig = f.hasTerms;
- f.isFloor = (code & TempBlockTermsWriter.OUTPUT_FLAG_IS_FLOOR) != 0;
+ f.isFloor = (code & TempBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR) != 0;
if (f.isFloor) {
f.setFloorData(scratchReader, frameData);
}
@@ -2048,9 +2048,9 @@ public class TempBlockTermsReader extend
assert f != null;
final BytesRef prefix = new BytesRef(term.bytes, 0, f.prefix);
if (f.nextEnt == -1) {
- out.println(" frame " + (isSeekFrame ? "(seek)" : "(next)") + " ord=" + ord + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" + f.prefix + " prefix=" + prefix + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code=" + ((f.fp<<TempBlockTermsWriter.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms ? TempBlockTermsWriter.OUTPUT_FLAG_HAS_TERMS:0) + (f.isFloor ? TempBlockTermsWriter.OUTPUT_FLAG_IS_FLOOR:0)) + " isLastInFloor=" + f.isLastInFloor + " mdUpto=" + f.metaDataUpto + " tbOrd=" + f.getTermBlockOrd());
+ out.println(" frame " + (isSeekFrame ? "(seek)" : "(next)") + " ord=" + ord + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" + f.prefix + " prefix=" + prefix + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code=" + ((f.fp<<TempBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms ? TempBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS:0) + (f.isFloor ? TempBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR:0)) + " isLastInFloor=" + f.isLastInFloor + " mdUpto=" + f.metaDataUpto + " tbOrd=" + f.getTermBlockOrd());
} else {
- out.println(" frame " + (isSeekFrame ? "(seek, loaded)" : "(next, loaded)") + " ord=" + ord + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" + f.prefix + " prefix=" + prefix + " nextEnt=" + f.nextEnt + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code=" + ((f.fp<<TempBlockTermsWriter.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms ? TempBlockTermsWriter.OUTPUT_FLAG_HAS_TERMS:0) + (f.isFloor ? TempBlockTermsWriter.OUTPUT_FLAG_IS_FLOOR:0)) + " lastSubFP=" + f.lastSubFP + " isLastInFloor=" + f.isLastInFloor + " mdUpto=" + f.metaDataUpto + " tbOrd=" + f.getTermBlockOrd());
+ out.println(" frame " + (isSeekFrame ? "(seek, loaded)" : "(next, loaded)") + " ord=" + ord + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" + f.prefix + " prefix=" + prefix + " nextEnt=" + f.nextEnt + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code=" + ((f.fp<<TempBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms ? TempBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS:0) + (f.isFloor ? TempBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR:0)) + " lastSubFP=" + f.lastSubFP + " isLastInFloor=" + f.isLastInFloor + " mdUpto=" + f.metaDataUpto + " tbOrd=" + f.getTermBlockOrd());
}
if (index != null) {
assert !isSeekFrame || f.arc != null: "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc;
@@ -2065,7 +2065,7 @@ public class TempBlockTermsReader extend
} else if (isSeekFrame && !f.isFloor) {
final ByteArrayDataInput reader = new ByteArrayDataInput(output.bytes, output.offset, output.length);
final long codeOrig = reader.readVLong();
- final long code = (f.fp << TempBlockTermsWriter.OUTPUT_FLAGS_NUM_BITS) | (f.hasTerms ? TempBlockTermsWriter.OUTPUT_FLAG_HAS_TERMS:0) | (f.isFloor ? TempBlockTermsWriter.OUTPUT_FLAG_IS_FLOOR:0);
+ final long code = (f.fp << TempBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS) | (f.hasTerms ? TempBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS:0) | (f.isFloor ? TempBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR:0);
if (codeOrig != code) {
out.println(" broken seek state: output code=" + codeOrig + " doesn't match frame code=" + code);
throw new RuntimeException("seek state is broken");
Copied: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTreeTermsWriter.java (from r1514245, lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTermsWriter.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTreeTermsWriter.java?p2=lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTreeTermsWriter.java&p1=lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTermsWriter.java&r1=1514245&r2=1514253&rev=1514253&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTermsWriter.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTreeTermsWriter.java Thu Aug 15 13:15:46 2013
@@ -178,20 +178,20 @@ import org.apache.lucene.codecs.CodecUti
* sub-block, and its file pointer.
* </ul>
*
- * @see TempBlockTermsReader
+ * @see TempBlockTreeTermsReader
* @lucene.experimental
*/
-public class TempBlockTermsWriter extends FieldsConsumer {
+public class TempBlockTreeTermsWriter extends FieldsConsumer {
/** Suggested default value for the {@code
* minItemsInBlock} parameter to {@link
- * #TempBlockTermsWriter(SegmentWriteState,TempPostingsWriterBase,int,int)}. */
+ * #TempBlockTreeTermsWriter(SegmentWriteState,TempPostingsWriterBase,int,int)}. */
public final static int DEFAULT_MIN_BLOCK_SIZE = 25;
/** Suggested default value for the {@code
* maxItemsInBlock} parameter to {@link
- * #TempBlockTermsWriter(SegmentWriteState,TempPostingsWriterBase,int,int)}. */
+ * #TempBlockTreeTermsWriter(SegmentWriteState,TempPostingsWriterBase,int,int)}. */
public final static int DEFAULT_MAX_BLOCK_SIZE = 48;
//public final static boolean DEBUG = false;
@@ -268,7 +268,7 @@ public class TempBlockTermsWriter extend
* sub-blocks) per block will aim to be between
* minItemsPerBlock and maxItemsPerBlock, though in some
* cases the blocks may be smaller than the min. */
- public TempBlockTermsWriter(
+ public TempBlockTreeTermsWriter(
SegmentWriteState state,
TempPostingsWriterBase postingsWriter,
int minItemsInBlock,
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempPostingsReader.java?rev=1514253&r1=1514252&r2=1514253&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempPostingsReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempPostingsReader.java Thu Aug 15 13:15:46 2013
@@ -71,7 +71,7 @@ public final class TempPostingsReader ex
IndexInput posIn = null;
IndexInput payIn = null;
try {
- docIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, TempBlockPostingsFormat.DOC_EXTENSION),
+ docIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, TempBlockTreePostingsFormat.DOC_EXTENSION),
ioContext);
CodecUtil.checkHeader(docIn,
TempPostingsWriter.DOC_CODEC,
@@ -80,7 +80,7 @@ public final class TempPostingsReader ex
forUtil = new ForUtil(docIn);
if (fieldInfos.hasProx()) {
- posIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, TempBlockPostingsFormat.POS_EXTENSION),
+ posIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, TempBlockTreePostingsFormat.POS_EXTENSION),
ioContext);
CodecUtil.checkHeader(posIn,
TempPostingsWriter.POS_CODEC,
@@ -88,7 +88,7 @@ public final class TempPostingsReader ex
TempPostingsWriter.VERSION_CURRENT);
if (fieldInfos.hasPayloads() || fieldInfos.hasOffsets()) {
- payIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, TempBlockPostingsFormat.PAY_EXTENSION),
+ payIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, TempBlockTreePostingsFormat.PAY_EXTENSION),
ioContext);
CodecUtil.checkHeader(payIn,
TempPostingsWriter.PAY_CODEC,
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempPostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempPostingsWriter.java?rev=1514253&r1=1514252&r2=1514253&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempPostingsWriter.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempPostingsWriter.java Thu Aug 15 13:15:46 2013
@@ -119,7 +119,7 @@ public final class TempPostingsWriter ex
public TempPostingsWriter(SegmentWriteState state, float acceptableOverheadRatio) throws IOException {
super();
- docOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempBlockPostingsFormat.DOC_EXTENSION),
+ docOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempBlockTreePostingsFormat.DOC_EXTENSION),
state.context);
IndexOutput posOut = null;
IndexOutput payOut = null;
@@ -129,7 +129,7 @@ public final class TempPostingsWriter ex
forUtil = new ForUtil(acceptableOverheadRatio, docOut);
if (state.fieldInfos.hasProx()) {
posDeltaBuffer = new int[MAX_DATA_SIZE];
- posOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempBlockPostingsFormat.POS_EXTENSION),
+ posOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempBlockTreePostingsFormat.POS_EXTENSION),
state.context);
CodecUtil.writeHeader(posOut, POS_CODEC, VERSION_CURRENT);
@@ -150,7 +150,7 @@ public final class TempPostingsWriter ex
}
if (state.fieldInfos.hasPayloads() || state.fieldInfos.hasOffsets()) {
- payOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempBlockPostingsFormat.PAY_EXTENSION),
+ payOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempBlockTreePostingsFormat.PAY_EXTENSION),
state.context);
CodecUtil.writeHeader(payOut, PAY_CODEC, VERSION_CURRENT);
}
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempTermState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempTermState.java?rev=1514253&r1=1514252&r2=1514253&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempTermState.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempTermState.java Thu Aug 15 13:15:46 2013
@@ -20,6 +20,7 @@ import java.util.Arrays;
import org.apache.lucene.index.DocsEnum; // javadocs
import org.apache.lucene.codecs.TempPostingsReaderBase; // javadocs
+import org.apache.lucene.index.OrdTermState;
import org.apache.lucene.index.TermState;
import org.apache.lucene.store.ByteArrayDataInput;
@@ -28,7 +29,7 @@ import org.apache.lucene.store.ByteArray
* to produce a {@link DocsEnum} without re-seeking the
* terms dict.
*/
-public class TempTermState extends TermState {
+public class TempTermState extends OrdTermState {
/** how many docs have this term */
public int docFreq;
/** total number of occurrences of this term */
@@ -36,6 +37,8 @@ public class TempTermState extends TermS
/** the term's ord in the current block */
public int termBlockOrd;
+ /** fp into the terms dict primary file (_X.tim) that holds this term */
+ public long blockFilePointer;
/** Sole constructor. (For invocation by subclass
* constructors, typically implicit.) */
@@ -46,13 +49,15 @@ public class TempTermState extends TermS
public void copyFrom(TermState _other) {
assert _other instanceof TempTermState : "can not copy from " + _other.getClass().getName();
TempTermState other = (TempTermState) _other;
+ super.copyFrom(_other);
docFreq = other.docFreq;
totalTermFreq = other.totalTermFreq;
termBlockOrd = other.termBlockOrd;
+ blockFilePointer = other.blockFilePointer;
}
@Override
public String toString() {
- return "docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq + " termBlockOrd=" + termBlockOrd;
+ return "docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq + " termBlockOrd=" + termBlockOrd + " blockFP=" + blockFilePointer;
}
}
Modified: lucene/dev/branches/lucene3069/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat?rev=1514253&r1=1514252&r2=1514253&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat Thu Aug 15 13:15:46 2013
@@ -15,6 +15,6 @@
org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat
org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat
-org.apache.lucene.codecs.temp.TempBlockPostingsFormat
+org.apache.lucene.codecs.temp.TempBlockTreePostingsFormat
org.apache.lucene.codecs.temp.TempFSTPostingsFormat
org.apache.lucene.codecs.temp.TempFSTOrdPostingsFormat