You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2011/01/21 00:01:29 UTC
svn commit: r1061561 [2/2] - in /lucene/dev/branches/bulkpostings: ./ lucene/
lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/
lucene/contrib/memory/src/java/org/apache/lucene/index/memory/
lucene/contrib/misc/src/java/org/apa...
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java Thu Jan 20 23:01:27 2011
@@ -20,17 +20,19 @@ package org.apache.lucene.index.codecs.s
import java.io.IOException;
import java.util.Collection;
+import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.BulkPostingsEnum;
-import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.codecs.BlockTermState;
import org.apache.lucene.index.codecs.PostingsReaderBase;
-import org.apache.lucene.index.codecs.PrefixCodedTermState;
+import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
@@ -131,16 +133,31 @@ public class SepPostingsReaderImpl exten
}
}
- private static final class SepTermState extends PrefixCodedTermState {
+ private static final class SepTermState extends BlockTermState {
// Seek point into the docs file. The freq and pos seek
// points, and the payload and skip file pointers, are no
// longer read from the docs file; they are held below:
IntIndexInput.Index docIndex;
-
+ IntIndexInput.Index posIndex;
+ IntIndexInput.Index freqIndex;
+ long payloadFP;
+ long skipFP;
+
+ // Only used for "primary" term state; these are never
+ // copied on clone:
+ byte[] bytes;
+ ByteArrayDataInput bytesReader;
+
@Override
public Object clone() {
SepTermState other = (SepTermState) super.clone();
other.docIndex = (IntIndexInput.Index) docIndex.clone();
+ if (freqIndex != null) {
+ other.freqIndex = (IntIndexInput.Index) freqIndex.clone();
+ }
+ if (posIndex != null) {
+ other.posIndex = (IntIndexInput.Index) posIndex.clone();
+ }
return other;
}
@@ -148,28 +165,87 @@ public class SepPostingsReaderImpl exten
super.copyFrom(_other);
SepTermState other = (SepTermState) _other;
docIndex.set(other.docIndex);
+ if (freqIndex != null && other.freqIndex != null) {
+ freqIndex.set(other.freqIndex);
+ }
+ if (posIndex != null && other.posIndex != null) {
+ posIndex.set(other.posIndex);
+ }
+ payloadFP = other.payloadFP;
+ skipFP = other.skipFP;
}
@Override
public String toString() {
- return "tis.fp=" + filePointer + " docFreq=" + docFreq + " ord=" + ord + " docIndex=" + docIndex;
+ return super.toString() + " docIndex=" + docIndex + " freqIndex=" + freqIndex + " posIndex=" + posIndex + " payloadFP=" + payloadFP + " skipFP=" + skipFP;
}
}
@Override
- public PrefixCodedTermState newTermState() throws IOException {
- final SepTermState state = new SepTermState();
+ public BlockTermState newTermState() throws IOException {
+ final SepTermState state = new SepTermState();
state.docIndex = docIn.index();
+ if (freqIn != null) {
+ state.freqIndex = freqIn.index();
+ }
+ if (posIn != null) {
+ state.posIndex = posIn.index();
+ }
return state;
}
@Override
- public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, PrefixCodedTermState termState, boolean isIndexTerm) throws IOException {
- ((SepTermState) termState).docIndex.read(termsIn, isIndexTerm);
+ public void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
+ final SepTermState termState = (SepTermState) _termState;
+ final int len = termsIn.readVInt();
+ //System.out.println("SepR.readTermsBlock len=" + len);
+ if (termState.bytes == null) {
+ termState.bytes = new byte[ArrayUtil.oversize(len, 1)];
+ termState.bytesReader = new ByteArrayDataInput(termState.bytes);
+ } else if (termState.bytes.length < len) {
+ termState.bytes = new byte[ArrayUtil.oversize(len, 1)];
+ }
+ termState.bytesReader.reset(termState.bytes, 0, len);
+ termsIn.readBytes(termState.bytes, 0, len);
+ }
+
+ @Override
+ public void nextTerm(FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
+ final SepTermState termState = (SepTermState) _termState;
+ //System.out.println("SepR.nextTerm termCount=" + termState.termCount);
+ //System.out.println(" docFreq=" + termState.docFreq);
+ final boolean isFirstTerm = termState.termCount == 0;
+ termState.docIndex.read(termState.bytesReader, isFirstTerm);
+ //System.out.println(" docIndex=" + termState.docIndex);
+ if (!fieldInfo.omitTermFreqAndPositions) {
+ termState.freqIndex.read(termState.bytesReader, isFirstTerm);
+ //System.out.println(" freqIndex=" + termState.freqIndex);
+ termState.posIndex.read(termState.bytesReader, isFirstTerm);
+ //System.out.println(" posIndex=" + termState.posIndex);
+ if (fieldInfo.storePayloads) {
+ if (isFirstTerm) {
+ termState.payloadFP = termState.bytesReader.readVLong();
+ } else {
+ termState.payloadFP += termState.bytesReader.readVLong();
+ }
+ //System.out.println(" payloadFP=" + termState.payloadFP);
+ }
+ }
+ if (termState.docFreq >= skipInterval) {
+ //System.out.println(" readSkip @ " + termState.bytesReader.pos);
+ if (isFirstTerm) {
+ termState.skipFP = termState.bytesReader.readVLong();
+ } else {
+ termState.skipFP += termState.bytesReader.readVLong();
+ }
+ //System.out.println(" skipFP=" + termState.skipFP);
+ } else if (isFirstTerm) {
+ termState.skipFP = termState.bytesReader.readVLong();
+ }
}
@Override
- public DocsEnum docs(FieldInfo fieldInfo, PrefixCodedTermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException {
+ public DocsEnum docs(FieldInfo fieldInfo, BlockTermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException {
final SepTermState termState = (SepTermState) _termState;
SepDocsEnum docsEnum;
if (reuse == null || !(reuse instanceof SepDocsEnum) || !((SepDocsEnum) reuse).canReuse(docIn)) {
@@ -184,7 +260,7 @@ public class SepPostingsReaderImpl exten
private SepBulkPostingsEnum lastBulkEnum;
@Override
- public BulkPostingsEnum bulkPostings(FieldInfo fieldInfo, TermState _termState, BulkPostingsEnum reuse, boolean doFreqs, boolean doPositions) throws IOException {
+ public BulkPostingsEnum bulkPostings(FieldInfo fieldInfo, BlockTermState _termState, BulkPostingsEnum reuse, boolean doFreqs, boolean doPositions) throws IOException {
final SepTermState termState = (SepTermState) _termState;
final SepBulkPostingsEnum lastBulkEnum = this.lastBulkEnum;
if (lastBulkEnum != null && reuse == lastBulkEnum) {
@@ -202,8 +278,7 @@ public class SepPostingsReaderImpl exten
}
}
- @Override
- public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, PrefixCodedTermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
+ public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
assert !fieldInfo.omitTermFreqAndPositions;
final SepTermState termState = (SepTermState) _termState;
SepDocsAndPositionsEnum postingsEnum;
@@ -234,7 +309,7 @@ public class SepPostingsReaderImpl exten
private final int[] freqBuffer;
private int freqUpto;
private int freqLimit;
- private long skipOffset;
+ private long skipFP;
private final IntIndexInput.Index docIndex;
private final IntIndexInput.Index freqIndex;
@@ -282,28 +357,22 @@ public class SepPostingsReaderImpl exten
docDeltaUpto = docReader.offset();
if (!omitTF) {
- freqIndex.read(docReader, true);
+ freqIndex.set(termState.freqIndex);
freqIndex.seek(freqReader);
freqUpto = freqReader.offset();
freqLimit = freqReader.end();
//System.out.println(" freqIndex=" + freqIndex + " posIndex=" + posIndex);
-
- posIndex.read(docReader, true);
- if (storePayloads) {
- // skip payload offset
- IntIndexInput.readVLong(docReader);
- }
} else {
freq = 1;
}
- skipOffset = IntIndexInput.readVLong(docReader);
-
docDeltaUpto = docReader.offset();
docDeltaLimit = docReader.end();
docFreq = termState.docFreq;
assert docFreq > 0;
+ // NOTE: unused if docFreq < skipInterval:
+ skipFP = termState.skipFP;
count = 0;
doc = 0;
skipped = false;
@@ -355,7 +424,6 @@ public class SepPostingsReaderImpl exten
break;
}
}
-
return doc;
}
@@ -396,7 +464,7 @@ public class SepPostingsReaderImpl exten
if (!skipped) {
//System.out.println(" init skipper2");
// We haven't yet skipped for this posting
- skipper.init(skipOffset,
+ skipper.init(skipFP,
docIndex,
freqIndex,
posIndex,
@@ -461,8 +529,7 @@ public class SepPostingsReaderImpl exten
private final int[] posBuffer;
private int posUpto;
private int posLimit;
- private long skipOffset;
- private long payloadOffset;
+ private long skipFP;
private final IndexInput payloadIn;
@@ -471,6 +538,8 @@ public class SepPostingsReaderImpl exten
private final IntIndexInput.Index posIndex;
private final IntIndexInput startDocIn;
+ private long payloadFP;
+
private int pendingPosCount;
private int position;
private int payloadLength;
@@ -504,6 +573,7 @@ public class SepPostingsReaderImpl exten
//System.out.println("sep d&p init");
assert !fieldInfo.omitTermFreqAndPositions;
storePayloads = fieldInfo.storePayloads;
+ //System.out.println("Sep D&P init");
// TODO: can't we only do this if consumer
// skipped consuming the previous docs?
@@ -513,23 +583,22 @@ public class SepPostingsReaderImpl exten
docIndex.seek(docReader);
docDeltaLimit = docReader.end();
docDeltaUpto = docReader.offset();
+ //System.out.println(" docIndex=" + docIndex);
- freqIndex.read(docReader, true);
+ freqIndex.set(termState.freqIndex);
freqIndex.seek(freqReader);
freqLimit = freqReader.end();
freqUpto = freqReader.offset();
//System.out.println(" freqIndex=" + freqIndex);
- posIndex.read(docReader, true);
+ posIndex.set(termState.posIndex);
+ //System.out.println(" posIndex=" + posIndex);
posSeekPending = true;
payloadPending = false;
- if (storePayloads) {
- payloadOffset = IntIndexInput.readVLong(docReader);
- }
- //System.out.println(" payloadOffset=" + payloadOffset);
- skipOffset = IntIndexInput.readVLong(docReader);
- //System.out.println(" skipOffset=" + skipOffset);
+ payloadFP = termState.payloadFP;
+ skipFP = termState.skipFP;
+ //System.out.println(" skipFP=" + skipFP);
docDeltaLimit = docReader.end();
docDeltaUpto = docReader.offset();
@@ -575,7 +644,8 @@ public class SepPostingsReaderImpl exten
// Decode next doc
doc += docDeltaBuffer[docDeltaUpto++];
-
+
+ //System.out.println(" sep d&p read freq");
if (freqUpto == freqLimit) {
// refill
freqLimit = freqReader.fill();
@@ -606,6 +676,7 @@ public class SepPostingsReaderImpl exten
@Override
public int advance(int target) throws IOException {
+ //System.out.println("SepD&P advance target=" + target + " vs current=" + doc + " this=" + this);
// TODO: jump right to next() if target is < X away
// from where we are now?
@@ -616,6 +687,7 @@ public class SepPostingsReaderImpl exten
// skip data
if (skipper == null) {
+ //System.out.println(" create skipper");
// This DocsEnum has never done any skipping
skipper = new SepSkipListReader((IndexInput) skipIn.clone(),
freqIn,
@@ -626,18 +698,19 @@ public class SepPostingsReaderImpl exten
}
if (!skipped) {
+ //System.out.println(" init skip data skipFP=" + skipFP);
// We haven't yet skipped for this posting
- skipper.init(skipOffset,
+ skipper.init(skipFP,
docIndex,
freqIndex,
posIndex,
- payloadOffset,
+ payloadFP,
docFreq,
storePayloads);
skipped = true;
}
-
final int newCount = skipper.skipTo(target);
+ //System.out.println(" skip newCount=" + newCount + " vs " + count);
if (newCount > count) {
@@ -650,26 +723,35 @@ public class SepPostingsReaderImpl exten
docDeltaUpto = docReader.offset();
docDeltaLimit = docReader.end();
+ // NOTE: don't seek pos here; do it lazily
+ // instead. Eg a PhraseQuery may skip to many
+ // docs before finally asking for positions...
posIndex.set(skipper.getPosIndex());
posSeekPending = true;
count = newCount;
doc = skipper.getDoc();
- payloadOffset = skipper.getPayloadPointer();
+ //System.out.println(" moved to doc=" + doc);
+ //payloadIn.seek(skipper.getPayloadPointer());
+ payloadFP = skipper.getPayloadPointer();
pendingPosCount = 0;
pendingPayloadBytes = 0;
payloadPending = false;
payloadLength = skipper.getPayloadLength();
+ //System.out.println(" move payloadLen=" + payloadLength);
}
}
// Now, linear scan for the rest:
do {
if (nextDoc() == NO_MORE_DOCS) {
+ //System.out.println(" advance nextDoc=END");
return NO_MORE_DOCS;
}
+ //System.out.println(" advance nextDoc=" + doc);
} while (target > doc);
+ //System.out.println(" return doc=" + doc);
return doc;
}
@@ -679,7 +761,7 @@ public class SepPostingsReaderImpl exten
posIndex.seek(posReader);
posLimit = posReader.end();
posUpto = posReader.offset();
- payloadIn.seek(payloadOffset);
+ payloadIn.seek(payloadFP);
posSeekPending = false;
}
@@ -776,7 +858,7 @@ public class SepPostingsReaderImpl exten
private final boolean storePayloads;
private final boolean omitTF;
- private long skipOffset;
+ private long skipFP;
private final IntIndexInput startDocIn;
@@ -934,18 +1016,15 @@ public class SepPostingsReaderImpl exten
// make this a relative index read (pass false not
// true), eg relative to first term in the terms
// index block
- freqIndex.read(docReader, true);
+ freqIndex.set(termState.freqIndex);
if (freqReader != null) {
freqIndex.seek(freqReader);
}
- posIndex.read(docReader, true);
- if (storePayloads) {
- // skip payload offset
- IntIndexInput.readVLong(docReader);
- }
+
+ posIndex.set(termState.posIndex);
}
- skipOffset = IntIndexInput.readVLong(docReader);
+ skipFP = termState.skipFP;
//System.out.println("skipOffset=" + skipOffset);
if (posReader != null) {
@@ -1006,7 +1085,7 @@ public class SepPostingsReaderImpl exten
if (!skipped) {
// We haven't yet skipped for this particular posting
- skipper.init(skipOffset,
+ skipper.init(skipFP,
docIndex,
freqIndex,
posIndex,
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java Thu Jan 20 23:01:27 2011
@@ -27,6 +27,7 @@ import org.apache.lucene.index.SegmentWr
import org.apache.lucene.index.codecs.PostingsWriterBase;
import org.apache.lucene.index.codecs.TermStats;
import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
@@ -69,8 +70,7 @@ public final class SepPostingsWriterImpl
boolean storePayloads;
boolean omitTF;
- // Starts a new term
- long lastSkipStart;
+ long lastSkipFP;
FieldInfo fieldInfo;
@@ -80,7 +80,10 @@ public final class SepPostingsWriterImpl
long lastPayloadStart;
int lastDocID;
int df;
- private boolean firstDoc;
+ private int pendingTermCount;
+
+ // Holds pending byte[] blob for the current terms block
+ private final RAMOutputStream indexBytesWriter = new RAMOutputStream();
public SepPostingsWriterImpl(SegmentWriteState state, IntStreamFactory factory) throws IOException {
super();
@@ -144,13 +147,9 @@ public final class SepPostingsWriterImpl
payloadStart = payloadOut.getFilePointer();
lastPayloadLength = -1;
}
- firstDoc = true;
skipListWriter.resetSkip(docIndex, freqIndex, posIndex);
}
- // TODO: -- should we NOT reuse across fields? would
- // be cleaner
-
// Currently, this instance is re-used across fields, so
// our parent calls setField whenever the field changes
@Override
@@ -161,31 +160,13 @@ public final class SepPostingsWriterImpl
storePayloads = !omitTF && fieldInfo.storePayloads;
}
-
/** Adds a new doc in this term: writes its doc delta and,
* unless term freqs are omitted, its freq. Returns nothing. */
@Override
public void startDoc(int docID, int termDocFreq) throws IOException {
- if (firstDoc) {
- // nocommit: we are writing absolute file pointers below,
- // which is wasteful. It'd be better compression to
- // write the "baseline" into each indexed term, then
- // write only the delta here.
- if (!omitTF) {
- freqIndex.write(docOut, true);
- posIndex.write(docOut, true);
- if (fieldInfo.storePayloads) {
- docOut.writeVLong(payloadStart);
- }
- }
- // nocommit -- only write if docFreq > skipInterval
- // nocommit -- use delta not abs
- docOut.writeVLong(skipOut.getFilePointer());
- firstDoc = false;
- }
-
final int delta = docID - lastDocID;
+ //System.out.println("SepW startDoc: write doc=" + docID + " delta=" + delta);
if (docID < 0 || (df > 0 && delta <= 0)) {
throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " )");
@@ -194,6 +175,7 @@ public final class SepPostingsWriterImpl
if ((++df % skipInterval) == 0) {
// TODO: -- awkward we have to make these two
// separate calls to skipper
+ //System.out.println(" buffer skip lastDocID=" + lastDocID);
skipListWriter.setSkipData(lastDocID, storePayloads, lastPayloadLength);
skipListWriter.bufferSkip(df);
}
@@ -202,10 +184,20 @@ public final class SepPostingsWriterImpl
//System.out.println("sepw: write docID=" + docID);
docOut.write(delta);
if (!omitTF) {
+ //System.out.println(" sepw startDoc: write freq=" + termDocFreq);
freqOut.write(termDocFreq);
}
}
+ @Override
+ public void flushTermsBlock() throws IOException {
+ //System.out.println("SepW.flushTermsBlock: pendingTermCount=" + pendingTermCount + " bytesUsed=" + indexBytesWriter.getFilePointer());
+ termsOut.writeVLong((int) indexBytesWriter.getFilePointer());
+ indexBytesWriter.writeTo(termsOut);
+ indexBytesWriter.reset();
+ pendingTermCount = 0;
+ }
+
/** Add a new position & payload */
@Override
public void addPosition(int position, BytesRef payload) throws IOException {
@@ -245,20 +237,57 @@ public final class SepPostingsWriterImpl
/** Called when we are done adding docs to this term */
@Override
- public void finishTerm(TermStats stats, boolean isIndexTerm) throws IOException {
-
+ public void finishTerm(TermStats stats) throws IOException {
// TODO: -- wasteful we are counting this in two places?
assert stats.docFreq > 0;
assert stats.docFreq == df;
- docIndex.write(termsOut, isIndexTerm);
+ final boolean isFirstTerm = pendingTermCount == 0;
+ //System.out.println("SepW.finishTerm: isFirstTerm=" + isFirstTerm);
+
+ docIndex.write(indexBytesWriter, isFirstTerm);
+ //System.out.println(" docIndex=" + docIndex);
+
+ if (!omitTF) {
+ freqIndex.write(indexBytesWriter, isFirstTerm);
+ //System.out.println(" freqIndex=" + freqIndex);
+
+ posIndex.write(indexBytesWriter, isFirstTerm);
+ //System.out.println(" posIndex=" + posIndex);
+ if (storePayloads) {
+ if (isFirstTerm) {
+ indexBytesWriter.writeVLong(payloadStart);
+ } else {
+ indexBytesWriter.writeVLong(payloadStart - lastPayloadStart);
+ }
+ lastPayloadStart = payloadStart;
+ //System.out.println(" payloadFP=" + payloadStart);
+ }
+ }
if (df >= skipInterval) {
+ //System.out.println(" skipFP=" + skipStart);
+ final long skipFP = skipOut.getFilePointer();
skipListWriter.writeSkip(skipOut);
+ //System.out.println(" writeSkip @ " + indexBytesWriter.getFilePointer());
+ if (isFirstTerm) {
+ indexBytesWriter.writeVLong(skipFP);
+ } else {
+ indexBytesWriter.writeVLong(skipFP - lastSkipFP);
+ }
+ lastSkipFP = skipFP;
+ } else if (isFirstTerm) {
+ // TODO: this is somewhat wasteful; eg if no terms in
+ // this block will use skip data, we don't need to
+ // write this:
+ final long skipFP = skipOut.getFilePointer();
+ indexBytesWriter.writeVLong(skipFP);
+ lastSkipFP = skipFP;
}
lastDocID = 0;
df = 0;
+ pendingTermCount++;
}
@Override
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java Thu Jan 20 23:01:27 2011
@@ -33,8 +33,8 @@ import org.apache.lucene.index.codecs.Te
import org.apache.lucene.index.codecs.TermsIndexReaderBase;
import org.apache.lucene.index.codecs.VariableGapTermsIndexWriter;
import org.apache.lucene.index.codecs.VariableGapTermsIndexReader;
-import org.apache.lucene.index.codecs.PrefixCodedTermsWriter;
-import org.apache.lucene.index.codecs.PrefixCodedTermsReader;
+import org.apache.lucene.index.codecs.BlockTermsWriter;
+import org.apache.lucene.index.codecs.BlockTermsReader;
import org.apache.lucene.store.Directory;
/** Default codec.
@@ -66,7 +66,7 @@ public class StandardCodec extends Codec
success = false;
try {
- FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, docs, BytesRef.getUTF8SortedAsUnicodeComparator());
+ FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, docs, BytesRef.getUTF8SortedAsUnicodeComparator());
success = true;
return ret;
} finally {
@@ -103,15 +103,15 @@ public class StandardCodec extends Codec
success = false;
try {
- FieldsProducer ret = new PrefixCodedTermsReader(indexReader,
- state.dir,
- state.fieldInfos,
- state.segmentInfo.name,
- postings,
- state.readBufferSize,
- BytesRef.getUTF8SortedAsUnicodeComparator(),
- TERMS_CACHE_SIZE,
- state.codecId);
+ FieldsProducer ret = new BlockTermsReader(indexReader,
+ state.dir,
+ state.fieldInfos,
+ state.segmentInfo.name,
+ postings,
+ state.readBufferSize,
+ BytesRef.getUTF8SortedAsUnicodeComparator(),
+ TERMS_CACHE_SIZE,
+ state.codecId);
success = true;
return ret;
} finally {
@@ -134,7 +134,7 @@ public class StandardCodec extends Codec
@Override
public void files(Directory dir, SegmentInfo segmentInfo, String id, Set<String> files) throws IOException {
StandardPostingsReader.files(dir, segmentInfo, id, files);
- PrefixCodedTermsReader.files(dir, segmentInfo, id, files);
+ BlockTermsReader.files(dir, segmentInfo, id, files);
VariableGapTermsIndexReader.files(dir, segmentInfo, id, files);
}
@@ -146,7 +146,7 @@ public class StandardCodec extends Codec
public static void getStandardExtensions(Set<String> extensions) {
extensions.add(FREQ_EXTENSION);
extensions.add(PROX_EXTENSION);
- PrefixCodedTermsReader.getExtensions(extensions);
+ BlockTermsReader.getExtensions(extensions);
VariableGapTermsIndexReader.getIndexExtensions(extensions);
}
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java Thu Jan 20 23:01:27 2011
@@ -20,17 +20,21 @@ package org.apache.lucene.index.codecs.s
import java.io.IOException;
import java.util.Collection;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.BulkPostingsEnum;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.codecs.BlockTermState;
import org.apache.lucene.index.codecs.PostingsReaderBase;
-import org.apache.lucene.index.codecs.PrefixCodedTermState;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
@@ -47,9 +51,12 @@ public class StandardPostingsReader exte
int skipInterval;
int maxSkipLevels;
+ //private String segment;
+
public StandardPostingsReader(Directory dir, SegmentInfo segmentInfo, int readBufferSize, String codecId) throws IOException {
freqIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, StandardCodec.FREQ_EXTENSION),
readBufferSize);
+ //this.segment = segmentInfo.name;
if (segmentInfo.getHasProx()) {
boolean success = false;
try {
@@ -85,11 +92,16 @@ public class StandardPostingsReader exte
}
// Must keep final because we do non-standard clone
- private final static class StandardTermState extends PrefixCodedTermState {
+ private final static class StandardTermState extends BlockTermState {
long freqOffset;
long proxOffset;
int skipOffset;
+ // Only used by the "primary" TermState -- clones don't
+ // copy this (basically they are "transient"):
+ ByteArrayDataInput bytesReader;
+ byte[] bytes;
+
public Object clone() {
StandardTermState other = new StandardTermState();
other.copyFrom(this);
@@ -102,6 +114,11 @@ public class StandardPostingsReader exte
freqOffset = other.freqOffset;
proxOffset = other.proxOffset;
skipOffset = other.skipOffset;
+
+ // Do not copy bytes, bytesReader (else TermState is
+ // very heavy, ie drags around the entire block's
+ // byte[]). On seek back, if next() is in fact used
+ // (rare!), they will be re-read from disk.
}
public String toString() {
@@ -110,7 +127,7 @@ public class StandardPostingsReader exte
}
@Override
- public PrefixCodedTermState newTermState() {
+ public BlockTermState newTermState() {
return new StandardTermState();
}
@@ -127,34 +144,58 @@ public class StandardPostingsReader exte
}
}
+ /* Reads but does not decode the byte[] blob holding
+ metadata for the current terms block */
@Override
- public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, PrefixCodedTermState termState, boolean isIndexTerm)
+ public void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
+ final StandardTermState termState = (StandardTermState) _termState;
+
+ final int len = termsIn.readVInt();
+ //System.out.println("SPR.readTermsBlock termsIn.fp=" + termsIn.getFilePointer());
+ if (termState.bytes == null) {
+ termState.bytes = new byte[ArrayUtil.oversize(len, 1)];
+ termState.bytesReader = new ByteArrayDataInput(null);
+ } else if (termState.bytes.length < len) {
+ termState.bytes = new byte[ArrayUtil.oversize(len, 1)];
+ }
+
+ termsIn.readBytes(termState.bytes, 0, len);
+ termState.bytesReader.reset(termState.bytes, 0, len);
+ }
+
+ @Override
+ public void nextTerm(FieldInfo fieldInfo, BlockTermState _termState)
throws IOException {
- final StandardTermState docTermState = (StandardTermState) termState;
+ final StandardTermState termState = (StandardTermState) _termState;
+ //System.out.println("StandardR.nextTerm seg=" + segment);
+ final boolean isFirstTerm = termState.termCount == 0;
- if (isIndexTerm) {
- docTermState.freqOffset = termsIn.readVLong();
+ if (isFirstTerm) {
+ termState.freqOffset = termState.bytesReader.readVLong();
} else {
- docTermState.freqOffset += termsIn.readVLong();
+ termState.freqOffset += termState.bytesReader.readVLong();
}
+ //System.out.println(" freqFP=" + termState.freqOffset);
- if (docTermState.docFreq >= skipInterval) {
- docTermState.skipOffset = termsIn.readVInt();
+ if (termState.docFreq >= skipInterval) {
+ termState.skipOffset = termState.bytesReader.readVInt();
+ //System.out.println(" skipOffset=" + termState.skipOffset);
} else {
- docTermState.skipOffset = 0;
+ // skipOffset is not encoded when docFreq < skipInterval; it is left undefined (may hold a stale value)
}
if (!fieldInfo.omitTermFreqAndPositions) {
- if (isIndexTerm) {
- docTermState.proxOffset = termsIn.readVLong();
+ if (isFirstTerm) {
+ termState.proxOffset = termState.bytesReader.readVLong();
} else {
- docTermState.proxOffset += termsIn.readVLong();
+ termState.proxOffset += termState.bytesReader.readVLong();
}
+ //System.out.println(" proxFP=" + termState.proxOffset);
}
}
@Override
- public DocsEnum docs(FieldInfo fieldInfo, PrefixCodedTermState termState, Bits skipDocs, DocsEnum reuse) throws IOException {
+ public DocsEnum docs(FieldInfo fieldInfo, BlockTermState termState, Bits skipDocs, DocsEnum reuse) throws IOException {
SegmentDocsEnum docsEnum;
if (reuse == null || !(reuse instanceof SegmentDocsEnum)) {
docsEnum = new SegmentDocsEnum(freqIn);
@@ -173,7 +214,7 @@ public class StandardPostingsReader exte
private SegmentBulkPostingsEnum lastBulkEnum;
@Override
- public BulkPostingsEnum bulkPostings(FieldInfo fieldInfo, TermState termState, BulkPostingsEnum reuse, boolean doFreqs, boolean doPositions) throws IOException {
+ public BulkPostingsEnum bulkPostings(FieldInfo fieldInfo, BlockTermState termState, BulkPostingsEnum reuse, boolean doFreqs, boolean doPositions) throws IOException {
// must assign to local, first, for thread safety:
final SegmentBulkPostingsEnum lastBulkEnum = this.lastBulkEnum;
@@ -192,8 +233,7 @@ public class StandardPostingsReader exte
}
}
- @Override
- public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, PrefixCodedTermState termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
+ public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
if (fieldInfo.omitTermFreqAndPositions) {
return null;
}
@@ -274,6 +314,7 @@ public class StandardPostingsReader exte
assert limit > 0;
ord = 0;
doc = 0;
+ //System.out.println(" sde limit=" + limit + " freqFP=" + freqOffset);
skipped = false;
@@ -413,6 +454,7 @@ public class StandardPostingsReader exte
limit = termState.docFreq;
assert limit > 0;
+
ord = 0;
doc = 0;
position = 0;
@@ -423,6 +465,7 @@ public class StandardPostingsReader exte
freqOffset = termState.freqOffset;
proxOffset = termState.proxOffset;
skipOffset = termState.skipOffset;
+ //System.out.println("StandardR.D&PE reset seg=" + segment + " limit=" + limit + " freqFP=" + freqOffset + " proxFP=" + proxOffset);
return this;
}
@@ -431,6 +474,7 @@ public class StandardPostingsReader exte
public int nextDoc() throws IOException {
while(true) {
if (ord == limit) {
+ //System.out.println("StandardR.D&PE seg=" + segment + " nextDoc return doc=END");
return doc = NO_MORE_DOCS;
}
@@ -454,6 +498,7 @@ public class StandardPostingsReader exte
position = 0;
+ //System.out.println("StandardR.D&PE nextDoc seg=" + segment + " return doc=" + doc);
return doc;
}
@@ -470,6 +515,8 @@ public class StandardPostingsReader exte
@Override
public int advance(int target) throws IOException {
+ //System.out.println("StandardR.D&PE advance target=" + target);
+
// TODO: jump right to next() if target is < X away
// from where we are now?
@@ -615,6 +662,7 @@ public class StandardPostingsReader exte
freqOffset = termState.freqOffset;
proxOffset = termState.proxOffset;
skipOffset = termState.skipOffset;
+ //System.out.println("StandardR.D&PE reset seg=" + segment + " limit=" + limit + " freqFP=" + freqOffset + " proxFP=" + proxOffset);
return this;
}
@@ -623,6 +671,7 @@ public class StandardPostingsReader exte
public int nextDoc() throws IOException {
while(true) {
if (ord == limit) {
+ //System.out.println("StandardR.D&PE seg=" + segment + " nextDoc return doc=END");
return doc = NO_MORE_DOCS;
}
@@ -646,6 +695,7 @@ public class StandardPostingsReader exte
position = 0;
+ //System.out.println("StandardR.D&PE nextDoc seg=" + segment + " return doc=" + doc);
return doc;
}
@@ -741,6 +791,7 @@ public class StandardPostingsReader exte
posPendingCount--;
position = 0;
payloadPending = false;
+ //System.out.println("StandardR.D&PE skipPos");
}
// read next position
@@ -764,6 +815,7 @@ public class StandardPostingsReader exte
assert posPendingCount >= 0: "nextPosition() was called too many times (more than freq() times) posPendingCount=" + posPendingCount;
+ //System.out.println("StandardR.D&PE nextPos return pos=" + position);
return position;
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java Thu Jan 20 23:01:27 2011
@@ -22,13 +22,14 @@ package org.apache.lucene.index.codecs.s
import java.io.IOException;
-import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.codecs.PostingsWriterBase;
import org.apache.lucene.index.codecs.TermStats;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
@@ -59,8 +60,15 @@ public final class StandardPostingsWrite
int lastPayloadLength;
int lastPosition;
+ private int pendingCount;
+
+ //private String segment;
+
+ private RAMOutputStream bytesWriter = new RAMOutputStream();
+
public StandardPostingsWriter(SegmentWriteState state) throws IOException {
super();
+ //this.segment = state.segmentName;
String fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.FREQ_EXTENSION);
freqOut = state.directory.createOutput(fileName);
@@ -96,6 +104,7 @@ public final class StandardPostingsWrite
@Override
public void startTerm() {
+ //System.out.println("StandardW: startTerm seg=" + segment + " pendingCount=" + pendingCount);
freqStart = freqOut.getFilePointer();
if (proxOut != null) {
proxStart = proxOut.getFilePointer();
@@ -109,9 +118,12 @@ public final class StandardPostingsWrite
// our parent calls setField whenever the field changes
@Override
public void setField(FieldInfo fieldInfo) {
+ //System.out.println("SPW: setField");
this.fieldInfo = fieldInfo;
omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
storePayloads = fieldInfo.storePayloads;
+ //System.out.println(" set init blockFreqStart=" + freqStart);
+ //System.out.println(" set init blockProxStart=" + proxStart);
}
int lastDocID;
@@ -121,6 +133,7 @@ public final class StandardPostingsWrite
* then we just skip consuming positions/payloads. */
@Override
public void startDoc(int docID, int termDocFreq) throws IOException {
+ //System.out.println("StandardW: startDoc seg=" + segment + " docID=" + docID + " tf=" + termDocFreq);
final int delta = docID - lastDocID;
@@ -151,6 +164,7 @@ public final class StandardPostingsWrite
/** Add a new position & payload */
@Override
public void addPosition(int position, BytesRef payload) throws IOException {
+ //System.out.println("StandardW: addPos pos=" + position + " payload=" + (payload == null ? "null" : (payload.length + " bytes")) + " proxFP=" + proxOut.getFilePointer());
assert !omitTermFreqAndPositions: "omitTermFreqAndPositions is true";
assert proxOut != null;
@@ -185,40 +199,51 @@ public final class StandardPostingsWrite
/** Called when we are done adding docs to this term */
@Override
- public void finishTerm(TermStats stats, boolean isIndexTerm) throws IOException {
+ public void finishTerm(TermStats stats) throws IOException {
+ //System.out.println("StandardW.finishTerm seg=" + segment);
assert stats.docFreq > 0;
// TODO: wasteful we are counting this (counting # docs
// for this term) in two places?
assert stats.docFreq == df;
- if (isIndexTerm) {
- // Write absolute at seek points
- termsOut.writeVLong(freqStart);
+ final boolean isFirstTerm = pendingCount == 0;
+ //System.out.println(" isFirstTerm=" + isFirstTerm);
+
+ //System.out.println(" freqFP=" + freqStart);
+ if (isFirstTerm) {
+ bytesWriter.writeVLong(freqStart);
} else {
- // Write delta between seek points
- termsOut.writeVLong(freqStart - lastFreqStart);
+ bytesWriter.writeVLong(freqStart-lastFreqStart);
}
-
lastFreqStart = freqStart;
if (df >= skipInterval) {
- termsOut.writeVInt((int) (skipListWriter.writeSkip(freqOut)-freqStart));
+ bytesWriter.writeVInt((int) (skipListWriter.writeSkip(freqOut)-freqStart));
}
-
+
if (!omitTermFreqAndPositions) {
- if (isIndexTerm) {
- // Write absolute at seek points
- termsOut.writeVLong(proxStart);
+ //System.out.println(" proxFP=" + proxStart);
+ if (isFirstTerm) {
+ bytesWriter.writeVLong(proxStart);
} else {
- // Write delta between seek points
- termsOut.writeVLong(proxStart - lastProxStart);
+ bytesWriter.writeVLong(proxStart - lastProxStart);
}
lastProxStart = proxStart;
}
-
+
lastDocID = 0;
df = 0;
+ pendingCount++;
+ }
+
+ @Override
+ public void flushTermsBlock() throws IOException {
+ //System.out.println("SPW.flushBlock pendingCount=" + pendingCount);
+ termsOut.writeVInt((int) bytesWriter.getFilePointer());
+ bytesWriter.writeTo(termsOut);
+ bytesWriter.reset();
+ pendingCount = 0;
}
@Override
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java Thu Jan 20 23:01:27 2011
@@ -123,12 +123,12 @@ public abstract class FilteredTermsEnum
}
@Override
- public int docFreq() {
+ public int docFreq() throws IOException {
return tenum.docFreq();
}
@Override
- public long totalTermFreq() {
+ public long totalTermFreq() throws IOException {
return tenum.totalTermFreq();
}
@@ -173,7 +173,7 @@ public abstract class FilteredTermsEnum
* @throws UnsupportedOperationException
*/
@Override
- public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+ public void seek(BytesRef term, TermState state) throws IOException {
throw new UnsupportedOperationException(getClass().getName()+" does not support seeking");
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java Thu Jan 20 23:01:27 2011
@@ -241,12 +241,12 @@ public final class FuzzyTermsEnum extend
// proxy all other enum calls to the actual enum
@Override
- public int docFreq() {
+ public int docFreq() throws IOException {
return actualEnum.docFreq();
}
@Override
- public long totalTermFreq() {
+ public long totalTermFreq() throws IOException {
return actualEnum.totalTermFreq();
}
@@ -261,8 +261,8 @@ public final class FuzzyTermsEnum extend
return actualEnum.docsAndPositions(skipDocs, reuse);
}
- public SeekStatus seek(BytesRef term, TermState state) throws IOException {
- return actualEnum.seek(term, state);
+ public void seek(BytesRef term, TermState state) throws IOException {
+ actualEnum.seek(term, state);
}
@Override
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java Thu Jan 20 23:01:27 2011
@@ -341,9 +341,9 @@ public class DocTermsIndexCreator extend
}
@Override
- public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+ public void seek(BytesRef term, TermState state) throws IOException {
assert state != null && state instanceof OrdTermState;
- return this.seek(((OrdTermState)state).ord);
+ this.seek(((OrdTermState)state).ord);
}
@Override
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java Thu Jan 20 23:01:27 2011
@@ -21,6 +21,7 @@ package org.apache.lucene.store;
public final class ByteArrayDataInput extends DataInput {
private byte[] bytes;
+
private int pos;
private int limit;
@@ -38,10 +39,14 @@ public final class ByteArrayDataInput ex
public void reset(byte[] bytes, int offset, int limit) {
this.bytes = bytes;
- pos = offset;
+ this.pos = offset;
this.limit = limit;
}
+ public int getPosition() {
+ return pos;
+ }
+
public boolean eof() {
return pos == limit;
}
@@ -50,15 +55,59 @@ public final class ByteArrayDataInput ex
pos += count;
}
+ @Override
+ public short readShort() {
+ return (short) (((bytes[pos++] & 0xFF) << 8) | (bytes[pos++] & 0xFF));
+ }
+
+ @Override
+ public int readInt() {
+ return ((bytes[pos++] & 0xFF) << 24) | ((bytes[pos++] & 0xFF) << 16)
+ | ((bytes[pos++] & 0xFF) << 8) | (bytes[pos++] & 0xFF);
+ }
+
+ @Override
+ public long readLong() {
+ final int i1 = ((bytes[pos++] & 0xff) << 24) | ((bytes[pos++] & 0xff) << 16) |
+ ((bytes[pos++] & 0xff) << 8) | (bytes[pos++] & 0xff);
+ final int i2 = ((bytes[pos++] & 0xff) << 24) | ((bytes[pos++] & 0xff) << 16) |
+ ((bytes[pos++] & 0xff) << 8) | (bytes[pos++] & 0xff);
+ return (((long)i1) << 32) | (i2 & 0xFFFFFFFFL);
+ }
+
+ @Override
+ public int readVInt() {
+ byte b = bytes[pos++];
+ int i = b & 0x7F;
+ for (int shift = 7; (b & 0x80) != 0; shift += 7) {
+ b = bytes[pos++];
+ i |= (b & 0x7F) << shift;
+ }
+ return i;
+ }
+
+ @Override
+ public long readVLong() {
+ byte b = bytes[pos++];
+ long i = b & 0x7F;
+ for (int shift = 7; (b & 0x80) != 0; shift += 7) {
+ b = bytes[pos++];
+ i |= (b & 0x7FL) << shift;
+ }
+ return i;
+ }
+
// NOTE: AIOOBE not EOF if you read too much
@Override
public byte readByte() {
+ assert pos < limit;
return bytes[pos++];
}
// NOTE: AIOOBE not EOF if you read too much
@Override
public void readBytes(byte[] b, int offset, int len) {
+ assert pos + len <= limit;
System.arraycopy(bytes, pos, b, offset, len);
pos += len;
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/store/DataInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/store/DataInput.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/store/DataInput.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/store/DataInput.java Thu Jan 20 23:01:27 2011
@@ -79,7 +79,7 @@ public abstract class DataInput implemen
* supported.
* @see DataOutput#writeVInt(int)
*/
- public final int readVInt() throws IOException {
+ public int readVInt() throws IOException {
byte b = readByte();
int i = b & 0x7F;
for (int shift = 7; (b & 0x80) != 0; shift += 7) {
@@ -99,7 +99,7 @@ public abstract class DataInput implemen
/** Reads a long stored in variable-length format. Reads between one and
* nine bytes. Smaller values take fewer bytes. Negative numbers are not
* supported. */
- public final long readVLong() throws IOException {
+ public long readVLong() throws IOException {
byte b = readByte();
long i = b & 0x7F;
for (int shift = 7; (b & 0x80) != 0; shift += 7) {
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestExternalCodecs.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestExternalCodecs.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestExternalCodecs.java Thu Jan 20 23:01:27 2011
@@ -245,7 +245,6 @@ public class TestExternalCodecs extends
}
}
-
// Classes for reading from the postings state
static class RAMFieldsEnum extends FieldsEnum {
private final RAMPostings postings;
@@ -651,7 +650,7 @@ public class TestExternalCodecs extends
// Terms dict
success = false;
try {
- FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, pulsingWriter, reverseUnicodeComparator);
+ FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, pulsingWriter, reverseUnicodeComparator);
success = true;
return ret;
} finally {
@@ -692,15 +691,15 @@ public class TestExternalCodecs extends
// Terms dict reader
success = false;
try {
- FieldsProducer ret = new PrefixCodedTermsReader(indexReader,
- state.dir,
- state.fieldInfos,
- state.segmentInfo.name,
- pulsingReader,
- state.readBufferSize,
- reverseUnicodeComparator,
- StandardCodec.TERMS_CACHE_SIZE,
- state.codecId);
+ FieldsProducer ret = new BlockTermsReader(indexReader,
+ state.dir,
+ state.fieldInfos,
+ state.segmentInfo.name,
+ pulsingReader,
+ state.readBufferSize,
+ reverseUnicodeComparator,
+ StandardCodec.TERMS_CACHE_SIZE,
+ state.codecId);
success = true;
return ret;
} finally {
@@ -717,7 +716,7 @@ public class TestExternalCodecs extends
@Override
public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set<String> files) throws IOException {
StandardPostingsReader.files(dir, segmentInfo, codecId, files);
- PrefixCodedTermsReader.files(dir, segmentInfo, codecId, files);
+ BlockTermsReader.files(dir, segmentInfo, codecId, files);
FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
}
@@ -745,6 +744,7 @@ public class TestExternalCodecs extends
setCodecProvider(provider).
setMergePolicy(newLogMergePolicy(3))
);
+ w.setInfoStream(VERBOSE ? System.out : null);
Document doc = new Document();
// uses default codec:
doc.add(newField("field1", "this field uses the standard codec as the test", Field.Store.NO, Field.Index.ANALYZED));
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearch.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearch.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearch.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearch.java Thu Jan 20 23:01:27 2011
@@ -111,6 +111,9 @@ public class TestSearch extends LuceneTe
for (int j = 0; j < queries.length; j++) {
Query query = parser.parse(queries[j]);
out.println("Query: " + query.toString("contents"));
+ if (VERBOSE) {
+ System.out.println("TEST: query=" + query);
+ }
hits = searcher.search(query, null, 1000).scoreDocs;
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java Thu Jan 20 23:01:27 2011
@@ -108,6 +108,9 @@ public class TestSearchForDuplicates ext
Query query = parser.parse(HIGH_PRIORITY);
out.println("Query: " + query.toString(PRIORITY_FIELD));
+ if (VERBOSE) {
+ System.out.println("TEST: search query=" + query);
+ }
ScoreDoc[] hits = searcher.search(query, null, MAX_DOCS).scoreDocs;
printHits(out, hits, searcher);
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java Thu Jan 20 23:01:27 2011
@@ -55,6 +55,7 @@ public class TestAddIndexes extends Luce
writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer())
.setOpenMode(OpenMode.CREATE));
+ writer.setInfoStream(VERBOSE ? System.out : null);
// add 100 documents
addDocs(writer, 100);
assertEquals(100, writer.maxDoc());
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReader.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReader.java Thu Jan 20 23:01:27 2011
@@ -882,6 +882,10 @@ public class TestIndexReader extends Luc
// First build up a starting index:
MockDirectoryWrapper startDir = newDirectory();
IndexWriter writer = new IndexWriter(startDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+ if (VERBOSE) {
+ System.out.println("TEST: create initial index");
+ writer.setInfoStream(System.out);
+ }
for(int i=0;i<157;i++) {
Document d = new Document();
d.add(newField("id", Integer.toString(i), Field.Store.YES, Field.Index.NOT_ANALYZED));
@@ -892,6 +896,19 @@ public class TestIndexReader extends Luc
}
writer.close();
+ {
+ IndexReader r = IndexReader.open(startDir);
+ IndexSearcher searcher = new IndexSearcher(r);
+ ScoreDoc[] hits = null;
+ try {
+ hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
+ } catch (IOException e) {
+ e.printStackTrace();
+ fail("exception when init searching: " + e);
+ }
+ r.close();
+ }
+
long diskUsage = startDir.getRecomputedActualSizeInBytes();
long diskFree = diskUsage+100;
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java Thu Jan 20 23:01:27 2011
@@ -977,7 +977,11 @@ public class TestIndexReaderReopen exten
static void modifyIndex(int i, Directory dir) throws IOException {
switch (i) {
case 0: {
+ if (VERBOSE) {
+ System.out.println("TEST: modify index");
+ }
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+ w.setInfoStream(VERBOSE ? System.out : null);
w.deleteDocuments(new Term("field2", "a11"));
w.deleteDocuments(new Term("field2", "b30"));
w.close();
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestMultiFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestMultiFields.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestMultiFields.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestMultiFields.java Thu Jan 20 23:01:27 2011
@@ -97,6 +97,9 @@ public class TestMultiFields extends Luc
for(int i=0;i<100;i++) {
BytesRef term = terms.get(random.nextInt(terms.size()));
+ if (VERBOSE) {
+ System.out.println("TEST: seek to term= "+ UnicodeUtil.toHexString(term.utf8ToString()));
+ }
DocsEnum docsEnum = terms2.docs(delDocs, term, null);
assertNotNull(docsEnum);
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java Thu Jan 20 23:01:27 2011
@@ -37,8 +37,8 @@ import org.apache.lucene.index.codecs.Fi
import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter;
import org.apache.lucene.index.codecs.PostingsWriterBase;
import org.apache.lucene.index.codecs.PostingsReaderBase;
-import org.apache.lucene.index.codecs.PrefixCodedTermsReader;
-import org.apache.lucene.index.codecs.PrefixCodedTermsWriter;
+import org.apache.lucene.index.codecs.BlockTermsReader;
+import org.apache.lucene.index.codecs.BlockTermsWriter;
import org.apache.lucene.index.codecs.TermsIndexReaderBase;
import org.apache.lucene.index.codecs.TermsIndexWriterBase;
import org.apache.lucene.index.codecs.standard.StandardCodec;
@@ -126,7 +126,7 @@ public class MockFixedIntBlockCodec exte
success = false;
try {
- FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+ FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
success = true;
return ret;
} finally {
@@ -164,15 +164,15 @@ public class MockFixedIntBlockCodec exte
success = false;
try {
- FieldsProducer ret = new PrefixCodedTermsReader(indexReader,
- state.dir,
- state.fieldInfos,
- state.segmentInfo.name,
- postingsReader,
- state.readBufferSize,
- BytesRef.getUTF8SortedAsUnicodeComparator(),
- StandardCodec.TERMS_CACHE_SIZE,
- state.codecId);
+ FieldsProducer ret = new BlockTermsReader(indexReader,
+ state.dir,
+ state.fieldInfos,
+ state.segmentInfo.name,
+ postingsReader,
+ state.readBufferSize,
+ BytesRef.getUTF8SortedAsUnicodeComparator(),
+ StandardCodec.TERMS_CACHE_SIZE,
+ state.codecId);
success = true;
return ret;
} finally {
@@ -189,14 +189,14 @@ public class MockFixedIntBlockCodec exte
@Override
public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set<String> files) {
SepPostingsReaderImpl.files(segmentInfo, codecId, files);
- PrefixCodedTermsReader.files(dir, segmentInfo, codecId, files);
+ BlockTermsReader.files(dir, segmentInfo, codecId, files);
FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
}
@Override
public void getExtensions(Set<String> extensions) {
SepPostingsWriterImpl.getExtensions(extensions);
- PrefixCodedTermsReader.getExtensions(extensions);
+ BlockTermsReader.getExtensions(extensions);
FixedGapTermsIndexReader.getIndexExtensions(extensions);
}
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java Thu Jan 20 23:01:27 2011
@@ -37,8 +37,8 @@ import org.apache.lucene.index.codecs.Fi
import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter;
import org.apache.lucene.index.codecs.PostingsWriterBase;
import org.apache.lucene.index.codecs.PostingsReaderBase;
-import org.apache.lucene.index.codecs.PrefixCodedTermsReader;
-import org.apache.lucene.index.codecs.PrefixCodedTermsWriter;
+import org.apache.lucene.index.codecs.BlockTermsReader;
+import org.apache.lucene.index.codecs.BlockTermsWriter;
import org.apache.lucene.index.codecs.TermsIndexReaderBase;
import org.apache.lucene.index.codecs.TermsIndexWriterBase;
import org.apache.lucene.index.codecs.standard.StandardCodec;
@@ -150,7 +150,7 @@ public class MockVariableIntBlockCodec e
success = false;
try {
- FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+ FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
success = true;
return ret;
} finally {
@@ -189,15 +189,15 @@ public class MockVariableIntBlockCodec e
success = false;
try {
- FieldsProducer ret = new PrefixCodedTermsReader(indexReader,
- state.dir,
- state.fieldInfos,
- state.segmentInfo.name,
- postingsReader,
- state.readBufferSize,
- BytesRef.getUTF8SortedAsUnicodeComparator(),
- StandardCodec.TERMS_CACHE_SIZE,
- state.codecId);
+ FieldsProducer ret = new BlockTermsReader(indexReader,
+ state.dir,
+ state.fieldInfos,
+ state.segmentInfo.name,
+ postingsReader,
+ state.readBufferSize,
+ BytesRef.getUTF8SortedAsUnicodeComparator(),
+ StandardCodec.TERMS_CACHE_SIZE,
+ state.codecId);
success = true;
return ret;
} finally {
@@ -214,14 +214,14 @@ public class MockVariableIntBlockCodec e
@Override
public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set<String> files) {
SepPostingsReaderImpl.files(segmentInfo, codecId, files);
- PrefixCodedTermsReader.files(dir, segmentInfo, codecId, files);
+ BlockTermsReader.files(dir, segmentInfo, codecId, files);
FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
}
@Override
public void getExtensions(Set<String> extensions) {
SepPostingsWriterImpl.getExtensions(extensions);
- PrefixCodedTermsReader.getExtensions(extensions);
+ BlockTermsReader.getExtensions(extensions);
FixedGapTermsIndexReader.getIndexExtensions(extensions);
}
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java Thu Jan 20 23:01:27 2011
@@ -26,6 +26,9 @@ import org.apache.lucene.index.IndexFile
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.codecs.BlockTermsReader;
+import org.apache.lucene.index.codecs.BlockTermsWriter;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.FieldsProducer;
@@ -33,13 +36,11 @@ import org.apache.lucene.index.codecs.Fi
import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter;
import org.apache.lucene.index.codecs.PostingsReaderBase;
import org.apache.lucene.index.codecs.PostingsWriterBase;
-import org.apache.lucene.index.codecs.PrefixCodedTermsReader;
-import org.apache.lucene.index.codecs.PrefixCodedTermsWriter;
+import org.apache.lucene.index.codecs.TermStats;
import org.apache.lucene.index.codecs.TermsIndexReaderBase;
import org.apache.lucene.index.codecs.TermsIndexWriterBase;
import org.apache.lucene.index.codecs.VariableGapTermsIndexReader;
import org.apache.lucene.index.codecs.VariableGapTermsIndexWriter;
-import org.apache.lucene.index.codecs.TermStats;
import org.apache.lucene.index.codecs.mockintblock.MockFixedIntBlockCodec;
import org.apache.lucene.index.codecs.mockintblock.MockVariableIntBlockCodec;
import org.apache.lucene.index.codecs.mocksep.MockSingleIntFactory;
@@ -152,6 +153,10 @@ public class MockRandomCodec extends Cod
public boolean isIndexTerm(BytesRef term, TermStats stats) {
return random.nextInt(gap) == 17;
}
+
+ @Override
+ public void newField(FieldInfo fieldInfo) {
+ }
};
}
indexWriter = new VariableGapTermsIndexWriter(state, selector);
@@ -165,7 +170,7 @@ public class MockRandomCodec extends Cod
success = false;
try {
- FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+ FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
success = true;
return ret;
} finally {
@@ -269,15 +274,15 @@ public class MockRandomCodec extends Cod
success = false;
try {
- FieldsProducer ret = new PrefixCodedTermsReader(indexReader,
- state.dir,
- state.fieldInfos,
- state.segmentInfo.name,
- postingsReader,
- state.readBufferSize,
- BytesRef.getUTF8SortedAsUnicodeComparator(),
- termsCacheSize,
- state.codecId);
+ FieldsProducer ret = new BlockTermsReader(indexReader,
+ state.dir,
+ state.fieldInfos,
+ state.segmentInfo.name,
+ postingsReader,
+ state.readBufferSize,
+ BytesRef.getUTF8SortedAsUnicodeComparator(),
+ termsCacheSize,
+ state.codecId);
success = true;
return ret;
} finally {
@@ -297,7 +302,7 @@ public class MockRandomCodec extends Cod
files.add(seedFileName);
SepPostingsReaderImpl.files(segmentInfo, codecId, files);
StandardPostingsReader.files(dir, segmentInfo, codecId, files);
- PrefixCodedTermsReader.files(dir, segmentInfo, codecId, files);
+ BlockTermsReader.files(dir, segmentInfo, codecId, files);
FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
VariableGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
@@ -315,7 +320,7 @@ public class MockRandomCodec extends Cod
@Override
public void getExtensions(Set<String> extensions) {
SepPostingsWriterImpl.getExtensions(extensions);
- PrefixCodedTermsReader.getExtensions(extensions);
+ BlockTermsReader.getExtensions(extensions);
FixedGapTermsIndexReader.getIndexExtensions(extensions);
VariableGapTermsIndexReader.getIndexExtensions(extensions);
extensions.add(SEED_EXT);
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java Thu Jan 20 23:01:27 2011
@@ -30,8 +30,8 @@ import org.apache.lucene.index.codecs.Fi
import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter;
import org.apache.lucene.index.codecs.PostingsReaderBase;
import org.apache.lucene.index.codecs.PostingsWriterBase;
-import org.apache.lucene.index.codecs.PrefixCodedTermsReader;
-import org.apache.lucene.index.codecs.PrefixCodedTermsWriter;
+import org.apache.lucene.index.codecs.BlockTermsReader;
+import org.apache.lucene.index.codecs.BlockTermsWriter;
import org.apache.lucene.index.codecs.TermsIndexReaderBase;
import org.apache.lucene.index.codecs.TermsIndexWriterBase;
import org.apache.lucene.index.codecs.standard.StandardCodec;
@@ -70,7 +70,7 @@ public class MockSepCodec extends Codec
success = false;
try {
- FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+ FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
success = true;
return ret;
} finally {
@@ -108,15 +108,15 @@ public class MockSepCodec extends Codec
success = false;
try {
- FieldsProducer ret = new PrefixCodedTermsReader(indexReader,
- state.dir,
- state.fieldInfos,
- state.segmentInfo.name,
- postingsReader,
- state.readBufferSize,
- BytesRef.getUTF8SortedAsUnicodeComparator(),
- StandardCodec.TERMS_CACHE_SIZE,
- state.codecId);
+ FieldsProducer ret = new BlockTermsReader(indexReader,
+ state.dir,
+ state.fieldInfos,
+ state.segmentInfo.name,
+ postingsReader,
+ state.readBufferSize,
+ BytesRef.getUTF8SortedAsUnicodeComparator(),
+ StandardCodec.TERMS_CACHE_SIZE,
+ state.codecId);
success = true;
return ret;
} finally {
@@ -133,7 +133,7 @@ public class MockSepCodec extends Codec
@Override
public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set<String> files) {
SepPostingsReaderImpl.files(segmentInfo, codecId, files);
- PrefixCodedTermsReader.files(dir, segmentInfo, codecId, files);
+ BlockTermsReader.files(dir, segmentInfo, codecId, files);
FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
}
@@ -144,7 +144,7 @@ public class MockSepCodec extends Codec
public static void getSepExtensions(Set<String> extensions) {
SepPostingsWriterImpl.getExtensions(extensions);
- PrefixCodedTermsReader.getExtensions(extensions);
+ BlockTermsReader.getExtensions(extensions);
FixedGapTermsIndexReader.getIndexExtensions(extensions);
}
-}
\ No newline at end of file
+}
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java Thu Jan 20 23:01:27 2011
@@ -19,10 +19,11 @@ package org.apache.lucene.index.codecs.m
import java.io.IOException;
+import org.apache.lucene.index.codecs.sep.IntIndexInput;
+import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.CodecUtil;
-import org.apache.lucene.index.codecs.sep.IntIndexInput;
import org.apache.lucene.index.BulkPostingsEnum;
/** Reads IndexInputs written with {@link
@@ -95,7 +96,7 @@ public class MockSingleIntIndexInput ext
private long fp;
@Override
- public void read(IndexInput indexIn, boolean absolute)
+ public void read(DataInput indexIn, boolean absolute)
throws IOException {
if (absolute) {
fp = indexIn.readVLong();
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestFieldCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestFieldCache.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestFieldCache.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestFieldCache.java Thu Jan 20 23:01:27 2011
@@ -49,6 +49,10 @@ public class TestFieldCache extends Luce
int theInt = Integer.MAX_VALUE;
float theFloat = Float.MAX_VALUE;
unicodeStrings = new String[NUM_DOCS];
+ if (VERBOSE) {
+ System.out.println("TEST: setUp");
+ }
+ writer.w.setInfoStream(VERBOSE ? System.out : null);
for (int i = 0; i < NUM_DOCS; i++){
Document doc = new Document();
doc.add(newField("theLong", String.valueOf(theLong--), Field.Store.NO, Field.Index.NOT_ANALYZED));
@@ -117,7 +121,7 @@ public class TestFieldCache extends Luce
assertSame("Second request with explicit parser return same array", longs, cache.getLongs(reader, "theLong", FieldCache.DEFAULT_LONG_PARSER));
assertTrue("longs Size: " + longs.length + " is not: " + NUM_DOCS, longs.length == NUM_DOCS);
for (int i = 0; i < longs.length; i++) {
- assertTrue(longs[i] + " does not equal: " + (Long.MAX_VALUE - i), longs[i] == (Long.MAX_VALUE - i));
+ assertTrue(longs[i] + " does not equal: " + (Long.MAX_VALUE - i) + " i=" + i, longs[i] == (Long.MAX_VALUE - i));
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java Thu Jan 20 23:01:27 2011
@@ -40,7 +40,6 @@ import org.apache.lucene.index.IndexRead
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
-import org.apache.lucene.index.OrdTermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.codecs.CodecProvider;
@@ -975,6 +974,9 @@ public class TestFSTs extends LuceneTest
Terms terms = MultiFields.getTerms(r, "body");
if (terms != null) {
final TermsEnum termsEnum = terms.iterator();
+ if (VERBOSE) {
+ System.out.println("TEST: got termsEnum=" + termsEnum);
+ }
BytesRef term;
int ord = 0;
while((term = termsEnum.next()) != null) {
@@ -982,6 +984,9 @@ public class TestFSTs extends LuceneTest
try {
termsEnum.ord();
} catch (UnsupportedOperationException uoe) {
+ if (VERBOSE) {
+ System.out.println("TEST: codec doesn't support ord; FST stores docFreq");
+ }
storeOrd = false;
}
}
@@ -1023,6 +1028,9 @@ public class TestFSTs extends LuceneTest
for(int nextIter=0;nextIter<10;nextIter++) {
if (VERBOSE) {
System.out.println("TEST: next");
+ if (storeOrd) {
+ System.out.println(" ord=" + termsEnum.ord());
+ }
}
if (termsEnum.next() != null) {
if (VERBOSE) {
Modified: lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/request/UnInvertedField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/request/UnInvertedField.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/request/UnInvertedField.java (original)
+++ lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/request/UnInvertedField.java Thu Jan 20 23:01:27 2011
@@ -1001,12 +1001,12 @@ class NumberedTermsEnum extends TermsEnu
}
@Override
- public int docFreq() {
+ public int docFreq() throws IOException {
return tenum.docFreq();
}
@Override
- public long totalTermFreq() {
+ public long totalTermFreq() throws IOException {
return tenum.totalTermFreq();
}
Modified: lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/util/HighFrequencyDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/util/HighFrequencyDictionary.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/util/HighFrequencyDictionary.java (original)
+++ lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/util/HighFrequencyDictionary.java Thu Jan 20 23:01:27 2011
@@ -76,7 +76,11 @@ public class HighFrequencyDictionary imp
}
public float freq() {
- return termsEnum.docFreq();
+ try {
+ return termsEnum.docFreq();
+ } catch (IOException ioe) {
+ throw new RuntimeException(ioe);
+ }
}
public String next() {
@@ -112,8 +116,12 @@ public class HighFrequencyDictionary imp
}
// got a valid term, does it pass the threshold?
- if (isFrequent(termsEnum.docFreq())) {
- return true;
+ try {
+ if (isFrequent(termsEnum.docFreq())) {
+ return true;
+ }
+ } catch (IOException ioe) {
+ throw new RuntimeException(ioe);
}
}
}