You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2011/01/20 19:53:58 UTC
svn commit: r1061453 [2/3] - in /lucene/dev/trunk:
lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/
lucene/contrib/memory/src/java/org/apache/lucene/index/memory/
lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/app...
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java Thu Jan 20 18:53:55 2011
@@ -24,7 +24,7 @@ import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.codecs.PostingsReaderBase;
-import org.apache.lucene.index.codecs.PrefixCodedTermState;
+import org.apache.lucene.index.codecs.BlockTermState;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
@@ -58,11 +58,13 @@ public class PulsingPostingsReaderImpl e
wrappedPostingsReader.init(termsIn);
}
- private static class PulsingTermState extends PrefixCodedTermState {
+ private static class PulsingTermState extends BlockTermState {
private byte[] postings;
private int postingsSize; // -1 if this term was not inlined
- private PrefixCodedTermState wrappedTermState;
- private boolean pendingIndexTerm;
+ private BlockTermState wrappedTermState;
+
+ ByteArrayDataInput inlinedBytesReader;
+ private byte[] inlinedBytes;
@Override
public Object clone() {
@@ -73,7 +75,7 @@ public class PulsingPostingsReaderImpl e
System.arraycopy(postings, 0, clone.postings, 0, postingsSize);
} else {
assert wrappedTermState != null;
- clone.wrappedTermState = (PrefixCodedTermState) wrappedTermState.clone();
+ clone.wrappedTermState = (BlockTermState) wrappedTermState.clone();
}
return clone;
}
@@ -91,56 +93,86 @@ public class PulsingPostingsReaderImpl e
} else {
wrappedTermState.copyFrom(other.wrappedTermState);
}
+
+ // NOTE: we do not copy the
+ // inlinedBytes/inlinedBytesReader; these are only
+ // stored on the "primary" TermState. They are
+ // "transient" to cloned term states.
}
@Override
public String toString() {
if (postingsSize == -1) {
- return "PulsingTermState: not inlined";
+ return "PulsingTermState: not inlined: wrapped=" + wrappedTermState;
} else {
- return "PulsingTermState: inlined size=" + postingsSize;
+ return "PulsingTermState: inlined size=" + postingsSize + " " + super.toString();
}
}
}
@Override
- public PrefixCodedTermState newTermState() throws IOException {
+ public void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
+ final PulsingTermState termState = (PulsingTermState) _termState;
+ if (termState.inlinedBytes == null) { // first block seen by this term state: lazily create buffer + reader
+ termState.inlinedBytes = new byte[128];
+ termState.inlinedBytesReader = new ByteArrayDataInput(null);
+ }
+ int len = termsIn.readVInt(); // byte length of the blob holding this block's inlined postings
+ if (termState.inlinedBytes.length < len) {
+ termState.inlinedBytes = new byte[ArrayUtil.oversize(len, 1)];
+ }
+ termsIn.readBytes(termState.inlinedBytes, 0, len);
+ termState.inlinedBytesReader.reset(termState.inlinedBytes, 0, len); // bound to len: the reused buffer may be larger than the blob
+ termState.wrappedTermState.termCount = 0; // wrapped reader starts a new block as well
+ wrappedPostingsReader.readTermsBlock(termsIn, fieldInfo, termState.wrappedTermState);
+ }
+
+ @Override
+ public BlockTermState newTermState() throws IOException {
PulsingTermState state = new PulsingTermState();
state.wrappedTermState = wrappedPostingsReader.newTermState();
return state;
}
@Override
- public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, PrefixCodedTermState _termState, boolean isIndexTerm) throws IOException {
+ public void nextTerm(FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
+ //System.out.println("PR nextTerm");
PulsingTermState termState = (PulsingTermState) _termState;
- termState.pendingIndexTerm |= isIndexTerm;
-
// total TF, but in the omitTFAP case its computed based on docFreq.
long count = fieldInfo.omitTermFreqAndPositions ? termState.docFreq : termState.totalTermFreq;
-
+ //System.out.println(" count=" + count + " threshold=" + maxPositions);
+
if (count <= maxPositions) {
+ //System.out.println(" inlined");
// Inlined into terms dict -- just read the byte[] blob in,
// but don't decode it now (we only decode when a DocsEnum
// or D&PEnum is pulled):
- termState.postingsSize = termsIn.readVInt();
+ termState.postingsSize = termState.inlinedBytesReader.readVInt();
if (termState.postings == null || termState.postings.length < termState.postingsSize) {
termState.postings = new byte[ArrayUtil.oversize(termState.postingsSize, 1)];
}
- termsIn.readBytes(termState.postings, 0, termState.postingsSize);
+ // TODO: sort of silly to copy from one big byte[]
+ // (the blob holding all inlined terms' blobs for
+ // current term block) into another byte[] (just the
+ // blob for this term)...
+ termState.inlinedBytesReader.readBytes(termState.postings, 0, termState.postingsSize);
} else {
+ //System.out.println(" not inlined");
termState.postingsSize = -1;
+ // TODO: should we do full copyFrom? much heavier...?
termState.wrappedTermState.docFreq = termState.docFreq;
- wrappedPostingsReader.readTerm(termsIn, fieldInfo, termState.wrappedTermState, termState.pendingIndexTerm);
- termState.pendingIndexTerm = false;
+ termState.wrappedTermState.totalTermFreq = termState.totalTermFreq;
+ wrappedPostingsReader.nextTerm(fieldInfo, termState.wrappedTermState);
+ termState.wrappedTermState.termCount++;
}
}
// TODO: we could actually reuse, by having TL that
// holds the last wrapped reuse, and vice-versa
@Override
- public DocsEnum docs(FieldInfo field, PrefixCodedTermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException {
+ public DocsEnum docs(FieldInfo field, BlockTermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException {
PulsingTermState termState = (PulsingTermState) _termState;
if (termState.postingsSize != -1) {
PulsingDocsEnum postings;
@@ -165,7 +197,7 @@ public class PulsingPostingsReaderImpl e
// TODO: -- not great that we can't always reuse
@Override
- public DocsAndPositionsEnum docsAndPositions(FieldInfo field, PrefixCodedTermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
+ public DocsAndPositionsEnum docsAndPositions(FieldInfo field, BlockTermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
if (field.omitTermFreqAndPositions) {
return null;
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java Thu Jan 20 18:53:55 2011
@@ -27,8 +27,8 @@ import org.apache.lucene.store.RAMOutput
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
-// TODO: we pulse based on total TF of the term,
-// it might be better to eg pulse by "net bytes used"
+// TODO: we now inline based on total TF of the term,
+// but it might be better to inline by "net bytes used"
// so that a term that has only 1 posting but a huge
// payload would not be inlined. Though this is
// presumably rare in practice...
@@ -62,8 +62,9 @@ public final class PulsingPostingsWriter
}
// TODO: -- lazy init this? ie, if every single term
- // was pulsed then we never need to use this fallback?
- // Fallback writer for non-pulsed terms:
+ // was inlined (eg for a "primary key" field) then we
+ // never need to use this fallback? Fallback writer for
+ // non-inlined terms:
final PostingsWriterBase wrappedPostingsWriter;
/** If the total number of positions (summed across all docs
@@ -173,22 +174,18 @@ public final class PulsingPostingsWriter
}
}
- private boolean pendingIsIndexTerm;
-
private final RAMOutputStream buffer = new RAMOutputStream();
+ private final RAMOutputStream buffer2 = new RAMOutputStream();
/** Called when we are done adding docs to this term */
@Override
- public void finishTerm(TermStats stats, boolean isIndexTerm) throws IOException {
- //System.out.println("PW finishTerm docCount=" + docCount);
+ public void finishTerm(TermStats stats) throws IOException {
+ //System.out.println("PW finishTerm docCount=" + stats.docFreq);
assert pendingCount > 0 || pendingCount == -1;
- pendingIsIndexTerm |= isIndexTerm;
-
if (pendingCount == -1) {
- wrappedPostingsWriter.finishTerm(stats, pendingIsIndexTerm);
- pendingIsIndexTerm = false;
+ wrappedPostingsWriter.finishTerm(stats);
} else {
// There were few enough total occurrences for this
@@ -254,8 +251,8 @@ public final class PulsingPostingsWriter
}
//System.out.println(" bytes=" + buffer.getFilePointer());
- termsOut.writeVInt((int) buffer.getFilePointer());
- buffer.writeTo(termsOut);
+ buffer2.writeVInt((int) buffer.getFilePointer());
+ buffer.writeTo(buffer2);
buffer.reset();
}
@@ -267,6 +264,18 @@ public final class PulsingPostingsWriter
wrappedPostingsWriter.close();
}
+ @Override
+ public void flushTermsBlock() throws IOException {
+ termsOut.writeVInt((int) buffer2.getFilePointer());
+ buffer2.writeTo(termsOut);
+ buffer2.reset();
+
+ // TODO: can we avoid calling this if all terms
+ // were inlined...? Eg for a "primary key" field, the
+ // wrapped codec is never invoked...
+ wrappedPostingsWriter.flushTermsBlock();
+ }
+
// Pushes pending positions to the wrapped codec
private void push() throws IOException {
//System.out.println("PW now push @ " + pendingCount + " wrapped=" + wrappedPostingsWriter);
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java Thu Jan 20 18:53:55 2011
@@ -17,11 +17,11 @@ package org.apache.lucene.index.codecs.s
* limitations under the License.
*/
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.util.IntsRef;
-
-import java.io.IOException;
import java.io.Closeable;
+import java.io.IOException;
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.util.IntsRef;
/** Defines basic API for writing ints to an IndexOutput.
* IntBlockCodec interacts with this API. @see
@@ -39,7 +39,7 @@ public abstract class IntIndexInput impl
// TODO: -- can we simplify this?
public abstract static class Index {
- public abstract void read(IndexInput indexIn, boolean absolute) throws IOException;
+ public abstract void read(DataInput indexIn, boolean absolute) throws IOException;
public abstract void read(IntIndexInput.Reader indexIn, boolean absolute) throws IOException;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java Thu Jan 20 18:53:55 2011
@@ -20,16 +20,18 @@ package org.apache.lucene.index.codecs.s
import java.io.IOException;
import java.util.Collection;
-import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.codecs.BlockTermState;
import org.apache.lucene.index.codecs.PostingsReaderBase;
-import org.apache.lucene.index.codecs.PrefixCodedTermState;
+import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
@@ -130,16 +132,31 @@ public class SepPostingsReaderImpl exten
}
}
- private static final class SepTermState extends PrefixCodedTermState {
+ private static final class SepTermState extends BlockTermState {
// We store only the seek point to the docs file because
// the rest of the info (freqIndex, posIndex, etc.) is
// stored in the docs file:
IntIndexInput.Index docIndex;
-
+ IntIndexInput.Index posIndex;
+ IntIndexInput.Index freqIndex;
+ long payloadFP;
+ long skipFP;
+
+ // Only used for "primary" term state; these are never
+ // copied on clone:
+ byte[] bytes;
+ ByteArrayDataInput bytesReader;
+
@Override
public Object clone() {
SepTermState other = (SepTermState) super.clone();
other.docIndex = (IntIndexInput.Index) docIndex.clone();
+ if (freqIndex != null) {
+ other.freqIndex = (IntIndexInput.Index) freqIndex.clone();
+ }
+ if (posIndex != null) {
+ other.posIndex = (IntIndexInput.Index) posIndex.clone();
+ }
return other;
}
@@ -147,28 +164,87 @@ public class SepPostingsReaderImpl exten
super.copyFrom(_other);
SepTermState other = (SepTermState) _other;
docIndex.set(other.docIndex);
+ if (freqIndex != null && other.freqIndex != null) {
+ freqIndex.set(other.freqIndex);
+ }
+ if (posIndex != null && other.posIndex != null) {
+ posIndex.set(other.posIndex);
+ }
+ payloadFP = other.payloadFP;
+ skipFP = other.skipFP;
}
@Override
public String toString() {
- return "tis.fp=" + filePointer + " docFreq=" + docFreq + " ord=" + ord + " docIndex=" + docIndex;
+ return super.toString() + " docIndex=" + docIndex + " freqIndex=" + freqIndex + " posIndex=" + posIndex + " payloadFP=" + payloadFP + " skipFP=" + skipFP;
}
}
@Override
- public PrefixCodedTermState newTermState() throws IOException {
- final SepTermState state = new SepTermState();
+ public BlockTermState newTermState() throws IOException {
+ final SepTermState state = new SepTermState();
state.docIndex = docIn.index();
+ if (freqIn != null) {
+ state.freqIndex = freqIn.index();
+ }
+ if (posIn != null) {
+ state.posIndex = posIn.index();
+ }
return state;
}
@Override
- public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, PrefixCodedTermState termState, boolean isIndexTerm) throws IOException {
- ((SepTermState) termState).docIndex.read(termsIn, isIndexTerm);
+ public void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
+ final SepTermState termState = (SepTermState) _termState;
+ final int len = termsIn.readVInt();
+ //System.out.println("SepR.readTermsBlock len=" + len);
+ if (termState.bytes == null) {
+ termState.bytes = new byte[ArrayUtil.oversize(len, 1)];
+ termState.bytesReader = new ByteArrayDataInput(termState.bytes);
+ } else if (termState.bytes.length < len) {
+ termState.bytes = new byte[ArrayUtil.oversize(len, 1)];
+ }
+ termState.bytesReader.reset(termState.bytes, 0, len);
+ termsIn.readBytes(termState.bytes, 0, len);
+ }
+
+ @Override
+ public void nextTerm(FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
+ final SepTermState termState = (SepTermState) _termState;
+ //System.out.println("SepR.nextTerm termCount=" + termState.termCount);
+ //System.out.println(" docFreq=" + termState.docFreq);
+ final boolean isFirstTerm = termState.termCount == 0;
+ termState.docIndex.read(termState.bytesReader, isFirstTerm);
+ //System.out.println(" docIndex=" + termState.docIndex);
+ if (!fieldInfo.omitTermFreqAndPositions) {
+ termState.freqIndex.read(termState.bytesReader, isFirstTerm);
+ //System.out.println(" freqIndex=" + termState.freqIndex);
+ termState.posIndex.read(termState.bytesReader, isFirstTerm);
+ //System.out.println(" posIndex=" + termState.posIndex);
+ if (fieldInfo.storePayloads) {
+ if (isFirstTerm) {
+ termState.payloadFP = termState.bytesReader.readVLong();
+ } else {
+ termState.payloadFP += termState.bytesReader.readVLong();
+ }
+ //System.out.println(" payloadFP=" + termState.payloadFP);
+ }
+ }
+ if (termState.docFreq >= skipInterval) {
+ //System.out.println(" readSkip @ " + termState.bytesReader.pos);
+ if (isFirstTerm) {
+ termState.skipFP = termState.bytesReader.readVLong();
+ } else {
+ termState.skipFP += termState.bytesReader.readVLong();
+ }
+ //System.out.println(" skipFP=" + termState.skipFP);
+ } else if (isFirstTerm) {
+ termState.skipFP = termState.bytesReader.readVLong();
+ }
}
@Override
- public DocsEnum docs(FieldInfo fieldInfo, PrefixCodedTermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException {
+ public DocsEnum docs(FieldInfo fieldInfo, BlockTermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException {
final SepTermState termState = (SepTermState) _termState;
SepDocsEnum docsEnum;
if (reuse == null || !(reuse instanceof SepDocsEnum)) {
@@ -187,7 +263,7 @@ public class SepPostingsReaderImpl exten
}
@Override
- public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, PrefixCodedTermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
+ public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
assert !fieldInfo.omitTermFreqAndPositions;
final SepTermState termState = (SepTermState) _termState;
SepDocsAndPositionsEnum postingsEnum;
@@ -219,7 +295,7 @@ public class SepPostingsReaderImpl exten
private Bits skipDocs;
private final IntIndexInput.Reader docReader;
private final IntIndexInput.Reader freqReader;
- private long skipOffset;
+ private long skipFP;
private final IntIndexInput.Index docIndex;
private final IntIndexInput.Index freqIndex;
@@ -260,18 +336,15 @@ public class SepPostingsReaderImpl exten
docIndex.seek(docReader);
if (!omitTF) {
- freqIndex.read(docReader, true);
+ freqIndex.set(termState.freqIndex);
freqIndex.seek(freqReader);
-
- posIndex.read(docReader, true);
- // skip payload offset
- docReader.readVLong();
} else {
freq = 1;
}
- skipOffset = docReader.readVLong();
docFreq = termState.docFreq;
+ // NOTE: unused if docFreq < skipInterval:
+ skipFP = termState.skipFP;
count = 0;
doc = 0;
skipped = false;
@@ -290,9 +363,11 @@ public class SepPostingsReaderImpl exten
count++;
// Decode next doc
+ //System.out.println("decode docDelta:");
doc += docReader.next();
if (!omitTF) {
+ //System.out.println("decode freq:");
freq = freqReader.next();
}
@@ -300,13 +375,13 @@ public class SepPostingsReaderImpl exten
break;
}
}
-
return doc;
}
@Override
public int read() throws IOException {
// TODO: -- switch to bulk read api in IntIndexInput
+ //System.out.println("sepdocs read");
final int[] docs = bulkResult.docs.ints;
final int[] freqs = bulkResult.freqs.ints;
int i = 0;
@@ -314,14 +389,17 @@ public class SepPostingsReaderImpl exten
while (i < length && count < docFreq) {
count++;
// manually inlined call to next() for speed
+ //System.out.println("decode doc");
doc += docReader.next();
if (!omitTF) {
+ //System.out.println("decode freq");
freq = freqReader.next();
}
if (skipDocs == null || !skipDocs.get(doc)) {
docs[i] = doc;
freqs[i] = freq;
+ //System.out.println(" docs[" + i + "]=" + doc + " count=" + count + " dF=" + docFreq);
i++;
}
}
@@ -361,7 +439,7 @@ public class SepPostingsReaderImpl exten
if (!skipped) {
// We haven't yet skipped for this posting
- skipper.init(skipOffset,
+ skipper.init(skipFP,
docIndex,
freqIndex,
posIndex,
@@ -411,14 +489,14 @@ public class SepPostingsReaderImpl exten
private final IntIndexInput.Reader freqReader;
private final IntIndexInput.Reader posReader;
private final IndexInput payloadIn;
- private long skipOffset;
+ private long skipFP;
private final IntIndexInput.Index docIndex;
private final IntIndexInput.Index freqIndex;
private final IntIndexInput.Index posIndex;
private final IntIndexInput startDocIn;
- private long payloadOffset;
+ private long payloadFP;
private int pendingPosCount;
private int position;
@@ -444,21 +522,26 @@ public class SepPostingsReaderImpl exten
SepDocsAndPositionsEnum init(FieldInfo fieldInfo, SepTermState termState, Bits skipDocs) throws IOException {
this.skipDocs = skipDocs;
storePayloads = fieldInfo.storePayloads;
+ //System.out.println("Sep D&P init");
// TODO: can't we only do this if consumer
// skipped consuming the previous docs?
docIndex.set(termState.docIndex);
docIndex.seek(docReader);
+ //System.out.println(" docIndex=" + docIndex);
- freqIndex.read(docReader, true);
+ freqIndex.set(termState.freqIndex);
freqIndex.seek(freqReader);
+ //System.out.println(" freqIndex=" + freqIndex);
- posIndex.read(docReader, true);
+ posIndex.set(termState.posIndex);
+ //System.out.println(" posIndex=" + posIndex);
posSeekPending = true;
payloadPending = false;
- payloadOffset = docReader.readVLong();
- skipOffset = docReader.readVLong();
+ payloadFP = termState.payloadFP;
+ skipFP = termState.skipFP;
+ //System.out.println(" skipFP=" + skipFP);
docFreq = termState.docFreq;
count = 0;
@@ -484,8 +567,10 @@ public class SepPostingsReaderImpl exten
// freq=1 case?
// Decode next doc
+ //System.out.println(" sep d&p read doc");
doc += docReader.next();
-
+
+ //System.out.println(" sep d&p read freq");
freq = freqReader.next();
pendingPosCount += freq;
@@ -511,6 +596,7 @@ public class SepPostingsReaderImpl exten
@Override
public int advance(int target) throws IOException {
+ //System.out.println("SepD&P advance target=" + target + " vs current=" + doc + " this=" + this);
// TODO: jump right to next() if target is < X away
// from where we are now?
@@ -521,6 +607,7 @@ public class SepPostingsReaderImpl exten
// skip data
if (skipper == null) {
+ //System.out.println(" create skipper");
// This DocsEnum has never done any skipping
skipper = new SepSkipListReader((IndexInput) skipIn.clone(),
freqIn,
@@ -530,46 +617,54 @@ public class SepPostingsReaderImpl exten
}
if (!skipped) {
+ //System.out.println(" init skip data skipFP=" + skipFP);
// We haven't yet skipped for this posting
- skipper.init(skipOffset,
+ skipper.init(skipFP,
docIndex,
freqIndex,
posIndex,
- payloadOffset,
+ payloadFP,
docFreq,
storePayloads);
skipped = true;
}
-
final int newCount = skipper.skipTo(target);
+ //System.out.println(" skip newCount=" + newCount + " vs " + count);
if (newCount > count) {
// Skipper did move
skipper.getFreqIndex().seek(freqReader);
skipper.getDocIndex().seek(docReader);
- //skipper.getPosIndex().seek(posReader);
+ // NOTE: don't seek pos here; do it lazily
+ // instead. Eg a PhraseQuery may skip to many
+ // docs before finally asking for positions...
posIndex.set(skipper.getPosIndex());
posSeekPending = true;
count = newCount;
doc = skipper.getDoc();
+ //System.out.println(" moved to doc=" + doc);
//payloadIn.seek(skipper.getPayloadPointer());
- payloadOffset = skipper.getPayloadPointer();
+ payloadFP = skipper.getPayloadPointer();
pendingPosCount = 0;
pendingPayloadBytes = 0;
payloadPending = false;
payloadLength = skipper.getPayloadLength();
+ //System.out.println(" move payloadLen=" + payloadLength);
}
}
// Now, linear scan for the rest:
do {
if (nextDoc() == NO_MORE_DOCS) {
+ //System.out.println(" advance nextDoc=END");
return NO_MORE_DOCS;
}
+ //System.out.println(" advance nextDoc=" + doc);
} while (target > doc);
+ //System.out.println(" return doc=" + doc);
return doc;
}
@@ -577,7 +672,7 @@ public class SepPostingsReaderImpl exten
public int nextPosition() throws IOException {
if (posSeekPending) {
posIndex.seek(posReader);
- payloadIn.seek(payloadOffset);
+ payloadIn.seek(payloadFP);
posSeekPending = false;
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java Thu Jan 20 18:53:55 2011
@@ -27,6 +27,7 @@ import org.apache.lucene.index.SegmentWr
import org.apache.lucene.index.codecs.PostingsWriterBase;
import org.apache.lucene.index.codecs.TermStats;
import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
@@ -69,8 +70,7 @@ public final class SepPostingsWriterImpl
boolean storePayloads;
boolean omitTF;
- // Starts a new term
- long lastSkipStart;
+ long lastSkipFP;
FieldInfo fieldInfo;
@@ -80,7 +80,10 @@ public final class SepPostingsWriterImpl
long lastPayloadStart;
int lastDocID;
int df;
- private boolean firstDoc;
+ private int pendingTermCount;
+
+ // Holds pending byte[] blob for the current terms block
+ private final RAMOutputStream indexBytesWriter = new RAMOutputStream();
public SepPostingsWriterImpl(SegmentWriteState state, IntStreamFactory factory) throws IOException {
super();
@@ -144,13 +147,9 @@ public final class SepPostingsWriterImpl
payloadStart = payloadOut.getFilePointer();
lastPayloadLength = -1;
}
- firstDoc = true;
skipListWriter.resetSkip(docIndex, freqIndex, posIndex);
}
- // TODO: -- should we NOT reuse across fields? would
- // be cleaner
-
// Currently, this instance is re-used across fields, so
// our parent calls setField whenever the field changes
@Override
@@ -161,27 +160,13 @@ public final class SepPostingsWriterImpl
storePayloads = !omitTF && fieldInfo.storePayloads;
}
-
/** Adds a new doc in this term. If this returns null
* then we just skip consuming positions/payloads. */
@Override
public void startDoc(int docID, int termDocFreq) throws IOException {
- if (firstDoc) {
- // TODO: we are writing absolute file pointers below,
- // which is wasteful. It'd be better compression to
- // write the "baseline" into each indexed term, then
- // write only the delta here.
- if (!omitTF) {
- freqIndex.write(docOut, true);
- posIndex.write(docOut, true);
- docOut.writeVLong(payloadStart);
- }
- docOut.writeVLong(skipOut.getFilePointer());
- firstDoc = false;
- }
-
final int delta = docID - lastDocID;
+ //System.out.println("SepW startDoc: write doc=" + docID + " delta=" + delta);
if (docID < 0 || (df > 0 && delta <= 0)) {
throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " )");
@@ -190,6 +175,7 @@ public final class SepPostingsWriterImpl
if ((++df % skipInterval) == 0) {
// TODO: -- awkward we have to make these two
// separate calls to skipper
+ //System.out.println(" buffer skip lastDocID=" + lastDocID);
skipListWriter.setSkipData(lastDocID, storePayloads, lastPayloadLength);
skipListWriter.bufferSkip(df);
}
@@ -197,10 +183,20 @@ public final class SepPostingsWriterImpl
lastDocID = docID;
docOut.write(delta);
if (!omitTF) {
+ //System.out.println(" sepw startDoc: write freq=" + termDocFreq);
freqOut.write(termDocFreq);
}
}
+ @Override
+ public void flushTermsBlock() throws IOException {
+ // Block layout: VInt byte length, then the buffered per-term index bytes; SepPostingsReaderImpl.readTermsBlock reads the length with readVInt.
+ termsOut.writeVInt((int) indexBytesWriter.getFilePointer());
+ indexBytesWriter.writeTo(termsOut);
+ indexBytesWriter.reset();
+ pendingTermCount = 0; // so the next finishTerm is treated as the first term of a new block
+ }
+
/** Add a new position & payload */
@Override
public void addPosition(int position, BytesRef payload) throws IOException {
@@ -240,20 +236,57 @@ public final class SepPostingsWriterImpl
/** Called when we are done adding docs to this term */
@Override
- public void finishTerm(TermStats stats, boolean isIndexTerm) throws IOException {
-
+ public void finishTerm(TermStats stats) throws IOException {
// TODO: -- wasteful we are counting this in two places?
assert stats.docFreq > 0;
assert stats.docFreq == df;
- docIndex.write(termsOut, isIndexTerm);
+ final boolean isFirstTerm = pendingTermCount == 0;
+ //System.out.println("SepW.finishTerm: isFirstTerm=" + isFirstTerm);
+
+ docIndex.write(indexBytesWriter, isFirstTerm);
+ //System.out.println(" docIndex=" + docIndex);
+
+ if (!omitTF) {
+ freqIndex.write(indexBytesWriter, isFirstTerm);
+ //System.out.println(" freqIndex=" + freqIndex);
+
+ posIndex.write(indexBytesWriter, isFirstTerm);
+ //System.out.println(" posIndex=" + posIndex);
+ if (storePayloads) {
+ if (isFirstTerm) {
+ indexBytesWriter.writeVLong(payloadStart);
+ } else {
+ indexBytesWriter.writeVLong(payloadStart - lastPayloadStart);
+ }
+ lastPayloadStart = payloadStart;
+ //System.out.println(" payloadFP=" + payloadStart);
+ }
+ }
if (df >= skipInterval) {
+ //System.out.println(" skipFP=" + skipStart);
+ final long skipFP = skipOut.getFilePointer();
skipListWriter.writeSkip(skipOut);
+ //System.out.println(" writeSkip @ " + indexBytesWriter.getFilePointer());
+ if (isFirstTerm) {
+ indexBytesWriter.writeVLong(skipFP);
+ } else {
+ indexBytesWriter.writeVLong(skipFP - lastSkipFP);
+ }
+ lastSkipFP = skipFP;
+ } else if (isFirstTerm) {
+ // TODO: this is somewhat wasteful; eg if no terms in
+ // this block will use skip data, we don't need to
+ // write this:
+ final long skipFP = skipOut.getFilePointer();
+ indexBytesWriter.writeVLong(skipFP);
+ lastSkipFP = skipFP;
}
lastDocID = 0;
df = 0;
+ pendingTermCount++;
}
@Override
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java Thu Jan 20 18:53:55 2011
@@ -33,8 +33,8 @@ import org.apache.lucene.index.codecs.Te
import org.apache.lucene.index.codecs.TermsIndexReaderBase;
import org.apache.lucene.index.codecs.VariableGapTermsIndexWriter;
import org.apache.lucene.index.codecs.VariableGapTermsIndexReader;
-import org.apache.lucene.index.codecs.PrefixCodedTermsWriter;
-import org.apache.lucene.index.codecs.PrefixCodedTermsReader;
+import org.apache.lucene.index.codecs.BlockTermsWriter;
+import org.apache.lucene.index.codecs.BlockTermsReader;
import org.apache.lucene.store.Directory;
/** Default codec.
@@ -66,7 +66,7 @@ public class StandardCodec extends Codec
success = false;
try {
- FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, docs, BytesRef.getUTF8SortedAsUnicodeComparator());
+ FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, docs, BytesRef.getUTF8SortedAsUnicodeComparator());
success = true;
return ret;
} finally {
@@ -103,15 +103,15 @@ public class StandardCodec extends Codec
success = false;
try {
- FieldsProducer ret = new PrefixCodedTermsReader(indexReader,
- state.dir,
- state.fieldInfos,
- state.segmentInfo.name,
- postings,
- state.readBufferSize,
- BytesRef.getUTF8SortedAsUnicodeComparator(),
- TERMS_CACHE_SIZE,
- state.codecId);
+ FieldsProducer ret = new BlockTermsReader(indexReader,
+ state.dir,
+ state.fieldInfos,
+ state.segmentInfo.name,
+ postings,
+ state.readBufferSize,
+ BytesRef.getUTF8SortedAsUnicodeComparator(),
+ TERMS_CACHE_SIZE,
+ state.codecId);
success = true;
return ret;
} finally {
@@ -134,7 +134,7 @@ public class StandardCodec extends Codec
@Override
public void files(Directory dir, SegmentInfo segmentInfo, String id, Set<String> files) throws IOException {
StandardPostingsReader.files(dir, segmentInfo, id, files);
- PrefixCodedTermsReader.files(dir, segmentInfo, id, files);
+ BlockTermsReader.files(dir, segmentInfo, id, files);
VariableGapTermsIndexReader.files(dir, segmentInfo, id, files);
}
@@ -146,7 +146,7 @@ public class StandardCodec extends Codec
public static void getStandardExtensions(Set<String> extensions) {
extensions.add(FREQ_EXTENSION);
extensions.add(PROX_EXTENSION);
- PrefixCodedTermsReader.getExtensions(extensions);
+ BlockTermsReader.getExtensions(extensions);
VariableGapTermsIndexReader.getIndexExtensions(extensions);
}
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java Thu Jan 20 18:53:55 2011
@@ -20,16 +20,18 @@ package org.apache.lucene.index.codecs.s
import java.io.IOException;
import java.util.Collection;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.codecs.BlockTermState;
import org.apache.lucene.index.codecs.PostingsReaderBase;
-import org.apache.lucene.index.codecs.PrefixCodedTermState;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
@@ -46,9 +48,12 @@ public class StandardPostingsReader exte
int skipInterval;
int maxSkipLevels;
+ //private String segment;
+
public StandardPostingsReader(Directory dir, SegmentInfo segmentInfo, int readBufferSize, String codecId) throws IOException {
freqIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, StandardCodec.FREQ_EXTENSION),
readBufferSize);
+ //this.segment = segmentInfo.name;
if (segmentInfo.getHasProx()) {
boolean success = false;
try {
@@ -84,11 +89,16 @@ public class StandardPostingsReader exte
}
// Must keep final because we do non-standard clone
- private final static class StandardTermState extends PrefixCodedTermState {
+ private final static class StandardTermState extends BlockTermState {
long freqOffset;
long proxOffset;
int skipOffset;
+ // Only used by the "primary" TermState -- clones don't
+ // copy this (basically they are "transient"):
+ ByteArrayDataInput bytesReader;
+ byte[] bytes;
+
public Object clone() {
StandardTermState other = new StandardTermState();
other.copyFrom(this);
@@ -101,6 +111,11 @@ public class StandardPostingsReader exte
freqOffset = other.freqOffset;
proxOffset = other.proxOffset;
skipOffset = other.skipOffset;
+
+ // Do not copy bytes, bytesReader (else TermState is
+ // very heavy, ie drags around the entire block's
+ // byte[]). On seek back, if next() is in fact used
+ // (rare!), they will be re-read from disk.
}
public String toString() {
@@ -109,7 +124,7 @@ public class StandardPostingsReader exte
}
@Override
- public PrefixCodedTermState newTermState() {
+ public BlockTermState newTermState() {
return new StandardTermState();
}
@@ -126,34 +141,58 @@ public class StandardPostingsReader exte
}
}
+ /* Reads but does not decode the byte[] blob holding
+ metadata for the current terms block */
@Override
- public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, PrefixCodedTermState termState, boolean isIndexTerm)
+ public void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
+ final StandardTermState termState = (StandardTermState) _termState;
+
+ final int len = termsIn.readVInt();
+ //System.out.println("SPR.readTermsBlock termsIn.fp=" + termsIn.getFilePointer());
+ if (termState.bytes == null) {
+ termState.bytes = new byte[ArrayUtil.oversize(len, 1)];
+ termState.bytesReader = new ByteArrayDataInput(null);
+ } else if (termState.bytes.length < len) {
+ termState.bytes = new byte[ArrayUtil.oversize(len, 1)];
+ }
+
+ termsIn.readBytes(termState.bytes, 0, len);
+ termState.bytesReader.reset(termState.bytes, 0, len);
+ }
+
+ @Override
+ public void nextTerm(FieldInfo fieldInfo, BlockTermState _termState)
throws IOException {
- final StandardTermState docTermState = (StandardTermState) termState;
+ final StandardTermState termState = (StandardTermState) _termState;
+ //System.out.println("StandardR.nextTerm seg=" + segment);
+ final boolean isFirstTerm = termState.termCount == 0;
- if (isIndexTerm) {
- docTermState.freqOffset = termsIn.readVLong();
+ if (isFirstTerm) {
+ termState.freqOffset = termState.bytesReader.readVLong();
} else {
- docTermState.freqOffset += termsIn.readVLong();
+ termState.freqOffset += termState.bytesReader.readVLong();
}
+ //System.out.println(" freqFP=" + termState.freqOffset);
- if (docTermState.docFreq >= skipInterval) {
- docTermState.skipOffset = termsIn.readVInt();
+ if (termState.docFreq >= skipInterval) {
+ termState.skipOffset = termState.bytesReader.readVInt();
+ //System.out.println(" skipOffset=" + termState.skipOffset);
} else {
- docTermState.skipOffset = 0;
+ // undefined
}
if (!fieldInfo.omitTermFreqAndPositions) {
- if (isIndexTerm) {
- docTermState.proxOffset = termsIn.readVLong();
+ if (isFirstTerm) {
+ termState.proxOffset = termState.bytesReader.readVLong();
} else {
- docTermState.proxOffset += termsIn.readVLong();
+ termState.proxOffset += termState.bytesReader.readVLong();
}
+ //System.out.println(" proxFP=" + termState.proxOffset);
}
}
@Override
- public DocsEnum docs(FieldInfo fieldInfo, PrefixCodedTermState termState, Bits skipDocs, DocsEnum reuse) throws IOException {
+ public DocsEnum docs(FieldInfo fieldInfo, BlockTermState termState, Bits skipDocs, DocsEnum reuse) throws IOException {
SegmentDocsEnum docsEnum;
if (reuse == null || !(reuse instanceof SegmentDocsEnum)) {
docsEnum = new SegmentDocsEnum(freqIn);
@@ -170,7 +209,7 @@ public class StandardPostingsReader exte
}
@Override
- public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, PrefixCodedTermState termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
+ public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
if (fieldInfo.omitTermFreqAndPositions) {
return null;
}
@@ -248,8 +287,10 @@ public class StandardPostingsReader exte
// cases
freqIn.seek(termState.freqOffset);
limit = termState.docFreq;
+ assert limit > 0;
ord = 0;
doc = 0;
+ //System.out.println(" sde limit=" + limit + " freqFP=" + freqOffset);
skipped = false;
@@ -420,6 +461,8 @@ public class StandardPostingsReader exte
lazyProxPointer = termState.proxOffset;
limit = termState.docFreq;
+ assert limit > 0;
+
ord = 0;
doc = 0;
position = 0;
@@ -430,6 +473,7 @@ public class StandardPostingsReader exte
freqOffset = termState.freqOffset;
proxOffset = termState.proxOffset;
skipOffset = termState.skipOffset;
+ //System.out.println("StandardR.D&PE reset seg=" + segment + " limit=" + limit + " freqFP=" + freqOffset + " proxFP=" + proxOffset);
return this;
}
@@ -438,6 +482,7 @@ public class StandardPostingsReader exte
public int nextDoc() throws IOException {
while(true) {
if (ord == limit) {
+ //System.out.println("StandardR.D&PE seg=" + segment + " nextDoc return doc=END");
return doc = NO_MORE_DOCS;
}
@@ -461,6 +506,7 @@ public class StandardPostingsReader exte
position = 0;
+ //System.out.println("StandardR.D&PE nextDoc seg=" + segment + " return doc=" + doc);
return doc;
}
@@ -477,6 +523,8 @@ public class StandardPostingsReader exte
@Override
public int advance(int target) throws IOException {
+ //System.out.println("StandardR.D&PE advance target=" + target);
+
// TODO: jump right to next() if target is < X away
// from where we are now?
@@ -622,6 +670,7 @@ public class StandardPostingsReader exte
freqOffset = termState.freqOffset;
proxOffset = termState.proxOffset;
skipOffset = termState.skipOffset;
+ //System.out.println("StandardR.D&PE reset seg=" + segment + " limit=" + limit + " freqFP=" + freqOffset + " proxFP=" + proxOffset);
return this;
}
@@ -630,6 +679,7 @@ public class StandardPostingsReader exte
public int nextDoc() throws IOException {
while(true) {
if (ord == limit) {
+ //System.out.println("StandardR.D&PE seg=" + segment + " nextDoc return doc=END");
return doc = NO_MORE_DOCS;
}
@@ -653,6 +703,7 @@ public class StandardPostingsReader exte
position = 0;
+ //System.out.println("StandardR.D&PE nextDoc seg=" + segment + " return doc=" + doc);
return doc;
}
@@ -748,6 +799,7 @@ public class StandardPostingsReader exte
posPendingCount--;
position = 0;
payloadPending = false;
+ //System.out.println("StandardR.D&PE skipPos");
}
// read next position
@@ -771,6 +823,7 @@ public class StandardPostingsReader exte
assert posPendingCount >= 0: "nextPosition() was called too many times (more than freq() times) posPendingCount=" + posPendingCount;
+ //System.out.println("StandardR.D&PE nextPos return pos=" + position);
return position;
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java Thu Jan 20 18:53:55 2011
@@ -22,13 +22,14 @@ package org.apache.lucene.index.codecs.s
import java.io.IOException;
-import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.codecs.PostingsWriterBase;
import org.apache.lucene.index.codecs.TermStats;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
@@ -59,8 +60,15 @@ public final class StandardPostingsWrite
int lastPayloadLength;
int lastPosition;
+ private int pendingCount;
+
+ //private String segment;
+
+ private RAMOutputStream bytesWriter = new RAMOutputStream();
+
public StandardPostingsWriter(SegmentWriteState state) throws IOException {
super();
+ //this.segment = state.segmentName;
String fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.FREQ_EXTENSION);
freqOut = state.directory.createOutput(fileName);
@@ -96,6 +104,7 @@ public final class StandardPostingsWrite
@Override
public void startTerm() {
+ //System.out.println("StandardW: startTerm seg=" + segment + " pendingCount=" + pendingCount);
freqStart = freqOut.getFilePointer();
if (proxOut != null) {
proxStart = proxOut.getFilePointer();
@@ -109,9 +118,12 @@ public final class StandardPostingsWrite
// our parent calls setField whenever the field changes
@Override
public void setField(FieldInfo fieldInfo) {
+ //System.out.println("SPW: setField");
this.fieldInfo = fieldInfo;
omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
storePayloads = fieldInfo.storePayloads;
+ //System.out.println(" set init blockFreqStart=" + freqStart);
+ //System.out.println(" set init blockProxStart=" + proxStart);
}
int lastDocID;
@@ -121,6 +133,7 @@ public final class StandardPostingsWrite
* then we just skip consuming positions/payloads. */
@Override
public void startDoc(int docID, int termDocFreq) throws IOException {
+ //System.out.println("StandardW: startDoc seg=" + segment + " docID=" + docID + " tf=" + termDocFreq);
final int delta = docID - lastDocID;
@@ -151,6 +164,7 @@ public final class StandardPostingsWrite
/** Add a new position & payload */
@Override
public void addPosition(int position, BytesRef payload) throws IOException {
+ //System.out.println("StandardW: addPos pos=" + position + " payload=" + (payload == null ? "null" : (payload.length + " bytes")) + " proxFP=" + proxOut.getFilePointer());
assert !omitTermFreqAndPositions: "omitTermFreqAndPositions is true";
assert proxOut != null;
@@ -185,40 +199,51 @@ public final class StandardPostingsWrite
/** Called when we are done adding docs to this term */
@Override
- public void finishTerm(TermStats stats, boolean isIndexTerm) throws IOException {
+ public void finishTerm(TermStats stats) throws IOException {
+ //System.out.println("StandardW.finishTerm seg=" + segment);
assert stats.docFreq > 0;
// TODO: wasteful we are counting this (counting # docs
// for this term) in two places?
assert stats.docFreq == df;
- if (isIndexTerm) {
- // Write absolute at seek points
- termsOut.writeVLong(freqStart);
+ final boolean isFirstTerm = pendingCount == 0;
+ //System.out.println(" isFirstTerm=" + isFirstTerm);
+
+ //System.out.println(" freqFP=" + freqStart);
+ if (isFirstTerm) {
+ bytesWriter.writeVLong(freqStart);
} else {
- // Write delta between seek points
- termsOut.writeVLong(freqStart - lastFreqStart);
+ bytesWriter.writeVLong(freqStart-lastFreqStart);
}
-
lastFreqStart = freqStart;
if (df >= skipInterval) {
- termsOut.writeVInt((int) (skipListWriter.writeSkip(freqOut)-freqStart));
+ bytesWriter.writeVInt((int) (skipListWriter.writeSkip(freqOut)-freqStart));
}
-
+
if (!omitTermFreqAndPositions) {
- if (isIndexTerm) {
- // Write absolute at seek points
- termsOut.writeVLong(proxStart);
+ //System.out.println(" proxFP=" + proxStart);
+ if (isFirstTerm) {
+ bytesWriter.writeVLong(proxStart);
} else {
- // Write delta between seek points
- termsOut.writeVLong(proxStart - lastProxStart);
+ bytesWriter.writeVLong(proxStart - lastProxStart);
}
lastProxStart = proxStart;
}
-
+
lastDocID = 0;
df = 0;
+ pendingCount++;
+ }
+
+ @Override
+ public void flushTermsBlock() throws IOException {
+ //System.out.println("SPW.flushBlock pendingCount=" + pendingCount);
+ termsOut.writeVInt((int) bytesWriter.getFilePointer());
+ bytesWriter.writeTo(termsOut);
+ bytesWriter.reset();
+ pendingCount = 0;
}
@Override
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java Thu Jan 20 18:53:55 2011
@@ -122,12 +122,12 @@ public abstract class FilteredTermsEnum
}
@Override
- public int docFreq() {
+ public int docFreq() throws IOException {
return tenum.docFreq();
}
@Override
- public long totalTermFreq() {
+ public long totalTermFreq() throws IOException {
return tenum.totalTermFreq();
}
@@ -166,7 +166,7 @@ public abstract class FilteredTermsEnum
* @throws UnsupportedOperationException
*/
@Override
- public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+ public void seek(BytesRef term, TermState state) throws IOException {
throw new UnsupportedOperationException(getClass().getName()+" does not support seeking");
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java Thu Jan 20 18:53:55 2011
@@ -241,12 +241,12 @@ public final class FuzzyTermsEnum extend
// proxy all other enum calls to the actual enum
@Override
- public int docFreq() {
+ public int docFreq() throws IOException {
return actualEnum.docFreq();
}
@Override
- public long totalTermFreq() {
+ public long totalTermFreq() throws IOException {
return actualEnum.totalTermFreq();
}
@@ -261,8 +261,8 @@ public final class FuzzyTermsEnum extend
return actualEnum.docsAndPositions(skipDocs, reuse);
}
- public SeekStatus seek(BytesRef term, TermState state) throws IOException {
- return actualEnum.seek(term, state);
+ public void seek(BytesRef term, TermState state) throws IOException {
+ actualEnum.seek(term, state);
}
@Override
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java Thu Jan 20 18:53:55 2011
@@ -341,9 +341,9 @@ public class DocTermsIndexCreator extend
}
@Override
- public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+ public void seek(BytesRef term, TermState state) throws IOException {
assert state != null && state instanceof OrdTermState;
- return this.seek(((OrdTermState)state).ord);
+ this.seek(((OrdTermState)state).ord);
}
@Override
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java Thu Jan 20 18:53:55 2011
@@ -21,7 +21,9 @@ package org.apache.lucene.store;
public final class ByteArrayDataInput extends DataInput {
private byte[] bytes;
+
private int pos;
+ private int limit;
// TODO: allow BytesRef (slice) too
public ByteArrayDataInput(byte[] bytes) {
@@ -29,27 +31,80 @@ public final class ByteArrayDataInput ex
}
public void reset(byte[] bytes) {
+ reset(bytes, 0, bytes.length);
+ }
+
+ public int getPosition() {
+ return pos;
+ }
+
+ public void reset(byte[] bytes, int offset, int len) {
this.bytes = bytes;
- pos = 0;
+ pos = offset;
+ limit = len;
}
public boolean eof() {
- return pos == bytes.length;
+ return pos == limit;
}
public void skipBytes(int count) {
pos += count;
}
+ @Override
+ public short readShort() {
+ return (short) (((bytes[pos++] & 0xFF) << 8) | (bytes[pos++] & 0xFF));
+ }
+
+ @Override
+ public int readInt() {
+ return ((bytes[pos++] & 0xFF) << 24) | ((bytes[pos++] & 0xFF) << 16)
+ | ((bytes[pos++] & 0xFF) << 8) | (bytes[pos++] & 0xFF);
+ }
+
+ @Override
+ public long readLong() {
+ final int i1 = ((bytes[pos++] & 0xff) << 24) | ((bytes[pos++] & 0xff) << 16) |
+ ((bytes[pos++] & 0xff) << 8) | (bytes[pos++] & 0xff);
+ final int i2 = ((bytes[pos++] & 0xff) << 24) | ((bytes[pos++] & 0xff) << 16) |
+ ((bytes[pos++] & 0xff) << 8) | (bytes[pos++] & 0xff);
+ return (((long)i1) << 32) | (i2 & 0xFFFFFFFFL);
+ }
+
+ @Override
+ public int readVInt() {
+ byte b = bytes[pos++];
+ int i = b & 0x7F;
+ for (int shift = 7; (b & 0x80) != 0; shift += 7) {
+ b = bytes[pos++];
+ i |= (b & 0x7F) << shift;
+ }
+ return i;
+ }
+
+ @Override
+ public long readVLong() {
+ byte b = bytes[pos++];
+ long i = b & 0x7F;
+ for (int shift = 7; (b & 0x80) != 0; shift += 7) {
+ b = bytes[pos++];
+ i |= (b & 0x7FL) << shift;
+ }
+ return i;
+ }
+
// NOTE: AIOOBE not EOF if you read too much
@Override
public byte readByte() {
+ assert pos < limit;
return bytes[pos++];
}
// NOTE: AIOOBE not EOF if you read too much
@Override
public void readBytes(byte[] b, int offset, int len) {
+ assert pos + len <= limit;
System.arraycopy(bytes, pos, b, offset, len);
pos += len;
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/store/DataInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/store/DataInput.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/store/DataInput.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/store/DataInput.java Thu Jan 20 18:53:55 2011
@@ -79,7 +79,7 @@ public abstract class DataInput implemen
* supported.
* @see DataOutput#writeVInt(int)
*/
- public final int readVInt() throws IOException {
+ public int readVInt() throws IOException {
byte b = readByte();
int i = b & 0x7F;
for (int shift = 7; (b & 0x80) != 0; shift += 7) {
@@ -99,7 +99,7 @@ public abstract class DataInput implemen
/** Reads a long stored in variable-length format. Reads between one and
* nine bytes. Smaller values take fewer bytes. Negative numbers are not
* supported. */
- public final long readVLong() throws IOException {
+ public long readVLong() throws IOException {
byte b = readByte();
long i = b & 0x7F;
for (int shift = 7; (b & 0x80) != 0; shift += 7) {
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestExternalCodecs.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestExternalCodecs.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestExternalCodecs.java Thu Jan 20 18:53:55 2011
@@ -245,7 +245,6 @@ public class TestExternalCodecs extends
}
}
-
// Classes for reading from the postings state
static class RAMFieldsEnum extends FieldsEnum {
private final RAMPostings postings;
@@ -543,7 +542,7 @@ public class TestExternalCodecs extends
// Terms dict
success = false;
try {
- FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, pulsingWriter, reverseUnicodeComparator);
+ FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, pulsingWriter, reverseUnicodeComparator);
success = true;
return ret;
} finally {
@@ -584,15 +583,15 @@ public class TestExternalCodecs extends
// Terms dict reader
success = false;
try {
- FieldsProducer ret = new PrefixCodedTermsReader(indexReader,
- state.dir,
- state.fieldInfos,
- state.segmentInfo.name,
- pulsingReader,
- state.readBufferSize,
- reverseUnicodeComparator,
- StandardCodec.TERMS_CACHE_SIZE,
- state.codecId);
+ FieldsProducer ret = new BlockTermsReader(indexReader,
+ state.dir,
+ state.fieldInfos,
+ state.segmentInfo.name,
+ pulsingReader,
+ state.readBufferSize,
+ reverseUnicodeComparator,
+ StandardCodec.TERMS_CACHE_SIZE,
+ state.codecId);
success = true;
return ret;
} finally {
@@ -609,7 +608,7 @@ public class TestExternalCodecs extends
@Override
public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set<String> files) throws IOException {
StandardPostingsReader.files(dir, segmentInfo, codecId, files);
- PrefixCodedTermsReader.files(dir, segmentInfo, codecId, files);
+ BlockTermsReader.files(dir, segmentInfo, codecId, files);
FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
}
@@ -637,6 +636,7 @@ public class TestExternalCodecs extends
setCodecProvider(provider).
setMergePolicy(newLogMergePolicy(3))
);
+ w.setInfoStream(VERBOSE ? System.out : null);
Document doc = new Document();
// uses default codec:
doc.add(newField("field1", "this field uses the standard codec as the test", Field.Store.NO, Field.Index.ANALYZED));
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestSearch.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestSearch.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestSearch.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestSearch.java Thu Jan 20 18:53:55 2011
@@ -111,6 +111,9 @@ public class TestSearch extends LuceneTe
for (int j = 0; j < queries.length; j++) {
Query query = parser.parse(queries[j]);
out.println("Query: " + query.toString("contents"));
+ if (VERBOSE) {
+ System.out.println("TEST: query=" + query);
+ }
hits = searcher.search(query, null, 1000).scoreDocs;
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java Thu Jan 20 18:53:55 2011
@@ -108,6 +108,9 @@ public class TestSearchForDuplicates ext
Query query = parser.parse(HIGH_PRIORITY);
out.println("Query: " + query.toString(PRIORITY_FIELD));
+ if (VERBOSE) {
+ System.out.println("TEST: search query=" + query);
+ }
ScoreDoc[] hits = searcher.search(query, null, MAX_DOCS).scoreDocs;
printHits(out, hits, searcher);
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java Thu Jan 20 18:53:55 2011
@@ -55,6 +55,7 @@ public class TestAddIndexes extends Luce
writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer())
.setOpenMode(OpenMode.CREATE));
+ writer.setInfoStream(VERBOSE ? System.out : null);
// add 100 documents
addDocs(writer, 100);
assertEquals(100, writer.maxDoc());
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReader.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReader.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReader.java Thu Jan 20 18:53:55 2011
@@ -882,6 +882,10 @@ public class TestIndexReader extends Luc
// First build up a starting index:
MockDirectoryWrapper startDir = newDirectory();
IndexWriter writer = new IndexWriter(startDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+ if (VERBOSE) {
+ System.out.println("TEST: create initial index");
+ writer.setInfoStream(System.out);
+ }
for(int i=0;i<157;i++) {
Document d = new Document();
d.add(newField("id", Integer.toString(i), Field.Store.YES, Field.Index.NOT_ANALYZED));
@@ -892,6 +896,19 @@ public class TestIndexReader extends Luc
}
writer.close();
+ {
+ IndexReader r = IndexReader.open(startDir);
+ IndexSearcher searcher = new IndexSearcher(r);
+ ScoreDoc[] hits = null;
+ try {
+ hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
+ } catch (IOException e) {
+ e.printStackTrace();
+ fail("exception when init searching: " + e);
+ }
+ r.close();
+ }
+
long diskUsage = startDir.getRecomputedActualSizeInBytes();
long diskFree = diskUsage+100;
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java Thu Jan 20 18:53:55 2011
@@ -977,7 +977,11 @@ public class TestIndexReaderReopen exten
static void modifyIndex(int i, Directory dir) throws IOException {
switch (i) {
case 0: {
+ if (VERBOSE) {
+ System.out.println("TEST: modify index");
+ }
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+ w.setInfoStream(VERBOSE ? System.out : null);
w.deleteDocuments(new Term("field2", "a11"));
w.deleteDocuments(new Term("field2", "b30"));
w.close();
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMultiFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMultiFields.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMultiFields.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMultiFields.java Thu Jan 20 18:53:55 2011
@@ -97,6 +97,9 @@ public class TestMultiFields extends Luc
for(int i=0;i<100;i++) {
BytesRef term = terms.get(random.nextInt(terms.size()));
+ if (VERBOSE) {
+ System.out.println("TEST: seek to term= "+ UnicodeUtil.toHexString(term.utf8ToString()));
+ }
DocsEnum docsEnum = terms2.docs(delDocs, term, null);
assertNotNull(docsEnum);
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java Thu Jan 20 18:53:55 2011
@@ -37,8 +37,8 @@ import org.apache.lucene.index.codecs.Fi
import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter;
import org.apache.lucene.index.codecs.PostingsWriterBase;
import org.apache.lucene.index.codecs.PostingsReaderBase;
-import org.apache.lucene.index.codecs.PrefixCodedTermsReader;
-import org.apache.lucene.index.codecs.PrefixCodedTermsWriter;
+import org.apache.lucene.index.codecs.BlockTermsReader;
+import org.apache.lucene.index.codecs.BlockTermsWriter;
import org.apache.lucene.index.codecs.TermsIndexReaderBase;
import org.apache.lucene.index.codecs.TermsIndexWriterBase;
import org.apache.lucene.index.codecs.standard.StandardCodec;
@@ -126,7 +126,7 @@ public class MockFixedIntBlockCodec exte
success = false;
try {
- FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+ FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
success = true;
return ret;
} finally {
@@ -164,15 +164,15 @@ public class MockFixedIntBlockCodec exte
success = false;
try {
- FieldsProducer ret = new PrefixCodedTermsReader(indexReader,
- state.dir,
- state.fieldInfos,
- state.segmentInfo.name,
- postingsReader,
- state.readBufferSize,
- BytesRef.getUTF8SortedAsUnicodeComparator(),
- StandardCodec.TERMS_CACHE_SIZE,
- state.codecId);
+ FieldsProducer ret = new BlockTermsReader(indexReader,
+ state.dir,
+ state.fieldInfos,
+ state.segmentInfo.name,
+ postingsReader,
+ state.readBufferSize,
+ BytesRef.getUTF8SortedAsUnicodeComparator(),
+ StandardCodec.TERMS_CACHE_SIZE,
+ state.codecId);
success = true;
return ret;
} finally {
@@ -189,14 +189,14 @@ public class MockFixedIntBlockCodec exte
@Override
public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set<String> files) {
SepPostingsReaderImpl.files(segmentInfo, codecId, files);
- PrefixCodedTermsReader.files(dir, segmentInfo, codecId, files);
+ BlockTermsReader.files(dir, segmentInfo, codecId, files);
FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
}
@Override
public void getExtensions(Set<String> extensions) {
SepPostingsWriterImpl.getExtensions(extensions);
- PrefixCodedTermsReader.getExtensions(extensions);
+ BlockTermsReader.getExtensions(extensions);
FixedGapTermsIndexReader.getIndexExtensions(extensions);
}
}
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java Thu Jan 20 18:53:55 2011
@@ -37,8 +37,8 @@ import org.apache.lucene.index.codecs.Fi
import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter;
import org.apache.lucene.index.codecs.PostingsWriterBase;
import org.apache.lucene.index.codecs.PostingsReaderBase;
-import org.apache.lucene.index.codecs.PrefixCodedTermsReader;
-import org.apache.lucene.index.codecs.PrefixCodedTermsWriter;
+import org.apache.lucene.index.codecs.BlockTermsReader;
+import org.apache.lucene.index.codecs.BlockTermsWriter;
import org.apache.lucene.index.codecs.TermsIndexReaderBase;
import org.apache.lucene.index.codecs.TermsIndexWriterBase;
import org.apache.lucene.index.codecs.standard.StandardCodec;
@@ -150,7 +150,7 @@ public class MockVariableIntBlockCodec e
success = false;
try {
- FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+ FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
success = true;
return ret;
} finally {
@@ -189,15 +189,15 @@ public class MockVariableIntBlockCodec e
success = false;
try {
- FieldsProducer ret = new PrefixCodedTermsReader(indexReader,
- state.dir,
- state.fieldInfos,
- state.segmentInfo.name,
- postingsReader,
- state.readBufferSize,
- BytesRef.getUTF8SortedAsUnicodeComparator(),
- StandardCodec.TERMS_CACHE_SIZE,
- state.codecId);
+ FieldsProducer ret = new BlockTermsReader(indexReader,
+ state.dir,
+ state.fieldInfos,
+ state.segmentInfo.name,
+ postingsReader,
+ state.readBufferSize,
+ BytesRef.getUTF8SortedAsUnicodeComparator(),
+ StandardCodec.TERMS_CACHE_SIZE,
+ state.codecId);
success = true;
return ret;
} finally {
@@ -214,14 +214,14 @@ public class MockVariableIntBlockCodec e
@Override
public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set<String> files) {
SepPostingsReaderImpl.files(segmentInfo, codecId, files);
- PrefixCodedTermsReader.files(dir, segmentInfo, codecId, files);
+ BlockTermsReader.files(dir, segmentInfo, codecId, files);
FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
}
@Override
public void getExtensions(Set<String> extensions) {
SepPostingsWriterImpl.getExtensions(extensions);
- PrefixCodedTermsReader.getExtensions(extensions);
+ BlockTermsReader.getExtensions(extensions);
FixedGapTermsIndexReader.getIndexExtensions(extensions);
}
}
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java Thu Jan 20 18:53:55 2011
@@ -26,6 +26,9 @@ import org.apache.lucene.index.IndexFile
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.codecs.BlockTermsReader;
+import org.apache.lucene.index.codecs.BlockTermsWriter;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.FieldsProducer;
@@ -33,13 +36,11 @@ import org.apache.lucene.index.codecs.Fi
import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter;
import org.apache.lucene.index.codecs.PostingsReaderBase;
import org.apache.lucene.index.codecs.PostingsWriterBase;
-import org.apache.lucene.index.codecs.PrefixCodedTermsReader;
-import org.apache.lucene.index.codecs.PrefixCodedTermsWriter;
+import org.apache.lucene.index.codecs.TermStats;
import org.apache.lucene.index.codecs.TermsIndexReaderBase;
import org.apache.lucene.index.codecs.TermsIndexWriterBase;
import org.apache.lucene.index.codecs.VariableGapTermsIndexReader;
import org.apache.lucene.index.codecs.VariableGapTermsIndexWriter;
-import org.apache.lucene.index.codecs.TermStats;
import org.apache.lucene.index.codecs.mockintblock.MockFixedIntBlockCodec;
import org.apache.lucene.index.codecs.mockintblock.MockVariableIntBlockCodec;
import org.apache.lucene.index.codecs.mocksep.MockSingleIntFactory;
@@ -152,6 +153,10 @@ public class MockRandomCodec extends Cod
public boolean isIndexTerm(BytesRef term, TermStats stats) {
return random.nextInt(gap) == 17;
}
+
+ @Override
+ public void newField(FieldInfo fieldInfo) {
+ }
};
}
indexWriter = new VariableGapTermsIndexWriter(state, selector);
@@ -165,7 +170,7 @@ public class MockRandomCodec extends Cod
success = false;
try {
- FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+ FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
success = true;
return ret;
} finally {
@@ -269,15 +274,15 @@ public class MockRandomCodec extends Cod
success = false;
try {
- FieldsProducer ret = new PrefixCodedTermsReader(indexReader,
- state.dir,
- state.fieldInfos,
- state.segmentInfo.name,
- postingsReader,
- state.readBufferSize,
- BytesRef.getUTF8SortedAsUnicodeComparator(),
- termsCacheSize,
- state.codecId);
+ FieldsProducer ret = new BlockTermsReader(indexReader,
+ state.dir,
+ state.fieldInfos,
+ state.segmentInfo.name,
+ postingsReader,
+ state.readBufferSize,
+ BytesRef.getUTF8SortedAsUnicodeComparator(),
+ termsCacheSize,
+ state.codecId);
success = true;
return ret;
} finally {
@@ -297,7 +302,7 @@ public class MockRandomCodec extends Cod
files.add(seedFileName);
SepPostingsReaderImpl.files(segmentInfo, codecId, files);
StandardPostingsReader.files(dir, segmentInfo, codecId, files);
- PrefixCodedTermsReader.files(dir, segmentInfo, codecId, files);
+ BlockTermsReader.files(dir, segmentInfo, codecId, files);
FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
VariableGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
@@ -315,7 +320,7 @@ public class MockRandomCodec extends Cod
@Override
public void getExtensions(Set<String> extensions) {
SepPostingsWriterImpl.getExtensions(extensions);
- PrefixCodedTermsReader.getExtensions(extensions);
+ BlockTermsReader.getExtensions(extensions);
FixedGapTermsIndexReader.getIndexExtensions(extensions);
VariableGapTermsIndexReader.getIndexExtensions(extensions);
extensions.add(SEED_EXT);
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java Thu Jan 20 18:53:55 2011
@@ -30,8 +30,8 @@ import org.apache.lucene.index.codecs.Fi
import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter;
import org.apache.lucene.index.codecs.PostingsReaderBase;
import org.apache.lucene.index.codecs.PostingsWriterBase;
-import org.apache.lucene.index.codecs.PrefixCodedTermsReader;
-import org.apache.lucene.index.codecs.PrefixCodedTermsWriter;
+import org.apache.lucene.index.codecs.BlockTermsReader;
+import org.apache.lucene.index.codecs.BlockTermsWriter;
import org.apache.lucene.index.codecs.TermsIndexReaderBase;
import org.apache.lucene.index.codecs.TermsIndexWriterBase;
import org.apache.lucene.index.codecs.standard.StandardCodec;
@@ -70,7 +70,7 @@ public class MockSepCodec extends Codec
success = false;
try {
- FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+ FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
success = true;
return ret;
} finally {
@@ -108,15 +108,15 @@ public class MockSepCodec extends Codec
success = false;
try {
- FieldsProducer ret = new PrefixCodedTermsReader(indexReader,
- state.dir,
- state.fieldInfos,
- state.segmentInfo.name,
- postingsReader,
- state.readBufferSize,
- BytesRef.getUTF8SortedAsUnicodeComparator(),
- StandardCodec.TERMS_CACHE_SIZE,
- state.codecId);
+ FieldsProducer ret = new BlockTermsReader(indexReader,
+ state.dir,
+ state.fieldInfos,
+ state.segmentInfo.name,
+ postingsReader,
+ state.readBufferSize,
+ BytesRef.getUTF8SortedAsUnicodeComparator(),
+ StandardCodec.TERMS_CACHE_SIZE,
+ state.codecId);
success = true;
return ret;
} finally {
@@ -133,7 +133,7 @@ public class MockSepCodec extends Codec
@Override
public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set<String> files) {
SepPostingsReaderImpl.files(segmentInfo, codecId, files);
- PrefixCodedTermsReader.files(dir, segmentInfo, codecId, files);
+ BlockTermsReader.files(dir, segmentInfo, codecId, files);
FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
}
@@ -144,7 +144,7 @@ public class MockSepCodec extends Codec
public static void getSepExtensions(Set<String> extensions) {
SepPostingsWriterImpl.getExtensions(extensions);
- PrefixCodedTermsReader.getExtensions(extensions);
+ BlockTermsReader.getExtensions(extensions);
FixedGapTermsIndexReader.getIndexExtensions(extensions);
}
-}
\ No newline at end of file
+}