You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2010/12/14 18:18:02 UTC
svn commit: r1049178 [2/2] - in /lucene/dev/branches/bulkpostings/lucene:
contrib/instantiated/src/java/org/apache/lucene/store/instantiated/
contrib/memory/src/java/org/apache/lucene/index/memory/
src/java/org/apache/lucene/index/ src/java/org/apache/...
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java Tue Dec 14 17:18:00 2010
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.BulkPostingsEnum;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
@@ -65,7 +66,7 @@ public class SepPostingsReaderImpl exten
skipIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.SKIP_EXTENSION), readBufferSize);
if (segmentInfo.getHasProx()) {
- freqIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.FREQ_EXTENSION));
+ freqIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.FREQ_EXTENSION), readBufferSize);
posIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.POS_EXTENSION), readBufferSize);
payloadIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.PAYLOAD_EXTENSION), readBufferSize);
} else {
@@ -169,36 +170,37 @@ public class SepPostingsReaderImpl exten
public DocsEnum docs(FieldInfo fieldInfo, TermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException {
final SepTermState termState = (SepTermState) _termState;
SepDocsEnum docsEnum;
- if (reuse == null || !(reuse instanceof SepDocsEnum)) {
+ if (reuse == null || !(reuse instanceof SepDocsEnum) || !((SepDocsEnum) reuse).canReuse(docIn)) {
docsEnum = new SepDocsEnum();
} else {
docsEnum = (SepDocsEnum) reuse;
- if (docsEnum.startDocIn != docIn) {
- // If you are using ParallelReader, and pass in a
- // reused DocsAndPositionsEnum, it could have come
- // from another reader also using sep codec
- docsEnum = new SepDocsEnum();
- }
}
return docsEnum.init(fieldInfo, termState, skipDocs);
}
@Override
+ public BulkPostingsEnum bulkPostings(FieldInfo fieldInfo, TermState _termState, BulkPostingsEnum reuse, boolean doFreqs, boolean doPositions) throws IOException {
+ final SepTermState termState = (SepTermState) _termState;
+ SepBulkPostingsEnum postingsEnum;
+ if (reuse == null || !(reuse instanceof SepBulkPostingsEnum) || !((SepBulkPostingsEnum) reuse).canReuse(fieldInfo, docIn, doFreqs, doPositions)) {
+ postingsEnum = new SepBulkPostingsEnum(fieldInfo, doFreqs, doPositions);
+ } else {
+ postingsEnum = (SepBulkPostingsEnum) reuse;
+ }
+
+ return postingsEnum.init(termState);
+ }
+
+ @Override
public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
assert !fieldInfo.omitTermFreqAndPositions;
final SepTermState termState = (SepTermState) _termState;
SepDocsAndPositionsEnum postingsEnum;
- if (reuse == null || !(reuse instanceof SepDocsAndPositionsEnum)) {
+ if (reuse == null || !(reuse instanceof SepDocsAndPositionsEnum) || !((SepDocsAndPositionsEnum) reuse).canReuse(docIn)) {
postingsEnum = new SepDocsAndPositionsEnum();
} else {
postingsEnum = (SepDocsAndPositionsEnum) reuse;
- if (postingsEnum.startDocIn != docIn) {
- // If you are using ParallelReader, and pass in a
- // reused DocsAndPositionsEnum, it could have come
- // from another reader also using sep codec
- postingsEnum = new SepDocsAndPositionsEnum();
- }
}
return postingsEnum.init(fieldInfo, termState, skipDocs);
@@ -209,14 +211,19 @@ public class SepPostingsReaderImpl exten
int doc;
int count;
int freq;
- long freqStart;
// TODO: -- should we do omitTF with 2 different enum classes?
private boolean omitTF;
private boolean storePayloads;
private Bits skipDocs;
- private final IntIndexInput.Reader docReader;
- private final IntIndexInput.Reader freqReader;
+ private final BulkPostingsEnum.BlockReader docReader;
+ private final int[] docDeltaBuffer;
+ private int docDeltaUpto;
+ private int docDeltaLimit;
+ private final BulkPostingsEnum.BlockReader freqReader;
+ private final int[] freqBuffer;
+ private int freqUpto;
+ private int freqLimit;
private long skipOffset;
private final IntIndexInput.Index docIndex;
@@ -224,21 +231,22 @@ public class SepPostingsReaderImpl exten
private final IntIndexInput.Index posIndex;
private final IntIndexInput startDocIn;
- // TODO: -- should we do hasProx with 2 different enum classes?
-
boolean skipped;
SepSkipListReader skipper;
- SepDocsEnum() throws IOException {
+ public SepDocsEnum() throws IOException {
startDocIn = docIn;
docReader = docIn.reader();
+ docDeltaBuffer = docReader.getBuffer();
docIndex = docIn.index();
if (freqIn != null) {
freqReader = freqIn.reader();
+ freqBuffer = freqReader.getBuffer();
freqIndex = freqIn.index();
} else {
freqReader = null;
freqIndex = null;
+ freqBuffer = null;
}
if (posIn != null) {
posIndex = posIn.index(); // only init this so skipper can read it
@@ -247,6 +255,10 @@ public class SepPostingsReaderImpl exten
}
}
+ // nocommit -- somehow we have to prevent re-decode of
+ // the same block if we have just .next()'d to next term
+ // in the terms dict -- this is an O(N^2) cost to eg
+ // TermRangeQuery when it steps through low freq terms!!
SepDocsEnum init(FieldInfo fieldInfo, SepTermState termState, Bits skipDocs) throws IOException {
this.skipDocs = skipDocs;
omitTF = fieldInfo.omitTermFreqAndPositions;
@@ -256,42 +268,82 @@ public class SepPostingsReaderImpl exten
// skipped consuming the previous docs?
docIndex.set(termState.docIndex);
docIndex.seek(docReader);
+ docDeltaLimit = docReader.end();
+ docDeltaUpto = docReader.offset();
+ if (docDeltaUpto >= docDeltaLimit) {
+ docDeltaLimit = docReader.fill();
+ }
if (!omitTF) {
freqIndex.read(docReader, true);
freqIndex.seek(freqReader);
+ freqUpto = freqReader.offset();
+ freqLimit = freqReader.end();
+ if (freqUpto >= freqLimit) {
+ freqLimit = freqReader.fill();
+ }
+ //System.out.println(" freqIndex=" + freqIndex + " posIndex=" + posIndex);
posIndex.read(docReader, true);
+ // nocommit -- only store this if storePayloads is true
// skip payload offset
docReader.readVLong();
} else {
freq = 1;
}
+
skipOffset = docReader.readVLong();
+ docDeltaUpto = docReader.offset();
+ docDeltaLimit = docReader.end();
+
docFreq = termState.docFreq;
+ assert docFreq > 0;
count = 0;
doc = 0;
skipped = false;
+ //System.out.println(" docFreq=" + docFreq);
return this;
}
+ public boolean canReuse(IntIndexInput docsIn) {
+ return startDocIn == docsIn;
+ }
+
@Override
public int nextDoc() throws IOException {
+ //System.out.println(" sep.nextDoc");
while(true) {
if (count == docFreq) {
return doc = NO_MORE_DOCS;
}
+ assert docDeltaUpto <= docDeltaLimit: "docDeltaUpto=" + docDeltaUpto + " docDeltaLimit=" + docDeltaLimit;
+
+ if (docDeltaUpto == docDeltaLimit) {
+ // refill
+ //System.out.println(" fill docs");
+ docDeltaLimit = docReader.fill();
+ docDeltaUpto = 0;
+ }
+
count++;
// Decode next doc
- doc += docReader.next();
+ doc += docDeltaBuffer[docDeltaUpto++];
+ //System.out.println(" doc="+ doc + " docDeltaUpto=" + (docDeltaUpto-1) + " skipDocs=" + skipDocs + " deleted?=" + (skipDocs != null && skipDocs.get(doc)));
if (!omitTF) {
- freq = freqReader.next();
+ if (freqUpto == freqLimit) {
+ // refill
+ //System.out.println(" fill freqs");
+ freqLimit = freqReader.fill();
+ freqUpto = 0;
+ }
+
+ freq = freqBuffer[freqUpto++];
}
if (skipDocs == null || !skipDocs.get(doc)) {
@@ -303,30 +355,6 @@ public class SepPostingsReaderImpl exten
}
@Override
- public int read() throws IOException {
- // TODO: -- switch to bulk read api in IntIndexInput
- final int[] docs = bulkResult.docs.ints;
- final int[] freqs = bulkResult.freqs.ints;
- int i = 0;
- final int length = docs.length;
- while (i < length && count < docFreq) {
- count++;
- // manually inlined call to next() for speed
- doc += docReader.next();
- if (!omitTF) {
- freq = freqReader.next();
- }
-
- if (skipDocs == null || !skipDocs.get(doc)) {
- docs[i] = doc;
- freqs[i] = freq;
- i++;
- }
- }
- return i;
- }
-
- @Override
public int freq() {
return freq;
}
@@ -338,9 +366,11 @@ public class SepPostingsReaderImpl exten
@Override
public int advance(int target) throws IOException {
+ //System.out.println("SepDocsEnum.advance target=" + target);
// TODO: jump right to next() if target is < X away
// from where we are now?
+ //System.out.println("SepDocsEnum.advance target=" + target);
if (docFreq >= skipInterval) {
@@ -349,6 +379,7 @@ public class SepPostingsReaderImpl exten
if (skipper == null) {
// This DocsEnum has never done any skipping
+ //System.out.println(" init skipper");
skipper = new SepSkipListReader((IndexInput) skipIn.clone(),
freqIn,
docIn,
@@ -358,6 +389,7 @@ public class SepPostingsReaderImpl exten
}
if (!skipped) {
+ //System.out.println(" init skipper2");
// We haven't yet skipped for this posting
skipper.init(skipOffset,
docIndex,
@@ -374,14 +406,25 @@ public class SepPostingsReaderImpl exten
final int newCount = skipper.skipTo(target);
if (newCount > count) {
-
// Skipper did move
if (!omitTF) {
skipper.getFreqIndex().seek(freqReader);
+ freqUpto = freqReader.offset();
+ freqLimit = freqReader.end();
+ if (freqUpto >= freqLimit) {
+ freqLimit = freqReader.fill();
+ }
}
skipper.getDocIndex().seek(docReader);
+ docDeltaUpto = docReader.offset();
+ docDeltaLimit = docReader.end();
+ if (docDeltaUpto >= docDeltaLimit) {
+ docDeltaLimit = docReader.fill();
+ }
+
count = newCount;
doc = skipper.getDoc();
+ //System.out.println(" did move count=" + newCount + " doc=" + doc);
}
}
@@ -401,91 +444,148 @@ public class SepPostingsReaderImpl exten
int doc;
int count;
int freq;
- long freqStart;
private boolean storePayloads;
private Bits skipDocs;
- private final IntIndexInput.Reader docReader;
- private final IntIndexInput.Reader freqReader;
- private final IntIndexInput.Reader posReader;
- private final IndexInput payloadIn;
+ private final BulkPostingsEnum.BlockReader docReader;
+ private final int[] docDeltaBuffer;
+ private int docDeltaUpto;
+ private int docDeltaLimit;
+ private final BulkPostingsEnum.BlockReader freqReader;
+ private final int[] freqBuffer;
+ private int freqUpto;
+ private int freqLimit;
+ private final BulkPostingsEnum.BlockReader posReader;
+ private final int[] posBuffer;
+ private int posUpto;
+ private int posLimit;
private long skipOffset;
+ private long payloadOffset;
+
+ private final IndexInput payloadIn;
private final IntIndexInput.Index docIndex;
private final IntIndexInput.Index freqIndex;
private final IntIndexInput.Index posIndex;
private final IntIndexInput startDocIn;
- private long payloadOffset;
-
private int pendingPosCount;
private int position;
private int payloadLength;
private long pendingPayloadBytes;
-
- private boolean skipped;
- private SepSkipListReader skipper;
private boolean payloadPending;
private boolean posSeekPending;
- SepDocsAndPositionsEnum() throws IOException {
+ boolean skipped;
+ SepSkipListReader skipper;
+
+ public SepDocsAndPositionsEnum() throws IOException {
startDocIn = docIn;
docReader = docIn.reader();
+ docDeltaBuffer = docReader.getBuffer();
docIndex = docIn.index();
freqReader = freqIn.reader();
+ freqBuffer = freqReader.getBuffer();
freqIndex = freqIn.index();
posReader = posIn.reader();
+ posBuffer = posReader.getBuffer();
posIndex = posIn.index();
payloadIn = (IndexInput) SepPostingsReaderImpl.this.payloadIn.clone();
}
+ // nocommit -- somehow we have to prevent re-decode of
+ // the same block if we have just .next()'d to next term
+ // in the terms dict -- this is an O(N^2) cost to eg
+ // TermRangeQuery when it steps through low freq terms!!
SepDocsAndPositionsEnum init(FieldInfo fieldInfo, SepTermState termState, Bits skipDocs) throws IOException {
this.skipDocs = skipDocs;
+ //System.out.println("sep d&p init");
+ assert !fieldInfo.omitTermFreqAndPositions;
storePayloads = fieldInfo.storePayloads;
// TODO: can't we only do this if consumer
// skipped consuming the previous docs?
docIndex.set(termState.docIndex);
+ // nocommit -- verify, during merge, this seek is
+ // sometimes w/in block:
docIndex.seek(docReader);
+ docDeltaLimit = docReader.end();
+ docDeltaUpto = docReader.offset();
+ if (docDeltaUpto >= docDeltaLimit) {
+ docDeltaLimit = docReader.fill();
+ }
freqIndex.read(docReader, true);
freqIndex.seek(freqReader);
+ freqLimit = freqReader.end();
+ freqUpto = freqReader.offset();
+ if (freqUpto >= freqLimit) {
+ //System.out.println(" re-fill freqs freqMax=" + freqLimit);
+ freqLimit = freqReader.fill();
+ }
+ //System.out.println(" freqIndex=" + freqIndex);
posIndex.read(docReader, true);
posSeekPending = true;
payloadPending = false;
payloadOffset = docReader.readVLong();
+ //System.out.println(" payloadOffset=" + payloadOffset);
skipOffset = docReader.readVLong();
+ //System.out.println(" skipOffset=" + skipOffset);
+
+ docDeltaLimit = docReader.end();
+ docDeltaUpto = docReader.offset();
+ /*
+ if (docDeltaUpto >= docDeltaLimit) {
+ // nocommit -- needed anymore?
+ docDeltaLimit = docReader.fill();
+ docDeltaUpto = 0;
+ }
+ */
docFreq = termState.docFreq;
+ assert docFreq > 0;
count = 0;
doc = 0;
pendingPosCount = 0;
pendingPayloadBytes = 0;
skipped = false;
+ //System.out.println(" docUpto=" + docDeltaUpto + " docMax=" + docDeltaLimit + " freqUpto=" + freqUpto + " freqMax=" + freqLimit);
+
return this;
}
+ public boolean canReuse(IntIndexInput docsIn) {
+ return startDocIn == docsIn;
+ }
+
@Override
public int nextDoc() throws IOException {
-
while(true) {
if (count == docFreq) {
return doc = NO_MORE_DOCS;
}
- count++;
+ if (docDeltaUpto == docDeltaLimit) {
+ // refill
+ docDeltaLimit = docReader.fill();
+ docDeltaUpto = 0;
+ }
- // TODO: maybe we should do the 1-bit trick for encoding
- // freq=1 case?
+ count++;
// Decode next doc
- doc += docReader.next();
+ doc += docDeltaBuffer[docDeltaUpto++];
- freq = freqReader.next();
+ if (freqUpto == freqLimit) {
+ // refill
+ freqLimit = freqReader.fill();
+ freqUpto = 0;
+ }
+ freq = freqBuffer[freqUpto++];
pendingPosCount += freq;
if (skipDocs == null || !skipDocs.get(doc)) {
@@ -525,6 +625,7 @@ public class SepPostingsReaderImpl exten
docIn,
posIn,
maxSkipLevels, skipInterval);
+
}
if (!skipped) {
@@ -536,7 +637,6 @@ public class SepPostingsReaderImpl exten
payloadOffset,
docFreq,
storePayloads);
-
skipped = true;
}
@@ -546,13 +646,24 @@ public class SepPostingsReaderImpl exten
// Skipper did move
skipper.getFreqIndex().seek(freqReader);
+ freqUpto = freqReader.offset();
+ freqLimit = freqReader.end();
+ if (freqUpto >= freqLimit) {
+ freqLimit = freqReader.fill();
+ }
+
skipper.getDocIndex().seek(docReader);
- //skipper.getPosIndex().seek(posReader);
+ docDeltaUpto = docReader.offset();
+ docDeltaLimit = docReader.end();
+ if (docDeltaUpto >= docDeltaLimit) {
+ docDeltaLimit = docReader.fill();
+ }
+
posIndex.set(skipper.getPosIndex());
posSeekPending = true;
count = newCount;
doc = skipper.getDoc();
- //payloadIn.seek(skipper.getPayloadPointer());
+
payloadOffset = skipper.getPayloadPointer();
pendingPosCount = 0;
pendingPayloadBytes = 0;
@@ -575,6 +686,11 @@ public class SepPostingsReaderImpl exten
public int nextPosition() throws IOException {
if (posSeekPending) {
posIndex.seek(posReader);
+ posLimit = posReader.end();
+ posUpto = posReader.offset();
+ if (posUpto >= posLimit) {
+ posLimit = posReader.fill();
+ }
payloadIn.seek(payloadOffset);
posSeekPending = false;
}
@@ -582,10 +698,12 @@ public class SepPostingsReaderImpl exten
// scan over any docs that were iterated without their
// positions
while (pendingPosCount > freq) {
- final int code = posReader.next();
+
+ final int code = nextPosInt();
+
if (storePayloads && (code & 1) != 0) {
// Payload length has changed
- payloadLength = posReader.next();
+ payloadLength = nextPosInt();
assert payloadLength >= 0;
}
pendingPosCount--;
@@ -593,11 +711,12 @@ public class SepPostingsReaderImpl exten
pendingPayloadBytes += payloadLength;
}
- final int code = posReader.next();
+ final int code = nextPosInt();
+
if (storePayloads) {
if ((code & 1) != 0) {
// Payload length has changed
- payloadLength = posReader.next();
+ payloadLength = nextPosInt();
assert payloadLength >= 0;
}
position += code >> 1;
@@ -612,6 +731,14 @@ public class SepPostingsReaderImpl exten
return position;
}
+ private int nextPosInt() throws IOException {
+ if (posUpto == posLimit) {
+ posLimit = posReader.fill();
+ posUpto = 0;
+ }
+ return posBuffer[posUpto++];
+ }
+
private BytesRef payload;
@Override
@@ -645,4 +772,261 @@ public class SepPostingsReaderImpl exten
return payloadPending && payloadLength > 0;
}
}
+
+ class SepBulkPostingsEnum extends BulkPostingsEnum {
+ private int docFreq;
+
+ private final BulkPostingsEnum.BlockReader docReader;
+ private final IntIndexInput.Index docIndex;
+
+ private final BulkPostingsEnum.BlockReader freqReader;
+ private final IntIndexInput.Index freqIndex;
+
+ private final BulkPostingsEnum.BlockReader posReader;
+ private final IntIndexInput.Index posIndex;
+
+ private final boolean storePayloads;
+ private final boolean omitTF;
+ private long skipOffset;
+
+ private final IntIndexInput startDocIn;
+
+ private boolean skipped;
+ private SepSkipListReader skipper;
+
+ public SepBulkPostingsEnum(FieldInfo fieldInfo, boolean doFreq, boolean doPos) throws IOException {
+ this.storePayloads = fieldInfo.storePayloads;
+ this.omitTF = fieldInfo.omitTermFreqAndPositions;
+ startDocIn = docIn;
+ docReader = docIn.reader();
+ docIndex = docIn.index();
+
+ if (doFreq && !omitTF) {
+ freqReader = freqIn.reader();
+ } else {
+ freqReader = null;
+ }
+
+ if (doPos && !omitTF) {
+ if (storePayloads) {
+ // Must rewrite each posDelta:
+ posReader = new PosPayloadReader(posIn.reader());
+ } else {
+ // Pass through
+ posReader = posIn.reader();
+ }
+ } else {
+ posReader = null;
+ }
+
+ if (!omitTF) {
+ // we have to pull these even if doFreq is false
+ // just so we can decode the index from the docs
+ // file
+ freqIndex = freqIn.index();
+ posIndex = posIn.index();
+ } else {
+ posIndex = null;
+ freqIndex = null;
+ }
+ }
+
+ public boolean canReuse(FieldInfo fieldInfo, IntIndexInput docIn, boolean doFreq, boolean doPos) {
+ return fieldInfo.storePayloads == storePayloads &&
+ startDocIn == docIn &&
+ (freqReader != null || !doFreq) &&
+ (posReader != null || !doPos);
+ }
+
+ // nocommit -- make sure this is tested!!
+
+ // Only used when payloads were stored -- we cannot do
+ // pass-through read for this since the payload lengths
+ // are also encoded into the position deltas
+ private final class PosPayloadReader extends BulkPostingsEnum.BlockReader {
+ final BulkPostingsEnum.BlockReader other;
+ private int pendingOffset;
+ private int limit;
+ private boolean skipNext;
+
+ public PosPayloadReader(BulkPostingsEnum.BlockReader other) {
+ this.other = other;
+ }
+
+ void doAfterSeek() {}
+
+ @Override
+ public int[] getBuffer() {
+ return other.getBuffer();
+ }
+
+ // nocommit -- make sure this works correctly in the
+ // "reuse"/seek case
+ @Override
+ public int offset() {
+ pendingOffset = other.offset();
+ return 0;
+ }
+
+ @Override
+ public void setOffset(int offset) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int fill() throws IOException {
+ // Translate code back to pos deltas, and filter out
+ // any changes in payload length. NOTE: this is a
+ // perf hit on indices that encode payloads, even if
+ // they use "normal" positional queries
+ final int otherLimit = other.fill();
+ limit = 0;
+ final int[] buffer = other.getBuffer();
+ for(int i=pendingOffset;i<otherLimit;i++) {
+ if (skipNext) {
+ skipNext = false;
+ } else {
+ final int code = buffer[i];
+ buffer[limit++] = code >>> 1;
+ if ((code & 1) != 0) {
+ // skip the payload length
+ skipNext = true;
+ }
+ }
+ }
+ pendingOffset = 0;
+
+ return limit;
+ }
+
+ @Override
+ public int end() {
+ return limit;
+ }
+ }
+
+ /** Position readers to the specified term */
+ SepBulkPostingsEnum init(SepTermState termState) throws IOException {
+
+ // nocommit -- make sure seek w/in buffer is efficient
+ // here:
+
+ // TODO: can't we only do this if consumer
+ // skipped consuming the previous docs?
+ docIndex.set(termState.docIndex);
+ docIndex.seek(docReader);
+ //System.out.println("sep init offset=" + docReader.offset() + " limit=" + docReader.end() + " omitTF=" + omitTF);
+ //System.out.println(" v[0]=" + docReader.getBuffer()[0]);
+
+ if (!omitTF) {
+ freqIndex.read(docReader, true);
+ if (freqReader != null) {
+ freqIndex.seek(freqReader);
+ }
+ posIndex.read(docReader, true);
+ // skip payload offset -- nocommit only store this
+ // if field has payloads
+ docReader.readVLong();
+ }
+
+ skipOffset = docReader.readVLong();
+ //System.out.println("skipOffset=" + skipOffset);
+
+ if (posReader != null) {
+ if (storePayloads) {
+ PosPayloadReader posPayloadReader = (PosPayloadReader) posReader;
+ posIndex.seek(posPayloadReader.other);
+ posPayloadReader.doAfterSeek();
+ } else {
+ posIndex.seek(posReader);
+ }
+ }
+
+ if (docReader.offset() >= docReader.end()) {
+ docReader.fill();
+ docReader.setOffset(0);
+ }
+
+ docFreq = termState.docFreq;
+ skipped = false;
+
+ return this;
+ }
+
+ @Override
+ public BulkPostingsEnum.BlockReader getDocDeltasReader() {
+ // Maximize perf -- just pass through the underlying
+ // intblock reader:
+ return docReader;
+ }
+
+ @Override
+ public BulkPostingsEnum.BlockReader getFreqsReader() {
+ // Maximize perf -- just pass through the underlying
+ // intblock reader:
+ return freqReader;
+ }
+
+ @Override
+ public BulkPostingsEnum.BlockReader getPositionDeltasReader() {
+ // Maximize perf -- just pass through the underlying
+ // intblock reader (if payloads were not indexed):
+ return posReader;
+ }
+
+ private final JumpResult jumpResult = new JumpResult();
+
+ @Override
+ public JumpResult jump(int target, int curCount) throws IOException {
+
+ if (docFreq >= skipInterval) {
+
+ // There are enough docs in the posting to have
+ // skip data
+
+ if (skipper == null) {
+ // This enum has never done any skipping
+ skipper = new SepSkipListReader((IndexInput) skipIn.clone(),
+ freqIn,
+ docIn,
+ posIn,
+ maxSkipLevels, skipInterval);
+ }
+
+ if (!skipped) {
+ // We haven't yet skipped for this particular posting
+ skipper.init(skipOffset,
+ docIndex,
+ freqIndex,
+ posIndex,
+ 0,
+ docFreq,
+ storePayloads);
+ skipper.setOmitTF(omitTF);
+ skipped = true;
+ }
+
+ final int newCount = skipper.skipTo(target);
+ //System.out.println(" sep skip newCount=" + newCount + " vs count=" + curCount);
+
+ if (newCount > curCount) {
+
+ // Skipper did move -- seek all readers:
+ skipper.getDocIndex().seek(docReader);
+
+ if (freqReader != null) {
+ skipper.getFreqIndex().seek(freqReader);
+ }
+ if (posReader != null) {
+ skipper.getPosIndex().seek(posReader);
+ }
+
+ jumpResult.count = newCount;
+ jumpResult.docID = skipper.getDoc();
+ return jumpResult;
+ }
+ }
+ return null;
+ }
+ }
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java Tue Dec 14 17:18:00 2010
@@ -181,6 +181,7 @@ public final class SepPostingsWriterImpl
posIndex.write(docOut, true);
docOut.writeVLong(payloadStart);
}
+ // nocommit -- only write if docFreq > skipInterval?
docOut.writeVLong(skipOut.getFilePointer());
firstDoc = false;
}
@@ -199,6 +200,7 @@ public final class SepPostingsWriterImpl
}
lastDocID = docID;
+ //System.out.println("sepw: write docID=" + docID);
docOut.write(delta);
if (!omitTF) {
freqOut.write(termDocFreq);
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java Tue Dec 14 17:18:00 2010
@@ -108,10 +108,10 @@ class SepSkipListReader extends MultiLev
for(int i=0;i<maxNumberOfSkipLevels;i++) {
docIndex[i].set(docBaseIndex);
- if (freqIndex != null) {
+ if (freqIndex != null && freqBaseIndex != null) {
freqIndex[i].set(freqBaseIndex);
}
- if (posBaseIndex != null) {
+ if (posBaseIndex != null && freqBaseIndex != null) {
posIndex[i].set(posBaseIndex);
}
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java Tue Dec 14 17:18:00 2010
@@ -24,6 +24,7 @@ import org.apache.lucene.index.FieldsEnu
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.BulkPostingsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.store.IndexInput;
@@ -217,6 +218,17 @@ class SimpleTextFieldsReader extends Fie
}
@Override
+ public BulkPostingsEnum bulkPostings(BulkPostingsEnum reuse, boolean doFreq, boolean doPositions) throws IOException {
+ SimpleTextBulkPostingsEnum bulkPostingsEnum;
+ if (reuse != null && reuse instanceof SimpleTextBulkPostingsEnum && ((SimpleTextBulkPostingsEnum) reuse).canReuse(in, doFreq, doPositions)) {
+ bulkPostingsEnum = (SimpleTextBulkPostingsEnum) reuse;
+ } else {
+ bulkPostingsEnum = new SimpleTextBulkPostingsEnum(doFreq, doPositions);
+ }
+ return bulkPostingsEnum.reset(docsStart, omitTF);
+ }
+
+ @Override
public Comparator<BytesRef> getComparator() {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
@@ -439,6 +451,189 @@ class SimpleTextFieldsReader extends Fie
}
}
+ private class SimpleTextBulkPostingsEnum extends BulkPostingsEnum {
+ private final IndexInput inStart;
+ private final IndexInput in;
+ private final LineCountReader docDeltasReader;
+ private final FreqsReader freqsReader;
+ private final LineCountReader positionDeltasReader;
+
+ public SimpleTextBulkPostingsEnum(boolean doFreq, boolean doPositions) {
+ this.inStart = SimpleTextFieldsReader.this.in;
+ this.in = (IndexInput) this.inStart.clone();
+ docDeltasReader = new LineCountReader(DOC);
+ if (doFreq) {
+ freqsReader = new FreqsReader();
+ } else {
+ freqsReader = null;
+ }
+
+ if (doPositions) {
+ positionDeltasReader = new LineCountReader(POS);
+ } else {
+ positionDeltasReader = null;
+ }
+ }
+
+ public boolean canReuse(IndexInput in, boolean doFreq, boolean doPositions) {
+ return in == inStart && (doFreq == (freqsReader != null)) && (doPositions == (positionDeltasReader != null));
+ }
+
+ // reads docDeltas & positionDeltas
+ private class LineCountReader extends BlockReader {
+ private final BytesRef prefix;
+ private final int[] buffer = new int[64];
+ private final IndexInput in;
+ private final BytesRef scratch = new BytesRef(10);
+ private int lastValue;
+ private int limit;
+
+ public LineCountReader(BytesRef prefix) {
+ this.prefix = prefix;
+ this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
+ }
+
+ public void reset(long fp) throws IOException {
+ lastValue = 0;
+ in.seek(fp);
+ fill();
+ }
+
+ @Override
+ public int[] getBuffer() {
+ return buffer;
+ }
+
+ @Override
+ public int offset() {
+ return 0;
+ }
+
+ @Override
+ public void setOffset(int offset) {
+ assert offset == 0;
+ }
+
+ @Override
+ public int end() {
+ return limit;
+ }
+
+ @Override
+ public int fill() throws IOException {
+ int upto = 0;
+ while(upto < buffer.length) {
+ readLine(in, scratch);
+ if (scratch.startsWith(TERM) || scratch.startsWith(FIELD) || scratch.equals(END)) {
+ break;
+ } else if (scratch.startsWith(prefix)) {
+ final int value = Integer.parseInt(new String(scratch.bytes, scratch.offset+prefix.length, scratch.length-prefix.length));
+ buffer[upto++] = value - lastValue;
+ lastValue = value;
+ }
+ }
+ return limit = upto;
+ }
+ }
+
+ private class FreqsReader extends BlockReader {
+ private final int[] buffer = new int[64];
+ private final IndexInput in;
+ private final BytesRef scratch = new BytesRef(10);
+ private int limit;
+ private boolean omitTF;
+
+ public FreqsReader() {
+ this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
+ }
+
+ public void reset(long fp, boolean omitTF) throws IOException {
+ in.seek(fp);
+ this.omitTF = omitTF;
+ fill();
+ }
+
+ @Override
+ public int[] getBuffer() {
+ return buffer;
+ }
+
+ @Override
+ public int offset() {
+ return 0;
+ }
+
+ @Override
+ public void setOffset(int offset) {
+ assert offset == 0;
+ }
+
+ @Override
+ public int end() {
+ return limit;
+ }
+
+ @Override
+ public int fill() throws IOException {
+ int upto = 0;
+ int freq = -1;
+ long lastFP = in.getFilePointer();
+ while(upto < buffer.length) {
+ lastFP = in.getFilePointer();
+ readLine(in, scratch);
+ if (scratch.startsWith(TERM) || scratch.startsWith(FIELD) || scratch.equals(END)) {
+ if (freq != -1) {
+ buffer[upto++] = omitTF ? 1 : freq;
+ }
+ break;
+ } else if (scratch.startsWith(DOC)) {
+ if (freq != -1) {
+ buffer[upto++] = omitTF ? 1: freq;
+ }
+ freq = 0;
+ } else if (scratch.startsWith(POS)) {
+ freq++;
+ }
+ }
+ in.seek(lastFP);
+ return limit = upto;
+ }
+ }
+
+ public SimpleTextBulkPostingsEnum reset(long fp, boolean omitTF) throws IOException {
+
+ docDeltasReader.reset(fp);
+
+ if (freqsReader != null) {
+ freqsReader.reset(fp, omitTF);
+ }
+ if (positionDeltasReader != null) {
+ positionDeltasReader.reset(fp);
+ }
+ return this;
+ }
+
+ @Override
+ public BlockReader getDocDeltasReader() {
+ return docDeltasReader;
+ }
+
+ @Override
+ public BlockReader getPositionDeltasReader() {
+ return positionDeltasReader;
+ }
+
+ @Override
+ public BlockReader getFreqsReader() {
+ return freqsReader;
+ }
+
+ @Override
+ public JumpResult jump(int target, int curCount) {
+ return null;
+ }
+ }
+
private class SimpleTextTerms extends Terms {
private final String field;
private final long termsStart;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java Tue Dec 14 17:18:00 2010
@@ -24,6 +24,7 @@ import org.apache.lucene.store.Directory
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.BulkPostingsEnum;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.codecs.PostingsReaderBase;
@@ -170,6 +171,17 @@ public class StandardPostingsReader exte
}
@Override
+ public BulkPostingsEnum bulkPostings(FieldInfo fieldInfo, TermState termState, BulkPostingsEnum reuse, boolean doFreqs, boolean doPositions) throws IOException {
+ SegmentBulkPostingsEnum postingsEnum;
+ if (reuse == null || !(reuse instanceof SegmentBulkPostingsEnum) || !((SegmentBulkPostingsEnum) reuse).canReuse(fieldInfo, freqIn, doFreqs, doPositions)) {
+ postingsEnum = new SegmentBulkPostingsEnum(fieldInfo.omitTermFreqAndPositions, doFreqs, doPositions);
+ } else {
+ postingsEnum = (SegmentBulkPostingsEnum) reuse;
+ }
+ return postingsEnum.reset(fieldInfo, (DocTermState) termState);
+ }
+
+ @Override
public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermState termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
if (fieldInfo.omitTermFreqAndPositions) {
return null;
@@ -248,6 +260,7 @@ public class StandardPostingsReader exte
// cases
freqIn.seek(termState.freqOffset);
limit = termState.docFreq;
+ assert limit > 0;
ord = 0;
doc = 0;
@@ -420,6 +433,7 @@ public class StandardPostingsReader exte
lazyProxPointer = termState.proxOffset;
limit = termState.docFreq;
+ assert limit > 0;
ord = 0;
doc = 0;
position = 0;
@@ -796,4 +810,328 @@ public class StandardPostingsReader exte
return payloadPending && payloadLength > 0;
}
}
+
+ static final int BULK_BUFFER_SIZE = 64;
+
+ // Bulk postings API
+ private final class SegmentBulkPostingsEnum extends BulkPostingsEnum {
+ private final IndexInput freqIn;
+ private final IndexInput proxIn;
+
+ final IndexInput startFreqIn;
+ private final boolean omitTF;
+
+ boolean storePayloads; // does current field store payloads?
+
+ int ord; // how many docs we've read
+ int docFreq;
+
+ long freqOffset;
+ long proxOffset;
+ int skipOffset;
+
+ boolean skipped;
+ DefaultSkipListReader skipper;
+ private int payloadLength;
+
+ private final DocDeltasReader docDeltasReader;
+ private final FreqsReader freqsReader;
+ private final PositionsReader positionDeltasReader;
+
+ private boolean docsPending, freqsPending;
+
+ public SegmentBulkPostingsEnum(boolean omitTF, boolean doFreqs, boolean doPositions) throws IOException {
+ //System.out.println("bulk init");
+ startFreqIn = StandardPostingsReader.this.freqIn;
+ this.freqIn = (IndexInput) StandardPostingsReader.this.freqIn.clone();
+ this.omitTF = omitTF;
+
+ docDeltasReader = new DocDeltasReader();
+ if (doFreqs && !omitTF) {
+ freqsReader = new FreqsReader();
+ } else {
+ freqsReader = null;
+ }
+
+ if (doPositions && !omitTF) {
+ this.proxIn = (IndexInput) StandardPostingsReader.this.proxIn.clone();
+ positionDeltasReader = new PositionsReader();
+ } else {
+ this.proxIn = null;
+ positionDeltasReader = null;
+ }
+ }
+
+ public boolean canReuse(FieldInfo fieldInfo, IndexInput freqin, boolean doFreqs, boolean doPositions) {
+ return freqIn == startFreqIn &&
+ (!doFreqs || freqsReader == null) &&
+ (!doPositions || positionDeltasReader == null) &&
+ (omitTF == fieldInfo.omitTermFreqAndPositions);
+ }
+
+ final void read() throws IOException {
+ if (freqsReader == null) {
+ // Consumer only wants doc deltas
+ assert !docsPending;
+ if (omitTF) {
+ // Index only stores doc deltas
+ for(int i=0;i<BULK_BUFFER_SIZE;i++) {
+ docDeltasReader.buffer[i] = freqIn.readVInt();
+ }
+ } else {
+ // Index stores doc deltas & freq
+ for(int i=0;i<BULK_BUFFER_SIZE;i++) {
+ final int code = freqIn.readVInt();
+ docDeltasReader.buffer[i] = code >>> 1;
+ if ((code & 1) == 0) {
+ freqIn.readVInt();
+ }
+ }
+ }
+ ord += BULK_BUFFER_SIZE;
+ docsPending = true;
+ } else {
+ // Consumer wants both
+ assert !docsPending;
+ assert !freqsPending;
+ for(int i=0;i<BULK_BUFFER_SIZE;i++) {
+ final int code = freqIn.readVInt();
+ docDeltasReader.buffer[i] = code >>> 1;
+ if ((code & 1) == 0) {
+ freqsReader.buffer[i] = freqIn.readVInt();
+ } else {
+ freqsReader.buffer[i] = 1;
+ }
+ }
+ ord += BULK_BUFFER_SIZE;
+ docsPending = true;
+ freqsPending = true;
+ }
+ }
+
+ private class DocDeltasReader extends BulkPostingsEnum.BlockReader {
+ final int[] buffer = new int[BULK_BUFFER_SIZE];
+ int limit;
+ int offset;
+
+ @Override
+ public int[] getBuffer() {
+ return buffer;
+ }
+
+ @Override
+ public int end() {
+ return limit;
+ }
+
+ @Override
+ public int fill() throws IOException {
+ if (!docsPending) {
+ read();
+ }
+ docsPending = false;
+ limit = BULK_BUFFER_SIZE;
+ offset = 0;
+ //System.out.println("spr: doc deltas read limit=" + limit);
+ return BULK_BUFFER_SIZE;
+ }
+
+ @Override
+ public int offset() {
+ return offset;
+ }
+
+ @Override
+ public void setOffset(int offset) {
+ this.offset = offset;
+ }
+ }
+
+ private class FreqsReader extends BulkPostingsEnum.BlockReader {
+ final int[] buffer = new int[BULK_BUFFER_SIZE];
+ int limit;
+
+ @Override
+ public int[] getBuffer() {
+ return buffer;
+ }
+
+ @Override
+ public int end() {
+ return limit;
+ }
+
+ @Override
+ public int fill() throws IOException {
+ if (!freqsPending) {
+ read();
+ }
+ freqsPending = false;
+ limit = BULK_BUFFER_SIZE;
+ return BULK_BUFFER_SIZE;
+ }
+
+ @Override
+ public int offset() {
+ return 0;
+ }
+
+ @Override
+ public void setOffset(int offset) {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ private class PositionsReader extends BulkPostingsEnum.BlockReader {
+ final int[] buffer = new int[BULK_BUFFER_SIZE];
+ int limit;
+
+ @Override
+ public int[] getBuffer() {
+ return buffer;
+ }
+
+ @Override
+ public int end() {
+ return limit;
+ }
+
+ @Override
+ public int fill() throws IOException {
+ // nocommit -- must flush prx file w/ extra 127 0
+ // positions -- index change!!
+ if (storePayloads) {
+ for(int i=0;i<BULK_BUFFER_SIZE;i++) {
+ final int code = proxIn.readVInt();
+ buffer[i] = code >>> 1;
+ if ((code & 1) != 0) {
+ payloadLength = proxIn.readVInt();
+ }
+ if (payloadLength != 0) {
+ // skip payload
+ proxIn.seek(proxIn.getFilePointer()+payloadLength);
+ }
+ }
+ } else {
+ for(int i=0;i<BULK_BUFFER_SIZE;i++) {
+ buffer[i] = proxIn.readVInt();
+ }
+ }
+ limit = BULK_BUFFER_SIZE;
+ return BULK_BUFFER_SIZE;
+ }
+
+ @Override
+ public int offset() {
+ return 0;
+ }
+
+ @Override
+ public void setOffset(int offset) {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ @Override
+ public BlockReader getDocDeltasReader() {
+ return docDeltasReader;
+ }
+
+ @Override
+ public BlockReader getFreqsReader() {
+ return freqsReader;
+ }
+
+ @Override
+ public BlockReader getPositionDeltasReader() {
+ return positionDeltasReader;
+ }
+
+ public SegmentBulkPostingsEnum reset(FieldInfo fieldInfo, DocTermState termState) throws IOException {
+ storePayloads = fieldInfo.storePayloads;
+ freqOffset = termState.freqOffset;
+ freqIn.seek(freqOffset);
+
+ // TODO: for full enum case (eg segment merging) this
+ // seek is unnecessary; maybe we can avoid in such
+ // cases
+ if (positionDeltasReader != null) {
+ // nocommit -- how come this is a live seek but
+ // frq/doc is not?
+ proxOffset = termState.proxOffset;
+ proxIn.seek(proxOffset);
+ }
+
+ skipOffset = termState.skipOffset;
+ docFreq = termState.docFreq;
+ assert docFreq > 0;
+
+ ord = 0;
+ skipped = false;
+
+ return this;
+ }
+
+ private final JumpResult jumpResult = new JumpResult();
+
+ @Override
+ public JumpResult jump(int target, int curCount) throws IOException {
+
+ // TODO: jump right to next() if target is < X away
+ // from where we are now?
+
+ if (skipOffset > 0) {
+
+ // There are enough docs in the posting to have
+ // skip data
+
+ if (skipper == null) {
+ // This is the first time this enum has ever been used for skipping -- do lazy init
+ skipper = new DefaultSkipListReader((IndexInput) freqIn.clone(), maxSkipLevels, skipInterval);
+ }
+
+ if (!skipped) {
+
+ // This is the first time this posting has
+ // skipped since reset() was called, so now we
+ // load the skip data for this posting
+ skipper.init(freqOffset + skipOffset,
+ freqOffset, proxOffset,
+ docFreq, storePayloads);
+
+ skipped = true;
+ }
+
+ final int newOrd = skipper.skipTo(target);
+
+ // nocommit rename ord -> count
+ assert curCount == ord: "curCount=" + curCount + " ord=" + ord;
+
+ if (newOrd > ord) {
+ // Skipper moved
+ //System.out.println("newOrd=" + newOrd + " vs ord=" + ord + " doc=" + skipper.getDoc());
+
+ freqIn.seek(skipper.getFreqPointer());
+ docDeltasReader.limit = 0;
+
+ if (freqsReader != null) {
+ freqsReader.limit = 0;
+ }
+
+ if (positionDeltasReader != null) {
+ positionDeltasReader.limit = 0;
+ proxIn.seek(skipper.getProxPointer());
+ }
+
+ jumpResult.count = ord = newOrd;
+ jumpResult.docID = skipper.getDoc();
+
+ return jumpResult;
+ }
+ }
+
+ // no jump occurred
+ return null;
+ }
+ }
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java Tue Dec 14 17:18:00 2010
@@ -224,6 +224,16 @@ public final class StandardPostingsWrite
@Override
public void close() throws IOException {
+
+ // Readers read whole blocks at once, so we have to
+ // flush final block out w/ unused values:
+ for(int i=0;i<StandardPostingsReader.BULK_BUFFER_SIZE-1;i++) {
+ freqOut.writeVInt(1);
+ if (proxOut != null) {
+ proxOut.writeVInt(0);
+ }
+ }
+
try {
freqOut.close();
} finally {
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanScorer.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanScorer.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanScorer.java Tue Dec 14 17:18:00 2010
@@ -239,6 +239,9 @@ final class BooleanScorer extends Scorer
do {
bucketTable.first = null;
+ // used only by assert:
+ int count = 0;
+
while (current != null) { // more queued
// check prohibited & required
@@ -264,6 +267,8 @@ final class BooleanScorer extends Scorer
}
}
+ assert count++ < BucketTable.SIZE;
+ assert current != current.next;
current = current.next; // pop the queue
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java Tue Dec 14 17:18:00 2010
@@ -142,6 +142,11 @@ public class ConstantScoreQuery extends
public int nextDoc() throws IOException {
return docIdSetIterator.nextDoc();
}
+
      /** Describes this scorer by the filter it wraps (debugging aid). */
      @Override
      public String toString() {
        return "ConstantScorer(" + filter + ")";
      }
@Override
public int docID() {
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java Tue Dec 14 17:18:00 2010
@@ -23,6 +23,7 @@ import java.util.Comparator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.BulkPostingsEnum;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
@@ -150,6 +151,12 @@ public abstract class FilteredTermsEnum
public DocsEnum docs(Bits bits, DocsEnum reuse) throws IOException {
return tenum.docs(bits, reuse);
}
+
  /** Forwards to the wrapped TermsEnum; bulk postings are not filtered here. */
  @Override
  public BulkPostingsEnum bulkPostings(BulkPostingsEnum reuse, boolean doFreqs, boolean doPositions) throws IOException {
    assert tenum != null;
    return tenum.bulkPostings(reuse, doFreqs, doPositions);
  }
@Override
public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse) throws IOException {
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java Tue Dec 14 17:18:00 2010
@@ -23,7 +23,7 @@ import org.apache.lucene.index.IndexRead
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.BulkPostingsEnum;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.Bits;
@@ -125,26 +125,32 @@ public class MultiTermQueryWrapperFilter
final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
int termCount = 0;
final Bits delDocs = MultiFields.getDeletedDocs(reader);
- DocsEnum docsEnum = null;
+ BulkPostingsEnum postingsEnum = null;
do {
termCount++;
- // System.out.println(" iter termCount=" + termCount + " term=" +
- // enumerator.term().toBytesString());
- docsEnum = termsEnum.docs(delDocs, docsEnum);
- final DocsEnum.BulkReadResult result = docsEnum.getBulkResult();
- while (true) {
- final int count = docsEnum.read();
- if (count != 0) {
- final int[] docs = result.docs.ints;
- for (int i = 0; i < count; i++) {
- bitSet.set(docs[i]);
- }
- } else {
- break;
+ postingsEnum = termsEnum.bulkPostings(postingsEnum, false, false);
+ final int docFreq = termsEnum.docFreq();
+ final BulkPostingsEnum.BlockReader docDeltasReader = postingsEnum.getDocDeltasReader();
+ final int[] docDeltas = docDeltasReader.getBuffer();
+ int offset = docDeltasReader.offset();
+ int limit = docDeltasReader.end();
+ if (offset >= limit) {
+ limit = docDeltasReader.fill();
+ }
+ int count = 0;
+ int doc = 0;
+ while (count < docFreq) {
+ if (offset >= limit) {
+ offset = 0;
+ limit = docDeltasReader.fill();
+ }
+ doc += docDeltas[offset++];
+ count++;
+ if (delDocs == null || !delDocs.get(doc)) {
+ bitSet.set(doc);
}
}
} while (termsEnum.next() != null);
- // System.out.println(" done termCount=" + termCount);
query.incTotalNumberOfTerms(termCount);
return bitSet;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermQuery.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermQuery.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermQuery.java Tue Dec 14 17:18:00 2010
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.BulkPostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Explanation.IDFExplanation;
@@ -76,15 +77,18 @@ public class TermQuery extends Query {
@Override
public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException {
- DocsEnum docs = reader.termDocsEnum(reader.getDeletedDocs(),
- term.field(),
- term.bytes());
-
+ assert reader.getSequentialSubReaders() == null;
+ BulkPostingsEnum docs = reader.bulkTermPostingsEnum(term.field(),
+ term.bytes(),
+ true,
+ false);
if (docs == null) {
return null;
}
- return new TermScorer(this, docs, similarity, reader.norms(term.field()));
+ // nocommit: we need this docfreq from TermState, MTQ knows it... but tosses it away.
+ return new TermScorer(this, docs, reader.docFreq(term.field(), term.bytes()),
+ reader.getDeletedDocs(), similarity, reader.norms(term.field()));
}
@Override
@@ -124,10 +128,10 @@ public class TermQuery extends Query {
int tf = 0;
DocsEnum docs = reader.termDocsEnum(reader.getDeletedDocs(), term.field(), term.bytes());
if (docs != null) {
- int newDoc = docs.advance(doc);
- if (newDoc == doc) {
- tf = docs.freq();
- }
+ int newDoc = docs.advance(doc);
+ if (newDoc == doc) {
+ tf = docs.freq();
+ }
tfExplanation.setValue(similarity.tf(tf));
tfExplanation.setDescription("tf(termFreq("+term+")="+tf+")");
} else {
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermScorer.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermScorer.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermScorer.java Tue Dec 14 17:18:00 2010
@@ -19,26 +19,33 @@ package org.apache.lucene.search;
import java.io.IOException;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.index.BulkPostingsEnum;
+import org.apache.lucene.util.Bits;
/** Expert: A <code>Scorer</code> for documents matching a <code>Term</code>.
*/
final class TermScorer extends Scorer {
- private DocsEnum docsEnum;
+ private BulkPostingsEnum docsEnum;
private byte[] norms;
private float weightValue;
- private int doc = -1;
- private int freq;
+ private int doc;
- private int pointer;
- private int pointerMax;
+ private final int[] docDeltas;
+ private int docPointer;
+ private int docPointerMax;
+ private boolean first = true;
+
+ private final int[] freqs;
+ private int freqPointer;
+ private int freqPointerMax;
private static final int SCORE_CACHE_SIZE = 32;
private float[] scoreCache = new float[SCORE_CACHE_SIZE];
- private int[] docs;
- private int[] freqs;
- private final DocsEnum.BulkReadResult bulkResult;
+ private final BulkPostingsEnum.BlockReader freqsReader;
+ private final BulkPostingsEnum.BlockReader docDeltasReader;
+ private final Bits skipDocs;
+ private final int docFreq;
+ private int count;
/**
* Construct a <code>TermScorer</code>.
@@ -53,13 +60,36 @@ final class TermScorer extends Scorer {
* @param norms
* The field norms of the document fields for the <code>Term</code>.
*/
- TermScorer(Weight weight, DocsEnum td, Similarity similarity, byte[] norms) {
+ TermScorer(Weight weight, BulkPostingsEnum td, int docFreq, Bits skipDocs, Similarity similarity, byte[] norms) throws IOException {
super(similarity, weight);
this.docsEnum = td;
+ this.docFreq = docFreq;
+ docDeltasReader = td.getDocDeltasReader();
+ docDeltas = docDeltasReader.getBuffer();
+ docPointerMax = docDeltasReader.end();
+ docPointer = docDeltasReader.offset();
+ if (docPointer >= docPointerMax) {
+ docPointerMax = docDeltasReader.fill();
+ }
+ docPointer--;
+
+ freqsReader = td.getFreqsReader();
+ if (freqsReader != null) {
+ freqs = freqsReader.getBuffer();
+ freqPointerMax = freqsReader.end();
+ freqPointer = freqsReader.offset();
+ if (freqPointer >= freqPointerMax) {
+ freqPointerMax = freqsReader.fill();
+ }
+ freqPointer--;
+ } else {
+ freqs = null;
+ }
+
+ this.skipDocs = skipDocs;
this.norms = norms;
this.weightValue = weight.getValue();
- bulkResult = td.getBulkResult();
for (int i = 0; i < SCORE_CACHE_SIZE; i++)
scoreCache[i] = getSimilarity().tf(i) * weightValue;
@@ -70,41 +100,73 @@ final class TermScorer extends Scorer {
score(c, Integer.MAX_VALUE, nextDoc());
}
- private final void refillBuffer() throws IOException {
- pointerMax = docsEnum.read(); // refill
- docs = bulkResult.docs.ints;
- freqs = bulkResult.freqs.ints;
- }
-
// firstDocID is ignored since nextDoc() sets 'doc'
@Override
protected boolean score(Collector c, int end, int firstDocID) throws IOException {
c.setScorer(this);
+ //System.out.println("ts.collect firstdocID=" + firstDocID + " term=" + term + " end=" + end + " doc=" + doc);
+ // nocommit -- this can leave scorer on a deleted doc...
while (doc < end) { // for docs in window
- c.collect(doc); // collect score
- if (++pointer >= pointerMax) {
- refillBuffer();
- if (pointerMax != 0) {
- pointer = 0;
- } else {
- doc = NO_MORE_DOCS; // set to sentinel value
- return false;
+ if (skipDocs == null || !skipDocs.get(doc)) {
+ //System.out.println("ts.collect doc=" + doc + " skipDocs=" + skipDocs + " count=" + count + " vs dF=" + docFreq);
+ c.collect(doc); // collect
+ }
+ if (count == docFreq) {
+ doc = NO_MORE_DOCS;
+ return false;
+ }
+ count++;
+ docPointer++;
+
+ //System.out.println("dp=" + docPointer + " dpMax=" + docPointerMax + " count=" + count + " countMax=" + docFreq);
+
+ if (docPointer >= docPointerMax) {
+ docPointerMax = docDeltasReader.fill();
+ //System.out.println(" refill! dpMax=" + docPointerMax + " reader=" + docDeltasReader);
+ assert docPointerMax != 0;
+ docPointer = 0;
+
+ if (freqsReader != null) {
+ freqPointer++;
+ // NOTE: this code is intentionally dup'd
+ // (specialized) w/ the else clause, for better CPU
+ // branch prediction (assuming compiler doesn't
+ // de-dup): for codecs that always bulk read same
+ // number of docDeltas & freqs (standard, for,
+ // pfor), this if will always be true. Other codecs
+ // (simple9/16) will not be aligned:
+ if (freqPointer >= freqPointerMax) {
+ freqPointerMax = freqsReader.fill();
+ assert freqPointerMax != 0;
+ freqPointer = 0;
+ }
}
- }
- doc = docs[pointer];
- freq = freqs[pointer];
+ } else if (freqsReader != null) {
+ freqPointer++;
+ if (freqPointer >= freqPointerMax) {
+ freqPointerMax = freqsReader.fill();
+ assert freqPointerMax != 0;
+ freqPointer = 0;
+ }
+ }
+
+ doc += docDeltas[docPointer];
}
return true;
}
@Override
public int docID() {
- return doc;
+ return first ? -1 : doc;
}
@Override
public float freq() {
- return freq;
+ if (freqsReader != null) {
+ return freqs[freqPointer];
+ } else {
+ return 1.0f;
+ }
}
/**
@@ -116,23 +178,65 @@ final class TermScorer extends Scorer {
*/
@Override
public int nextDoc() throws IOException {
- pointer++;
- if (pointer >= pointerMax) {
- refillBuffer();
- if (pointerMax != 0) {
- pointer = 0;
+ //System.out.println("ts.nextDoc " + this + " count=" + count + " vs docFreq=" + docFreq);
+ while(count < docFreq) {
+ docPointer++;
+ if (docPointer >= docPointerMax) {
+ //System.out.println("ts.nd refill docs");
+ docPointerMax = docDeltasReader.fill();
+ assert docPointerMax != 0;
+ docPointer = 0;
+ if (freqsReader != null) {
+ // NOTE: this code is intentionally dup'd
+ // (specialized) w/ the else clause, for better CPU
+ // branch prediction (assuming compiler doesn't
+ // de-dup): for codecs that always bulk read same
+ // number of docDeltas & freqs (standard, for,
+ // pfor), this if will always be true. Other codecs
+ // (simple9/16) will not be aligned:
+ freqPointer++;
+ if (freqPointer >= freqPointerMax) {
+ //System.out.println("ts.nd refill freqs");
+ freqPointerMax = freqsReader.fill();
+ assert freqPointerMax != 0;
+ freqPointer = 0;
+ }
+ }
} else {
- return doc = NO_MORE_DOCS;
+ if (freqsReader != null) {
+ freqPointer++;
+ if (freqPointer >= freqPointerMax) {
+ //System.out.println("ts.nd refill freqs");
+ freqPointerMax = freqsReader.fill();
+ assert freqPointerMax != 0;
+ freqPointer = 0;
+ }
+ }
}
- }
- doc = docs[pointer];
- freq = freqs[pointer];
- assert doc != NO_MORE_DOCS;
- return doc;
+ count++;
+ doc += docDeltas[docPointer];
+ first = false;
+ assert doc >= 0 && (skipDocs == null || doc < skipDocs.length()) && doc != NO_MORE_DOCS: "doc=" + doc + " skipDocs=" + skipDocs + " skipDocs.length=" + (skipDocs==null? "n/a" : skipDocs.length());
+ if (skipDocs == null || !skipDocs.get(doc)) {
+ //System.out.println(" ret doc=" + doc + " freq=" + freq());
+ return doc;
+ }
+ }
+
+ //System.out.println(" end");
+ return doc = NO_MORE_DOCS;
}
@Override
public float score() {
+ assert !first;
+ final int freq;
+ if (freqsReader == null) {
+ freq = 1;
+ } else {
+ freq = freqs[freqPointer];
+ }
+ assert freq > 0;
assert doc != NO_MORE_DOCS;
float raw = // compute tf(f)*weight
freq < SCORE_CACHE_SIZE // check cache
@@ -153,24 +257,100 @@ final class TermScorer extends Scorer {
*/
@Override
public int advance(int target) throws IOException {
- // first scan in cache
- for (pointer++; pointer < pointerMax; pointer++) {
- if (docs[pointer] >= target) {
- freq = freqs[pointer];
- return doc = docs[pointer];
+
+ // nocommit: should we, here, optimize .advance(target that isn't
+ // too far away) into scan? seems like simple win?
+
+ // first scan current doc deltas block
+ for (docPointer++; docPointer < docPointerMax && count < docFreq; docPointer++) {
+ assert first || docDeltas[docPointer] > 0;
+ doc += docDeltas[docPointer];
+ first = false;
+ count++;
+ if (freqsReader != null && ++freqPointer >= freqPointerMax) {
+ freqPointerMax = freqsReader.fill();
+ assert freqPointerMax != 0;
+ freqPointer = 0;
+ }
+ if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) {
+ return doc;
}
}
- // not found in readahead cache, seek underlying stream
- int newDoc = docsEnum.advance(target);
- //System.out.println("ts.advance docsEnum=" + docsEnum);
- if (newDoc != NO_MORE_DOCS) {
- doc = newDoc;
- freq = docsEnum.freq();
+ if (count == docFreq) {
+ return doc = NO_MORE_DOCS;
+ }
+
+ // not found in current block, seek underlying stream
+ BulkPostingsEnum.JumpResult jumpResult = docsEnum.jump(target, count);
+ if (jumpResult != null) {
+ count = jumpResult.count;
+ doc = jumpResult.docID;
+ first = false;
+ docPointer = docDeltasReader.offset();
+ docPointerMax = docDeltasReader.end();
+ if (docPointer >= docPointerMax) {
+ docPointerMax = docDeltasReader.fill();
+ }
+ docPointer--;
+ if (freqsReader != null) {
+ freqPointer = freqsReader.offset();
+ freqPointerMax = freqsReader.end();
+ if (freqPointer >= freqPointerMax) {
+ freqPointerMax = freqsReader.fill();
+ }
+ freqPointer--;
+ }
} else {
- doc = NO_MORE_DOCS;
+ // seek did not jump -- just fill next buffer
+ docPointerMax = docDeltasReader.fill();
+ if (docPointerMax != 0) {
+ docPointer = 0;
+ assert first || docDeltas[0] > 0;
+ doc += docDeltas[0];
+ count++;
+ first = false;
+ } else {
+ return doc = NO_MORE_DOCS;
+ }
+ if (freqsReader != null && ++freqPointer >= freqPointerMax) {
+ freqPointerMax = freqsReader.fill();
+ assert freqPointerMax != 0;
+ freqPointer = 0;
+ }
+ }
+
+ // now scan
+ while(true) {
+ assert doc >= 0 && doc != NO_MORE_DOCS;
+ if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) {
+ return doc;
+ }
+
+ if (count >= docFreq) {
+ break;
+ }
+
+ if (++docPointer >= docPointerMax) {
+ docPointerMax = docDeltasReader.fill();
+ if (docPointerMax != 0) {
+ docPointer = 0;
+ } else {
+ return doc = NO_MORE_DOCS;
+ }
+ }
+
+ if (freqsReader != null && ++freqPointer >= freqPointerMax) {
+ freqPointerMax = freqsReader.fill();
+ assert freqPointerMax != 0;
+ freqPointer = 0;
+ }
+
+ assert first || docDeltas[docPointer] > 0;
+ doc += docDeltas[docPointer];
+ count++;
}
- return doc;
+ return doc = NO_MORE_DOCS;
}
/** Returns a string representation of this <code>TermScorer</code>. */
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/BitUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/BitUtil.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/BitUtil.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/BitUtil.java Tue Dec 14 17:18:00 2010
@@ -814,4 +814,25 @@ public final class BitUtil {
return v;
}
+ /** Returns the smallest non negative p such that a given value < (2**(p+1))
+ * This differs from (63 - java.lang.Long.numberOfLeadingZeros(v))
+ * for non positive given values.
+ */
+ public static int logNextHigherPowerOfTwo(long v) {
+ long vinput = v; // only for assertions below.
+ int p = 0;
+ while (v >= (1 << 8)) {
+ v >>= 8;
+ p += 8;
+ }
+ while (v >= (1 << 1)) {
+ v >>= 1;
+ p++;
+ }
+ assert (p <= 62) : p;
+ assert (p == 62) || (vinput < (1L << (p + 1))) : "p " + p + ", vinput " + vinput;
+ assert (p == 0) || (vinput >= (1L << p)) : "p " + p + ", vinput " + vinput;
+ assert (vinput <= 0) || (p == (63 - Long.numberOfLeadingZeros(vinput))) : "p " + p + ", vinput " + vinput;
+ return p;
+ }
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestExternalCodecs.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestExternalCodecs.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestExternalCodecs.java Tue Dec 14 17:18:00 2010
@@ -342,6 +342,114 @@ public class TestExternalCodecs extends
public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) {
return new RAMDocsAndPositionsEnum(ramField.termToDocs.get(current), skipDocs);
}
+
+ @Override
+ public BulkPostingsEnum bulkPostings(BulkPostingsEnum reuse, boolean doFreqs, boolean doPositions) throws IOException {
+ return new RAMBulkPostingsEnum(ramField.termToDocs.get(current));
+ }
+ }
+
+ static final int BULK_BUFFER_SIZE = 64;
+
+ // Bulk postings API
+ private static class RAMBulkPostingsEnum extends BulkPostingsEnum {
+ private final RAMTerm ramTerm;
+ private final BlockReader docDeltasReader;
+ private final BlockReader freqsReader;
+ private final BlockReader posDeltasReader;
+
+ public RAMBulkPostingsEnum(RAMTerm ramTerm) throws IOException {
+ this.ramTerm = ramTerm;
+
+ int[] docDeltas = new int[10];
+ int[] freqs = new int[10];
+ int[] posDeltas = new int[10];
+ int docUpto = 0;
+ int posUpto = 0;
+ int lastDocID = 0;
+ for(RAMDoc doc : ramTerm.docs) {
+ if (docDeltas.length == docUpto) {
+ docDeltas = ArrayUtil.grow(docDeltas, 1+docUpto);
+ freqs = ArrayUtil.grow(freqs, 1+docUpto);
+ }
+ docDeltas[docUpto] = doc.docID - lastDocID;
+ freqs[docUpto] = doc.positions.length;
+ docUpto++;
+ lastDocID = doc.docID;
+ int lastPos = 0;
+ for(int pos : doc.positions) {
+ if (posDeltas.length == posUpto) {
+ posDeltas = ArrayUtil.grow(posDeltas, 1+posUpto);
+ }
+ posDeltas[posUpto++] = pos - lastPos;
+ lastPos = pos;
+ }
+ }
+ docDeltasReader = new SimpleBlockReader(docDeltas, docUpto);
+ freqsReader = new SimpleBlockReader(freqs, docUpto);
+ posDeltasReader = new SimpleBlockReader(posDeltas, posUpto);
+ }
+
+ @Override
+ public BlockReader getDocDeltasReader() {
+ return docDeltasReader;
+ }
+
+ @Override
+ public BlockReader getFreqsReader() {
+ return freqsReader;
+ }
+
+ @Override
+ public BlockReader getPositionDeltasReader() {
+ return posDeltasReader;
+ }
+
+ @Override
+ public JumpResult jump(int target, int curCount) {
+ return null;
+ }
+
+ private static class SimpleBlockReader extends BlockReader {
+ private final int[] ints;
+ private final int count;
+ private boolean done;
+
+ public SimpleBlockReader(int[] ints, int count) {
+ this.ints = ints;
+ this.count = count;
+ }
+
+ @Override
+ public int[] getBuffer() {
+ return ints;
+ }
+
+ @Override
+ public int fill() {
+ if (!done) {
+ done = true;
+ return count;
+ } else {
+ return 0;
+ }
+ }
+
+ @Override
+ public int end() {
+ return done ? 0 : count;
+ }
+
+ @Override
+ public int offset() {
+ return 0;
+ }
+
+ @Override
+ public void setOffset(int offset) {
+ throw new UnsupportedOperationException();
+ }
+ }
}
private static class RAMDocsEnum extends DocsEnum {
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java Tue Dec 14 17:18:00 2010
@@ -2910,4 +2910,138 @@ public class TestIndexWriter extends Luc
dir.close();
}
+
+ public void testGrowingGaps() throws Exception {
+ Directory dir = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+ //w.w.setInfoStream(System.out);
+ Document doc = new Document();
+ Field f = newField(random, "field", "two", Field.Store.NO, Field.Index.ANALYZED);
+ doc.add(f);
+ final int NUM_GAPS = 100;
+ for(int i=0;i<NUM_GAPS;i++) {
+ f.setValue("one");
+ w.addDocument(doc);
+ f.setValue("two");
+ for(int j=0;j<1+i;j++) {
+ w.addDocument(doc);
+ }
+ }
+
+ // MultiBulkPostingsEnum doesn't jump (yet):
+ w.optimize();
+
+ IndexReader r = w.getReader();
+ w.close();
+
+ DocsEnum docs = MultiFields.getTermDocsEnum(r,
+ MultiFields.getDeletedDocs(r),
+ "field",
+ new BytesRef("one"));
+ // test simple linear scan:
+ int[] docIDs = new int[r.maxDoc()];
+ int upto = 0;
+ int docID;
+ int expDocID = 0;
+ int gap = 2;
+ while((docID = docs.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
+ //System.out.println(" got doc=" + docID + " ord=" + upto);
+ docIDs[upto++] = docID;
+ assertEquals(expDocID, docID);
+ expDocID += gap;
+ gap++;
+ }
+ assertEquals(NUM_GAPS, upto);
+
+ final int maxDoc = r.maxDoc();
+
+ // test advance:
+ for(int i=0;i<NUM_GAPS;i++) {
+ docs = MultiFields.getTermDocsEnum(r,
+ MultiFields.getDeletedDocs(r),
+ "field",
+ new BytesRef("one"));
+ //System.out.println(" .advance(" + docIDs[i] + ")");
+ assertEquals(docIDs[i], docs.advance(docIDs[i]));
+ for(int j=i+1;j<NUM_GAPS;j++) {
+ assertEquals(docIDs[j], docs.nextDoc());
+ }
+ assertEquals(DocsEnum.NO_MORE_DOCS, docs.nextDoc());
+ }
+
+ assertEquals(NUM_GAPS, r.docFreq("field", new BytesRef("one")));
+
+ BulkPostingsEnum bulkPostings = MultiFields.getBulkPostingsEnum(r,
+ "field",
+ new BytesRef("one"),
+ false,
+ false);
+
+ // test simple linear scan using BulkPostingsEnum:
+ BulkPostingsEnum.BlockReader docDeltasReader = bulkPostings.getDocDeltasReader();
+ int[] docDeltas = docDeltasReader.getBuffer();
+ int docDeltaUpto = docDeltasReader.offset();
+ int docDeltaMax = docDeltasReader.end();
+ if (docDeltaUpto >= docDeltaMax) {
+ docDeltaMax = docDeltasReader.fill();
+ }
+ docID = 0;
+ for(int i=0;i<NUM_GAPS;i++) {
+ if (docDeltaUpto == docDeltaMax) {
+ docDeltaUpto = 0;
+ docDeltaMax = docDeltasReader.fill();
+ }
+ assertTrue(docDeltas[docDeltaUpto] > 0 || i==0);
+ docID += docDeltas[docDeltaUpto++];
+ assertEquals(docID, docIDs[i]);
+ }
+
+ // nocommit test reuse too
+ // test jump using BulkPostingsEnum:
+ boolean didJump = false;
+ for(int i=0;i<NUM_GAPS;i++) {
+ //System.out.println("GAP i=" + i);
+ bulkPostings = MultiFields.getBulkPostingsEnum(r,
+ "field",
+ new BytesRef("one"),
+ false,
+ false);
+ //System.out.println("try jump " + docIDs[i]);
+ final BulkPostingsEnum.JumpResult jr = bulkPostings.jump(docIDs[i], 0);
+ int count;
+ if (jr != null) {
+ //System.out.println(" got jump!");
+ didJump = true;
+ assertEquals("jump to docID=" + docID + " got count=" + jr.count + " docID=" + jr.docID, docIDs[jr.count-1], jr.docID);
+ docID = jr.docID;
+ count = jr.count;
+ } else {
+ //System.out.println(" no jump!");
+ docID = 0;
+ count = 0;
+ }
+ docDeltasReader = bulkPostings.getDocDeltasReader();
+ docDeltas = docDeltasReader.getBuffer();
+ docDeltaUpto = docDeltasReader.offset();
+ docDeltaMax = docDeltasReader.end();
+ if (docDeltaUpto >= docDeltaMax) {
+ docDeltaMax = docDeltasReader.fill();
+ //System.out.println(" do pre-fill");
+ }
+ for(int j=count;j<NUM_GAPS;j++) {
+ //System.out.println(" GAP j=" + j);
+ if (docDeltaUpto >= docDeltaMax) {
+ docDeltaUpto = 0;
+ docDeltaMax = docDeltasReader.fill();
+ }
+ //System.out.println(" docUpto=" + docDeltaUpto + " delta=" + docDeltas[docDeltaUpto]);
+ docID += docDeltas[docDeltaUpto++];
+ assertEquals(docIDs[j], docID);
+ }
+ }
+ assertTrue(CodecProvider.getDefault().getFieldCodec("field").equals("SimpleText") || didJump);
+
+ r.close();
+ dir.close();
+ }
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/intblock/TestIntBlockCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/intblock/TestIntBlockCodec.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/intblock/TestIntBlockCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/intblock/TestIntBlockCodec.java Tue Dec 14 17:18:00 2010
@@ -18,6 +18,7 @@ package org.apache.lucene.index.codecs.i
*/
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.index.*;
import org.apache.lucene.store.*;
import org.apache.lucene.index.codecs.sep.*;
import org.apache.lucene.index.codecs.mockintblock.*;
@@ -36,10 +37,20 @@ public class TestIntBlockCodec extends L
out.close();
IntIndexInput in = f.openInput(dir, "test");
- IntIndexInput.Reader r = in.reader();
+ BulkPostingsEnum.BlockReader r = in.reader();
+
+ final int[] buffer = r.getBuffer();
+ int pointer = 0;
+ int pointerMax = r.fill();
+ assertTrue(pointerMax > 0);
for(int i=0;i<11777;i++) {
- assertEquals(i, r.next());
+ assertEquals(i, buffer[pointer++]);
+ if (pointer == pointerMax) {
+ pointerMax = r.fill();
+ assertTrue(pointerMax > 0);
+ pointer = 0;
+ }
}
in.close();
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java Tue Dec 14 17:18:00 2010
@@ -23,6 +23,7 @@ import org.apache.lucene.store.Directory
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.index.codecs.sep.IntIndexInput;
+import org.apache.lucene.index.BulkPostingsEnum;
/** Reads IndexInputs written with {@link
* SingleIntIndexOutput}. NOTE: this class is just for
@@ -52,18 +53,41 @@ public class MockSingleIntIndexInput ext
in.close();
}
- public static class Reader extends IntIndexInput.Reader {
+ public static class Reader extends BulkPostingsEnum.BlockReader {
// clone:
private final IndexInput in;
+ private int offset;
+ private final int[] buffer = new int[1];
public Reader(IndexInput in) {
this.in = in;
}
- /** Reads next single int */
@Override
- public int next() throws IOException {
- return in.readVInt();
+ public int[] getBuffer() {
+ return buffer;
+ }
+
+ @Override
+ public int offset() {
+ return offset;
+ }
+
+ @Override
+ public void setOffset(int offset) {
+ this.offset = offset;
+ }
+
+ @Override
+ public int end() {
+ return 1;
+ }
+
+ @Override
+ public int fill() throws IOException {
+ buffer[0] = in.readVInt();
+ offset = 0;
+ return 1;
}
}
@@ -81,7 +105,7 @@ public class MockSingleIntIndexInput ext
}
@Override
- public void read(IntIndexInput.Reader indexIn, boolean absolute)
+ public void read(BulkPostingsEnum.BlockReader indexIn, boolean absolute)
throws IOException {
if (absolute) {
fp = indexIn.readVLong();
@@ -96,8 +120,9 @@ public class MockSingleIntIndexInput ext
}
@Override
- public void seek(IntIndexInput.Reader other) throws IOException {
+ public void seek(BulkPostingsEnum.BlockReader other) throws IOException {
((Reader) other).in.seek(fp);
+ other.fill();
}
@Override