You are viewing a plain text version of this content. The link to the canonical version was not preserved in this plain-text copy.
Posted to commits@lucene.apache.org by si...@apache.org on 2011/02/09 10:36:03 UTC
svn commit: r1068809 [9/36] - in /lucene/dev/branches/docvalues: ./
dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/.idea/copyright/
dev-tools/idea/lucene/ dev-tools/idea/lucene/contrib/ant/
dev-tools/idea/lucene/contrib/queryparser/ dev-tools/...
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java Wed Feb 9 09:35:27 2011
@@ -20,15 +20,18 @@ package org.apache.lucene.index.codecs.s
import java.io.IOException;
import java.util.Collection;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.codecs.BlockTermState;
import org.apache.lucene.index.codecs.PostingsReaderBase;
-import org.apache.lucene.index.codecs.TermState;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
@@ -45,9 +48,12 @@ public class StandardPostingsReader exte
int skipInterval;
int maxSkipLevels;
+ //private String segment;
+
public StandardPostingsReader(Directory dir, SegmentInfo segmentInfo, int readBufferSize, String codecId) throws IOException {
freqIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, StandardCodec.FREQ_EXTENSION),
readBufferSize);
+ //this.segment = segmentInfo.name;
if (segmentInfo.getHasProx()) {
boolean success = false;
try {
@@ -83,33 +89,46 @@ public class StandardPostingsReader exte
}
// Must keep final because we do non-standard clone
- private final static class DocTermState extends TermState {
+ private final static class StandardTermState extends BlockTermState {
long freqOffset;
long proxOffset;
int skipOffset;
+ // Only used by the "primary" TermState -- clones don't
+ // copy this (basically they are "transient"):
+ ByteArrayDataInput bytesReader;
+ byte[] bytes;
+
+ @Override
public Object clone() {
- DocTermState other = new DocTermState();
- other.copy(this);
+ StandardTermState other = new StandardTermState();
+ other.copyFrom(this);
return other;
}
- public void copy(TermState _other) {
- super.copy(_other);
- DocTermState other = (DocTermState) _other;
+ @Override
+ public void copyFrom(TermState _other) {
+ super.copyFrom(_other);
+ StandardTermState other = (StandardTermState) _other;
freqOffset = other.freqOffset;
proxOffset = other.proxOffset;
skipOffset = other.skipOffset;
+
+ // Do not copy bytes, bytesReader (else TermState is
+ // very heavy, ie drags around the entire block's
+ // byte[]). On seek back, if next() is in fact used
+ // (rare!), they will be re-read from disk.
}
+ @Override
public String toString() {
return super.toString() + " freqFP=" + freqOffset + " proxFP=" + proxOffset + " skipOffset=" + skipOffset;
}
}
@Override
- public TermState newTermState() {
- return new DocTermState();
+ public BlockTermState newTermState() {
+ return new StandardTermState();
}
@Override
@@ -125,35 +144,61 @@ public class StandardPostingsReader exte
}
}
+ /* Reads but does not decode the byte[] blob holding
+ metadata for the current terms block */
@Override
- public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState termState, boolean isIndexTerm)
- throws IOException {
+ public void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
+ final StandardTermState termState = (StandardTermState) _termState;
+
+ final int len = termsIn.readVInt();
+ //System.out.println("SPR.readTermsBlock termsIn.fp=" + termsIn.getFilePointer());
+ if (termState.bytes == null) {
+ termState.bytes = new byte[ArrayUtil.oversize(len, 1)];
+ termState.bytesReader = new ByteArrayDataInput(null);
+ } else if (termState.bytes.length < len) {
+ termState.bytes = new byte[ArrayUtil.oversize(len, 1)];
+ }
+
+ termsIn.readBytes(termState.bytes, 0, len);
+ termState.bytesReader.reset(termState.bytes, 0, len);
+ }
- final DocTermState docTermState = (DocTermState) termState;
+ @Override
+ public void nextTerm(FieldInfo fieldInfo, BlockTermState _termState)
+ throws IOException {
+ final StandardTermState termState = (StandardTermState) _termState;
+ //System.out.println("StandardR.nextTerm seg=" + segment);
+ final boolean isFirstTerm = termState.termCount == 0;
- if (isIndexTerm) {
- docTermState.freqOffset = termsIn.readVLong();
+ if (isFirstTerm) {
+ termState.freqOffset = termState.bytesReader.readVLong();
} else {
- docTermState.freqOffset += termsIn.readVLong();
+ termState.freqOffset += termState.bytesReader.readVLong();
}
-
- if (docTermState.docFreq >= skipInterval) {
- docTermState.skipOffset = termsIn.readVInt();
+ //System.out.println(" dF=" + termState.docFreq);
+ //System.out.println(" freqFP=" + termState.freqOffset);
+ assert termState.freqOffset < freqIn.length();
+
+ if (termState.docFreq >= skipInterval) {
+ termState.skipOffset = termState.bytesReader.readVInt();
+ //System.out.println(" skipOffset=" + termState.skipOffset + " vs freqIn.length=" + freqIn.length());
+ assert termState.freqOffset + termState.skipOffset < freqIn.length();
} else {
- docTermState.skipOffset = 0;
+ // undefined
}
if (!fieldInfo.omitTermFreqAndPositions) {
- if (isIndexTerm) {
- docTermState.proxOffset = termsIn.readVLong();
+ if (isFirstTerm) {
+ termState.proxOffset = termState.bytesReader.readVLong();
} else {
- docTermState.proxOffset += termsIn.readVLong();
+ termState.proxOffset += termState.bytesReader.readVLong();
}
+ //System.out.println(" proxFP=" + termState.proxOffset);
}
}
@Override
- public DocsEnum docs(FieldInfo fieldInfo, TermState termState, Bits skipDocs, DocsEnum reuse) throws IOException {
+ public DocsEnum docs(FieldInfo fieldInfo, BlockTermState termState, Bits skipDocs, DocsEnum reuse) throws IOException {
SegmentDocsEnum docsEnum;
if (reuse == null || !(reuse instanceof SegmentDocsEnum)) {
docsEnum = new SegmentDocsEnum(freqIn);
@@ -166,11 +211,11 @@ public class StandardPostingsReader exte
docsEnum = new SegmentDocsEnum(freqIn);
}
}
- return docsEnum.reset(fieldInfo, (DocTermState) termState, skipDocs);
+ return docsEnum.reset(fieldInfo, (StandardTermState) termState, skipDocs);
}
@Override
- public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermState termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
+ public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
if (fieldInfo.omitTermFreqAndPositions) {
return null;
}
@@ -189,7 +234,7 @@ public class StandardPostingsReader exte
docsEnum = new SegmentDocsAndPositionsAndPayloadsEnum(freqIn, proxIn);
}
}
- return docsEnum.reset(fieldInfo, (DocTermState) termState, skipDocs);
+ return docsEnum.reset(fieldInfo, (StandardTermState) termState, skipDocs);
} else {
SegmentDocsAndPositionsEnum docsEnum;
if (reuse == null || !(reuse instanceof SegmentDocsAndPositionsEnum)) {
@@ -203,7 +248,7 @@ public class StandardPostingsReader exte
docsEnum = new SegmentDocsAndPositionsEnum(freqIn, proxIn);
}
}
- return docsEnum.reset(fieldInfo, (DocTermState) termState, skipDocs);
+ return docsEnum.reset(fieldInfo, (StandardTermState) termState, skipDocs);
}
}
@@ -233,7 +278,7 @@ public class StandardPostingsReader exte
this.freqIn = (IndexInput) freqIn.clone();
}
- public SegmentDocsEnum reset(FieldInfo fieldInfo, DocTermState termState, Bits skipDocs) throws IOException {
+ public SegmentDocsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits skipDocs) throws IOException {
omitTF = fieldInfo.omitTermFreqAndPositions;
if (omitTF) {
freq = 1;
@@ -248,8 +293,10 @@ public class StandardPostingsReader exte
// cases
freqIn.seek(termState.freqOffset);
limit = termState.docFreq;
+ assert limit > 0;
ord = 0;
doc = 0;
+ //System.out.println(" sde limit=" + limit + " freqFP=" + freqOffset);
skipped = false;
@@ -331,13 +378,10 @@ public class StandardPostingsReader exte
@Override
public int advance(int target) throws IOException {
- // TODO: jump right to next() if target is < X away
- // from where we are now?
-
- if (skipOffset > 0) {
+ if ((target - skipInterval) >= doc && limit >= skipInterval) {
// There are enough docs in the posting to have
- // skip data
+ // skip data, and it isn't too close.
if (skipper == null) {
// This is the first time this enum has ever been used for skipping -- do lazy init
@@ -407,7 +451,7 @@ public class StandardPostingsReader exte
this.proxIn = (IndexInput) proxIn.clone();
}
- public SegmentDocsAndPositionsEnum reset(FieldInfo fieldInfo, DocTermState termState, Bits skipDocs) throws IOException {
+ public SegmentDocsAndPositionsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits skipDocs) throws IOException {
assert !fieldInfo.omitTermFreqAndPositions;
assert !fieldInfo.storePayloads;
@@ -420,6 +464,8 @@ public class StandardPostingsReader exte
lazyProxPointer = termState.proxOffset;
limit = termState.docFreq;
+ assert limit > 0;
+
ord = 0;
doc = 0;
position = 0;
@@ -430,6 +476,7 @@ public class StandardPostingsReader exte
freqOffset = termState.freqOffset;
proxOffset = termState.proxOffset;
skipOffset = termState.skipOffset;
+ //System.out.println("StandardR.D&PE reset seg=" + segment + " limit=" + limit + " freqFP=" + freqOffset + " proxFP=" + proxOffset);
return this;
}
@@ -438,6 +485,7 @@ public class StandardPostingsReader exte
public int nextDoc() throws IOException {
while(true) {
if (ord == limit) {
+ //System.out.println("StandardR.D&PE seg=" + segment + " nextDoc return doc=END");
return doc = NO_MORE_DOCS;
}
@@ -461,6 +509,7 @@ public class StandardPostingsReader exte
position = 0;
+ //System.out.println("StandardR.D&PE nextDoc seg=" + segment + " return doc=" + doc);
return doc;
}
@@ -477,13 +526,12 @@ public class StandardPostingsReader exte
@Override
public int advance(int target) throws IOException {
- // TODO: jump right to next() if target is < X away
- // from where we are now?
+ //System.out.println("StandardR.D&PE advance target=" + target);
- if (skipOffset > 0) {
+ if ((target - skipInterval) >= doc && limit >= skipInterval) {
// There are enough docs in the posting to have
- // skip data
+ // skip data, and it isn't too close
if (skipper == null) {
// This is the first time this enum has ever been used for skipping -- do lazy init
@@ -524,6 +572,7 @@ public class StandardPostingsReader exte
return doc;
}
+ @Override
public int nextPosition() throws IOException {
if (lazyProxPointer != -1) {
@@ -552,10 +601,12 @@ public class StandardPostingsReader exte
/** Returns the payload at this position, or null if no
* payload was indexed. */
+ @Override
public BytesRef getPayload() throws IOException {
throw new IOException("No payloads exist for this field!");
}
+ @Override
public boolean hasPayload() {
return false;
}
@@ -594,7 +645,7 @@ public class StandardPostingsReader exte
this.proxIn = (IndexInput) proxIn.clone();
}
- public SegmentDocsAndPositionsAndPayloadsEnum reset(FieldInfo fieldInfo, DocTermState termState, Bits skipDocs) throws IOException {
+ public SegmentDocsAndPositionsAndPayloadsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits skipDocs) throws IOException {
assert !fieldInfo.omitTermFreqAndPositions;
assert fieldInfo.storePayloads;
if (payload == null) {
@@ -622,6 +673,7 @@ public class StandardPostingsReader exte
freqOffset = termState.freqOffset;
proxOffset = termState.proxOffset;
skipOffset = termState.skipOffset;
+ //System.out.println("StandardR.D&PE reset seg=" + segment + " limit=" + limit + " freqFP=" + freqOffset + " proxFP=" + proxOffset + " this=" + this);
return this;
}
@@ -630,6 +682,7 @@ public class StandardPostingsReader exte
public int nextDoc() throws IOException {
while(true) {
if (ord == limit) {
+ //System.out.println("StandardR.D&PE seg=" + segment + " nextDoc return doc=END");
return doc = NO_MORE_DOCS;
}
@@ -653,6 +706,7 @@ public class StandardPostingsReader exte
position = 0;
+ //System.out.println("StandardR.D&PE nextDoc seg=" + segment + " return doc=" + doc);
return doc;
}
@@ -669,13 +723,12 @@ public class StandardPostingsReader exte
@Override
public int advance(int target) throws IOException {
- // TODO: jump right to next() if target is < X away
- // from where we are now?
+ //System.out.println("StandardR.D&PE advance seg=" + segment + " target=" + target + " this=" + this);
- if (skipOffset > 0) {
+ if ((target - skipInterval) >= doc && limit >= skipInterval) {
// There are enough docs in the posting to have
- // skip data
+ // skip data, and it isn't too close
if (skipper == null) {
// This is the first time this enum has ever been used for skipping -- do lazy init
@@ -687,7 +740,7 @@ public class StandardPostingsReader exte
// This is the first time this posting has
// skipped, since reset() was called, so now we
// load the skip data for this posting
-
+ //System.out.println(" init skipper freqOffset=" + freqOffset + " skipOffset=" + skipOffset + " vs len=" + freqIn.length());
skipper.init(freqOffset+skipOffset,
freqOffset, proxOffset,
limit, true);
@@ -718,6 +771,7 @@ public class StandardPostingsReader exte
return doc;
}
+ @Override
public int nextPosition() throws IOException {
if (lazyProxPointer != -1) {
@@ -748,6 +802,7 @@ public class StandardPostingsReader exte
posPendingCount--;
position = 0;
payloadPending = false;
+ //System.out.println("StandardR.D&PE skipPos");
}
// read next position
@@ -771,11 +826,13 @@ public class StandardPostingsReader exte
assert posPendingCount >= 0: "nextPosition() was called too many times (more than freq() times) posPendingCount=" + posPendingCount;
+ //System.out.println("StandardR.D&PE nextPos return pos=" + position);
return position;
}
/** Returns the payload at this position, or null if no
* payload was indexed. */
+ @Override
public BytesRef getPayload() throws IOException {
assert lazyProxPointer == -1;
assert posPendingCount < freq;
@@ -785,6 +842,7 @@ public class StandardPostingsReader exte
if (payloadLength > payload.bytes.length) {
payload.grow(payloadLength);
}
+
proxIn.readBytes(payload.bytes, 0, payloadLength);
payload.length = payloadLength;
payloadPending = false;
@@ -792,6 +850,7 @@ public class StandardPostingsReader exte
return payload;
}
+ @Override
public boolean hasPayload() {
return payloadPending && payloadLength > 0;
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java Wed Feb 9 09:35:27 2011
@@ -22,12 +22,14 @@ package org.apache.lucene.index.codecs.s
import java.io.IOException;
-import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.codecs.PostingsWriterBase;
+import org.apache.lucene.index.codecs.TermStats;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
@@ -58,8 +60,15 @@ public final class StandardPostingsWrite
int lastPayloadLength;
int lastPosition;
+ private int pendingCount;
+
+ //private String segment;
+
+ private RAMOutputStream bytesWriter = new RAMOutputStream();
+
public StandardPostingsWriter(SegmentWriteState state) throws IOException {
super();
+ //this.segment = state.segmentName;
String fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.FREQ_EXTENSION);
freqOut = state.directory.createOutput(fileName);
@@ -95,6 +104,7 @@ public final class StandardPostingsWrite
@Override
public void startTerm() {
+ //System.out.println("StandardW: startTerm seg=" + segment + " pendingCount=" + pendingCount);
freqStart = freqOut.getFilePointer();
if (proxOut != null) {
proxStart = proxOut.getFilePointer();
@@ -108,9 +118,12 @@ public final class StandardPostingsWrite
// our parent calls setField whenever the field changes
@Override
public void setField(FieldInfo fieldInfo) {
+ //System.out.println("SPW: setField");
this.fieldInfo = fieldInfo;
omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
storePayloads = fieldInfo.storePayloads;
+ //System.out.println(" set init blockFreqStart=" + freqStart);
+ //System.out.println(" set init blockProxStart=" + proxStart);
}
int lastDocID;
@@ -120,6 +133,7 @@ public final class StandardPostingsWrite
* then we just skip consuming positions/payloads. */
@Override
public void startDoc(int docID, int termDocFreq) throws IOException {
+ //System.out.println("StandardW: startDoc seg=" + segment + " docID=" + docID + " tf=" + termDocFreq);
final int delta = docID - lastDocID;
@@ -150,6 +164,7 @@ public final class StandardPostingsWrite
/** Add a new position & payload */
@Override
public void addPosition(int position, BytesRef payload) throws IOException {
+ //System.out.println("StandardW: addPos pos=" + position + " payload=" + (payload == null ? "null" : (payload.length + " bytes")) + " proxFP=" + proxOut.getFilePointer());
assert !omitTermFreqAndPositions: "omitTermFreqAndPositions is true";
assert proxOut != null;
@@ -184,40 +199,51 @@ public final class StandardPostingsWrite
/** Called when we are done adding docs to this term */
@Override
- public void finishTerm(int docCount, boolean isIndexTerm) throws IOException {
- assert docCount > 0;
+ public void finishTerm(TermStats stats) throws IOException {
+ //System.out.println("StandardW.finishTerm seg=" + segment);
+ assert stats.docFreq > 0;
// TODO: wasteful we are counting this (counting # docs
// for this term) in two places?
- assert docCount == df;
+ assert stats.docFreq == df;
- if (isIndexTerm) {
- // Write absolute at seek points
- termsOut.writeVLong(freqStart);
+ final boolean isFirstTerm = pendingCount == 0;
+ //System.out.println(" isFirstTerm=" + isFirstTerm);
+
+ //System.out.println(" freqFP=" + freqStart);
+ if (isFirstTerm) {
+ bytesWriter.writeVLong(freqStart);
} else {
- // Write delta between seek points
- termsOut.writeVLong(freqStart - lastFreqStart);
+ bytesWriter.writeVLong(freqStart-lastFreqStart);
}
-
lastFreqStart = freqStart;
if (df >= skipInterval) {
- termsOut.writeVInt((int) (skipListWriter.writeSkip(freqOut)-freqStart));
+ bytesWriter.writeVInt((int) (skipListWriter.writeSkip(freqOut)-freqStart));
}
-
+
if (!omitTermFreqAndPositions) {
- if (isIndexTerm) {
- // Write absolute at seek points
- termsOut.writeVLong(proxStart);
+ //System.out.println(" proxFP=" + proxStart);
+ if (isFirstTerm) {
+ bytesWriter.writeVLong(proxStart);
} else {
- // Write delta between seek points
- termsOut.writeVLong(proxStart - lastProxStart);
+ bytesWriter.writeVLong(proxStart - lastProxStart);
}
lastProxStart = proxStart;
}
-
+
lastDocID = 0;
df = 0;
+ pendingCount++;
+ }
+
+ @Override
+ public void flushTermsBlock() throws IOException {
+ //System.out.println("SPW.flushBlock pendingCount=" + pendingCount);
+ termsOut.writeVInt((int) bytesWriter.getFilePointer());
+ bytesWriter.writeTo(termsOut);
+ bytesWriter.reset();
+ pendingCount = 0;
}
@Override