You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/01/13 03:09:56 UTC
svn commit: r1058390 [4/16] - in /lucene/dev/branches/bulkpostings: ./
dev-tools/ dev-tools/eclipse/ dev-tools/idea/ dev-tools/idea/.idea/
dev-tools/idea/.idea/libraries/ dev-tools/idea/lucene/
dev-tools/idea/lucene/contrib/ dev-tools/idea/lucene/contr...
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java Thu Jan 13 02:09:33 2011
@@ -59,17 +59,10 @@ final class TermVectorsTermsWriter exten
tvx = tvd = tvf = null;
assert state.segmentName != null;
String idxName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_INDEX_EXTENSION);
- String fldName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_FIELDS_EXTENSION);
- String docName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
-
if (4 + ((long) state.numDocs) * 16 != state.directory.fileLength(idxName)) {
throw new RuntimeException("after flush: tvx size mismatch: " + state.numDocs + " docs vs " + state.directory.fileLength(idxName) + " length in bytes of " + idxName + " file exists?=" + state.directory.fileExists(idxName));
}
- state.flushedFiles.add(idxName);
- state.flushedFiles.add(fldName);
- state.flushedFiles.add(docName);
-
lastDocID = 0;
state.hasVectors = hasVectors;
hasVectors = false;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesReader.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesReader.java Thu Jan 13 02:09:33 2011
@@ -36,13 +36,17 @@ final class DeltaBytesReader {
term.copy(text);
}
- void read() throws IOException {
+ boolean read() throws IOException {
final int start = in.readVInt();
+ if (start == DeltaBytesWriter.TERM_EOF) {
+ return false;
+ }
final int suffix = in.readVInt();
assert start <= term.length: "start=" + start + " length=" + term.length;
final int newLength = start+suffix;
term.grow(newLength);
in.readBytes(term.bytes, start, suffix);
term.length = newLength;
+ return true;
}
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesWriter.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesWriter.java Thu Jan 13 02:09:33 2011
@@ -20,11 +20,18 @@ package org.apache.lucene.index.codecs;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
+import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
import java.io.IOException;
final class DeltaBytesWriter {
+ // Must be bigger than
+ // DocumentsWriter.MAX_TERM_LENGTH_UTF8. If you change
+ // this it's an index format change, so that change must be
+ // versioned:
+ final static int TERM_EOF = BYTE_BLOCK_SIZE;
+
private byte[] lastBytes = new byte[10];
private int lastLength;
final IndexOutput out;
@@ -45,8 +52,9 @@ final class DeltaBytesWriter {
final int limit = length < lastLength ? length : lastLength;
while(start < limit) {
- if (bytes[upto] != lastBytes[start])
+ if (bytes[upto] != lastBytes[start]) {
break;
+ }
start++;
upto++;
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java Thu Jan 13 02:09:33 2011
@@ -33,29 +33,6 @@ import java.util.Collection;
import java.util.Comparator;
import java.io.IOException;
-/**
- * Uses a simplistic format to record terms dict index
- * information. Limititations:
- *
- * - Index for all fields is loaded entirely into RAM up
- * front
- * - Index is stored in RAM using shared byte[] that
- * wastefully expand every term. Using FST to share
- * common prefix & suffix would save RAM.
- * - Index is taken at regular numTerms (every 128 by
- * default); might be better to do it by "net docFreqs"
- * encountered, so that for spans of low-freq terms we
- * take index less often.
- *
- * A better approach might be something similar to how
- * postings are encoded, w/ multi-level skips. Ie, load all
- * terms index data into memory, as a single large compactly
- * encoded stream (eg delta bytes + delta offset). Index
- * that w/ multi-level skipper. Then to look up a term is
- * the equivalent binary search, using the skipper instead,
- * while data remains compressed in memory.
- */
-
import org.apache.lucene.index.IndexFileNames;
/** @lucene.experimental */
@@ -74,7 +51,7 @@ public class FixedGapTermsIndexReader ex
final private int indexInterval;
// Closed if indexLoaded is true:
- final private IndexInput in;
+ private IndexInput in;
private volatile boolean indexLoaded;
private final Comparator<BytesRef> termComp;
@@ -85,7 +62,7 @@ public class FixedGapTermsIndexReader ex
private final PagedBytes termBytes = new PagedBytes(PAGED_BYTES_BITS);
private PagedBytes.Reader termBytesReader;
- final HashMap<FieldInfo,FieldIndexReader> fields = new HashMap<FieldInfo,FieldIndexReader>();
+ final HashMap<FieldInfo,FieldIndexData> fields = new HashMap<FieldInfo,FieldIndexData>();
// start of the field info data
protected long dirOffset;
@@ -95,7 +72,7 @@ public class FixedGapTermsIndexReader ex
this.termComp = termComp;
- IndexInput in = dir.openInput(IndexFileNames.segmentFileName(segment, codecId, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION));
+ in = dir.openInput(IndexFileNames.segmentFileName(segment, codecId, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION));
boolean success = false;
@@ -116,49 +93,137 @@ public class FixedGapTermsIndexReader ex
seekDir(in, dirOffset);
// Read directory
- final int numFields = in.readInt();
-
+ final int numFields = in.readVInt();
for(int i=0;i<numFields;i++) {
- final int field = in.readInt();
- final int numIndexTerms = in.readInt();
- final long termsStart = in.readLong();
- final long indexStart = in.readLong();
- final long packedIndexStart = in.readLong();
- final long packedOffsetsStart = in.readLong();
+ final int field = in.readVInt();
+ final int numIndexTerms = in.readVInt();
+ final long termsStart = in.readVLong();
+ final long indexStart = in.readVLong();
+ final long packedIndexStart = in.readVLong();
+ final long packedOffsetsStart = in.readVLong();
assert packedIndexStart >= indexStart: "packedStart=" + packedIndexStart + " indexStart=" + indexStart + " numIndexTerms=" + numIndexTerms + " seg=" + segment;
- if (numIndexTerms > 0) {
- final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
- fields.put(fieldInfo, new FieldIndexReader(in, fieldInfo, numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart));
- }
+ final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
+ fields.put(fieldInfo, new FieldIndexData(fieldInfo, numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart));
}
success = true;
} finally {
if (indexDivisor > 0) {
in.close();
- this.in = null;
+ in = null;
if (success) {
indexLoaded = true;
}
termBytesReader = termBytes.freeze(true);
- } else {
- this.in = in;
}
}
}
+ @Override
+ public int getDivisor() {
+ return indexDivisor;
+ }
+
protected void readHeader(IndexInput input) throws IOException {
CodecUtil.checkHeader(input, FixedGapTermsIndexWriter.CODEC_NAME,
FixedGapTermsIndexWriter.VERSION_START, FixedGapTermsIndexWriter.VERSION_START);
dirOffset = input.readLong();
}
- private final class FieldIndexReader extends FieldReader {
+ private class IndexEnum extends FieldIndexEnum {
+ private final FieldIndexData.CoreFieldIndex fieldIndex;
+ private final BytesRef term = new BytesRef();
+ private final BytesRef nextTerm = new BytesRef();
+ private long ord;
- final private FieldInfo fieldInfo;
+ public IndexEnum(FieldIndexData.CoreFieldIndex fieldIndex) {
+ this.fieldIndex = fieldIndex;
+ }
+
+ @Override
+ public BytesRef term() {
+ return term;
+ }
+
+ @Override
+ public long seek(BytesRef target) {
+ int lo = 0; // binary search
+ int hi = fieldIndex.numIndexTerms - 1;
+ assert totalIndexInterval > 0 : "totalIndexInterval=" + totalIndexInterval;
+
+ while (hi >= lo) {
+ int mid = (lo + hi) >>> 1;
+
+ final long offset = fieldIndex.termOffsets.get(mid);
+ final int length = (int) (fieldIndex.termOffsets.get(1+mid) - offset);
+ termBytesReader.fillSlice(term, fieldIndex.termBytesStart + offset, length);
+
+ int delta = termComp.compare(target, term);
+ if (delta < 0) {
+ hi = mid - 1;
+ } else if (delta > 0) {
+ lo = mid + 1;
+ } else {
+ assert mid >= 0;
+ ord = mid*totalIndexInterval;
+ return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(mid);
+ }
+ }
- private volatile CoreFieldIndex coreIndex;
+ if (hi < 0) {
+ assert hi == -1;
+ hi = 0;
+ }
- private final IndexInput in;
+ final long offset = fieldIndex.termOffsets.get(hi);
+ final int length = (int) (fieldIndex.termOffsets.get(1+hi) - offset);
+ termBytesReader.fillSlice(term, fieldIndex.termBytesStart + offset, length);
+
+ ord = hi*totalIndexInterval;
+ return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(hi);
+ }
+
+ @Override
+ public long next() {
+ final int idx = 1 + (int) (ord / totalIndexInterval);
+ if (idx >= fieldIndex.numIndexTerms) {
+ return -1;
+ }
+ ord += totalIndexInterval;
+
+ final long offset = fieldIndex.termOffsets.get(idx);
+ final int length = (int) (fieldIndex.termOffsets.get(1+idx) - offset);
+ termBytesReader.fillSlice(nextTerm, fieldIndex.termBytesStart + offset, length);
+ return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(idx);
+ }
+
+ @Override
+ public long ord() {
+ return ord;
+ }
+
+ @Override
+ public long seek(long ord) {
+ int idx = (int) (ord / totalIndexInterval);
+ // caller must ensure ord is in bounds
+ assert idx < fieldIndex.numIndexTerms;
+ final long offset = fieldIndex.termOffsets.get(idx);
+ final int length = (int) (fieldIndex.termOffsets.get(1+idx) - offset);
+ termBytesReader.fillSlice(term, fieldIndex.termBytesStart + offset, length);
+ this.ord = idx * totalIndexInterval;
+ return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(idx);
+ }
+ }
+
+ @Override
+ public boolean supportsOrd() {
+ return true;
+ }
+
+ private final class FieldIndexData {
+
+ final private FieldInfo fieldInfo;
+
+ volatile CoreFieldIndex coreIndex;
private final long indexStart;
private final long termsStart;
@@ -167,11 +232,10 @@ public class FixedGapTermsIndexReader ex
private final int numIndexTerms;
- public FieldIndexReader(IndexInput in, FieldInfo fieldInfo, int numIndexTerms, long indexStart, long termsStart, long packedIndexStart,
- long packedOffsetsStart) throws IOException {
+ public FieldIndexData(FieldInfo fieldInfo, int numIndexTerms, long indexStart, long termsStart, long packedIndexStart,
+ long packedOffsetsStart) throws IOException {
this.fieldInfo = fieldInfo;
- this.in = in;
this.termsStart = termsStart;
this.indexStart = indexStart;
this.packedIndexStart = packedIndexStart;
@@ -182,12 +246,7 @@ public class FixedGapTermsIndexReader ex
// is -1, so that PrefixCodedTermsReader can call
// isIndexTerm for each field:
if (indexDivisor > 0) {
- coreIndex = new CoreFieldIndex(indexStart,
- termsStart,
- packedIndexStart,
- packedOffsetsStart,
- numIndexTerms);
-
+ loadTermsIndex();
}
}
@@ -197,46 +256,11 @@ public class FixedGapTermsIndexReader ex
}
}
- @Override
- public boolean isIndexTerm(long ord, int docFreq, boolean onlyLoaded) {
- if (onlyLoaded) {
- return ord % totalIndexInterval == 0;
- } else {
- return ord % indexInterval == 0;
- }
- }
-
- @Override
- public boolean nextIndexTerm(long ord, TermsIndexResult result) throws IOException {
- if (coreIndex == null) {
- throw new IllegalStateException("terms index was not loaded");
- } else {
- return coreIndex.nextIndexTerm(ord, result);
- }
- }
-
- @Override
- public void getIndexOffset(BytesRef term, TermsIndexResult result) throws IOException {
- // You must call loadTermsIndex if you had specified -1 for indexDivisor
- if (coreIndex == null) {
- throw new IllegalStateException("terms index was not loaded");
- }
- coreIndex.getIndexOffset(term, result);
- }
-
- @Override
- public void getIndexOffset(long ord, TermsIndexResult result) throws IOException {
- // You must call loadTermsIndex if you had specified
- // indexDivisor < 0 to ctor
- if (coreIndex == null) {
- throw new IllegalStateException("terms index was not loaded");
- }
- coreIndex.getIndexOffset(ord, result);
- }
-
private final class CoreFieldIndex {
- final private long termBytesStart;
+ // where this field's terms begin in the packed byte[]
+ // data
+ final long termBytesStart;
// offset into index termBytes
final PackedInts.Reader termOffsets;
@@ -245,7 +269,6 @@ public class FixedGapTermsIndexReader ex
final PackedInts.Reader termsDictOffsets;
final int numIndexTerms;
-
final long termsStart;
public CoreFieldIndex(long indexStart, long termsStart, long packedIndexStart, long packedOffsetsStart, int numIndexTerms) throws IOException {
@@ -315,7 +338,6 @@ public class FixedGapTermsIndexReader ex
termsDictOffsetsM.set(upto, termsDictOffsetsIter.next());
termOffsetsM.set(upto, termOffsetUpto);
- upto++;
long termOffset = termOffsetsIter.next();
long nextTermOffset = termOffsetsIter.next();
@@ -328,6 +350,11 @@ public class FixedGapTermsIndexReader ex
termBytes.copy(clone, numTermBytes);
termOffsetUpto += numTermBytes;
+ upto++;
+ if (upto == this.numIndexTerms) {
+ break;
+ }
+
// skip terms:
termsDictOffsetsIter.next();
for(int i=0;i<indexDivisor-2;i++) {
@@ -344,71 +371,10 @@ public class FixedGapTermsIndexReader ex
}
}
}
-
- public boolean nextIndexTerm(long ord, TermsIndexResult result) throws IOException {
- int idx = 1 + (int) (ord / totalIndexInterval);
- if (idx < numIndexTerms) {
- fillResult(idx, result);
- return true;
- } else {
- return false;
- }
- }
-
- private void fillResult(int idx, TermsIndexResult result) {
- final long offset = termOffsets.get(idx);
- final int length = (int) (termOffsets.get(1+idx) - offset);
- termBytesReader.fillSlice(result.term, termBytesStart + offset, length);
- result.position = idx * totalIndexInterval;
- result.offset = termsStart + termsDictOffsets.get(idx);
- }
-
- public void getIndexOffset(BytesRef term, TermsIndexResult result) throws IOException {
- int lo = 0; // binary search
- int hi = numIndexTerms - 1;
- assert totalIndexInterval > 0 : "totalIndexInterval=" + totalIndexInterval;
-
- while (hi >= lo) {
- int mid = (lo + hi) >>> 1;
-
- final long offset = termOffsets.get(mid);
- final int length = (int) (termOffsets.get(1+mid) - offset);
- termBytesReader.fillSlice(result.term, termBytesStart + offset, length);
-
- int delta = termComp.compare(term, result.term);
- if (delta < 0) {
- hi = mid - 1;
- } else if (delta > 0) {
- lo = mid + 1;
- } else {
- assert mid >= 0;
- result.position = mid*totalIndexInterval;
- result.offset = termsStart + termsDictOffsets.get(mid);
- return;
- }
- }
- if (hi < 0) {
- assert hi == -1;
- hi = 0;
- }
-
- final long offset = termOffsets.get(hi);
- final int length = (int) (termOffsets.get(1+hi) - offset);
- termBytesReader.fillSlice(result.term, termBytesStart + offset, length);
-
- result.position = hi*totalIndexInterval;
- result.offset = termsStart + termsDictOffsets.get(hi);
- }
-
- public void getIndexOffset(long ord, TermsIndexResult result) throws IOException {
- int idx = (int) (ord / totalIndexInterval);
- // caller must ensure ord is in bounds
- assert idx < numIndexTerms;
- fillResult(idx, result);
- }
}
}
+ // Externally synced in IndexWriter
@Override
public void loadTermsIndex(int indexDivisor) throws IOException {
if (!indexLoaded) {
@@ -420,7 +386,7 @@ public class FixedGapTermsIndexReader ex
}
this.totalIndexInterval = indexInterval * this.indexDivisor;
- Iterator<FieldIndexReader> it = fields.values().iterator();
+ Iterator<FieldIndexData> it = fields.values().iterator();
while(it.hasNext()) {
it.next().loadTermsIndex();
}
@@ -432,8 +398,13 @@ public class FixedGapTermsIndexReader ex
}
@Override
- public FieldReader getField(FieldInfo fieldInfo) {
- return fields.get(fieldInfo);
+ public FieldIndexEnum getFieldEnum(FieldInfo fieldInfo) {
+ final FieldIndexData fieldData = fields.get(fieldInfo);
+ if (fieldData.coreIndex == null) {
+ return null;
+ } else {
+ return new IndexEnum(fieldData.coreIndex);
+ }
}
public static void files(Directory dir, SegmentInfo info, String id, Collection<String> files) {
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java Thu Jan 13 02:09:33 2011
@@ -31,7 +31,14 @@ import java.util.List;
import java.util.ArrayList;
import java.io.IOException;
-/** @lucene.experimental */
+/**
+ * Selects every Nth term as an index term, and holds term
+ * bytes fully expanded in memory. This terms index
+ * supports seeking by ord. See {@link
+ * VariableGapTermsIndexWriter} for a more memory efficient
+ * terms index that does not support seeking by ord.
+ *
+ * @lucene.experimental */
public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
protected final IndexOutput out;
@@ -50,7 +57,6 @@ public class FixedGapTermsIndexWriter ex
public FixedGapTermsIndexWriter(SegmentWriteState state) throws IOException {
final String indexFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_INDEX_EXTENSION);
- state.flushedFiles.add(indexFileName);
termIndexInterval = state.termIndexInterval;
out = state.directory.createOutput(indexFileName);
fieldInfos = state.fieldInfos;
@@ -203,15 +209,25 @@ public class FixedGapTermsIndexWriter ex
final long dirStart = out.getFilePointer();
final int fieldCount = fields.size();
- out.writeInt(fieldCount);
+ int nonNullFieldCount = 0;
for(int i=0;i<fieldCount;i++) {
SimpleFieldWriter field = fields.get(i);
- out.writeInt(field.fieldInfo.number);
- out.writeInt(field.numIndexTerms);
- out.writeLong(field.termsStart);
- out.writeLong(field.indexStart);
- out.writeLong(field.packedIndexStart);
- out.writeLong(field.packedOffsetsStart);
+ if (field.numIndexTerms > 0) {
+ nonNullFieldCount++;
+ }
+ }
+
+ out.writeVInt(nonNullFieldCount);
+ for(int i=0;i<fieldCount;i++) {
+ SimpleFieldWriter field = fields.get(i);
+ if (field.numIndexTerms > 0) {
+ out.writeVInt(field.fieldInfo.number);
+ out.writeVInt(field.numIndexTerms);
+ out.writeVLong(field.termsStart);
+ out.writeVLong(field.indexStart);
+ out.writeVLong(field.packedIndexStart);
+ out.writeVLong(field.packedOffsetsStart);
+ }
}
writeTrailer(dirStart);
out.close();
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java Thu Jan 13 02:09:33 2011
@@ -141,12 +141,10 @@ public class PrefixCodedTermsReader exte
final long numTerms = in.readLong();
assert numTerms >= 0;
final long termsStartPointer = in.readLong();
- final TermsIndexReaderBase.FieldReader fieldIndexReader;
final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
- fieldIndexReader = indexReader.getField(fieldInfo);
if (numTerms > 0) {
assert !fields.containsKey(fieldInfo.name);
- fields.put(fieldInfo.name, new FieldReader(fieldIndexReader, fieldInfo, numTerms, termsStartPointer));
+ fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer));
}
}
success = true;
@@ -252,14 +250,12 @@ public class PrefixCodedTermsReader exte
final long numTerms;
final FieldInfo fieldInfo;
final long termsStartPointer;
- final TermsIndexReaderBase.FieldReader fieldIndexReader;
- FieldReader(TermsIndexReaderBase.FieldReader fieldIndexReader, FieldInfo fieldInfo, long numTerms, long termsStartPointer) {
+ FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer) {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
this.numTerms = numTerms;
this.termsStartPointer = termsStartPointer;
- this.fieldIndexReader = fieldIndexReader;
}
@Override
@@ -282,18 +278,25 @@ public class PrefixCodedTermsReader exte
return numTerms;
}
- // Iterates through terms in this field
+ // Iterates through terms in this field, not supporting ord()
private class SegmentTermsEnum extends TermsEnum {
private final IndexInput in;
private final DeltaBytesReader bytesReader;
private final TermState state;
private boolean seekPending;
- private final TermsIndexReaderBase.TermsIndexResult indexResult = new TermsIndexReaderBase.TermsIndexResult();
private final FieldAndTerm fieldTerm = new FieldAndTerm();
+ private final TermsIndexReaderBase.FieldIndexEnum indexEnum;
+ private boolean positioned;
+ private boolean didIndexNext;
+ private BytesRef nextIndexTerm;
+ private boolean isIndexTerm;
+ private final boolean doOrd;
SegmentTermsEnum() throws IOException {
in = (IndexInput) PrefixCodedTermsReader.this.in.clone();
in.seek(termsStartPointer);
+ indexEnum = indexReader.getFieldEnum(fieldInfo);
+ doOrd = indexReader.supportsOrd();
bytesReader = new DeltaBytesReader(in);
fieldTerm.field = fieldInfo.name;
state = postingsReader.newTermState();
@@ -313,12 +316,41 @@ public class PrefixCodedTermsReader exte
stateCopy);
}
+ // called only from assert
+ private boolean first;
+ private int indexTermCount;
+
+ private boolean startSeek() {
+ first = true;
+ indexTermCount = 0;
+ return true;
+ }
+
+ private boolean checkSeekScan() {
+ if (!first && isIndexTerm) {
+ indexTermCount++;
+ if (indexTermCount >= indexReader.getDivisor()) {
+ //System.out.println("now fail count=" + indexTermCount);
+ return false;
+ }
+ }
+ first = false;
+ return true;
+ }
+
/** Seeks until the first term that's >= the provided
* text; returns SeekStatus.FOUND if the exact term
* is found, SeekStatus.NOT_FOUND if a different term
* was found, SeekStatus.END if we hit EOF */
@Override
public SeekStatus seek(BytesRef term, boolean useCache) throws IOException {
+
+ if (indexEnum == null) {
+ throw new IllegalStateException("terms index was not loaded");
+ }
+
+ //System.out.println("te.seek term=" + fieldInfo.name + ":" + term.utf8ToString() + " current=" + term().utf8ToString() + " useCache=" + useCache + " this=" + this);
+
// Check cache
TermState cachedState;
if (useCache) {
@@ -327,7 +359,9 @@ public class PrefixCodedTermsReader exte
if (cachedState != null) {
state.copy(cachedState);
seekPending = true;
+ positioned = false;
bytesReader.term.copy(term);
+ //System.out.println(" cached!");
return SeekStatus.FOUND;
}
} else {
@@ -336,36 +370,54 @@ public class PrefixCodedTermsReader exte
boolean doSeek = true;
- if (state.ord != -1) {
- // we are positioned
+ if (positioned) {
final int cmp = termComp.compare(bytesReader.term, term);
if (cmp == 0) {
// already at the requested term
return SeekStatus.FOUND;
- }
+ } else if (cmp < 0) {
+
+ if (seekPending) {
+ seekPending = false;
+ in.seek(state.filePointer);
+ indexEnum.seek(bytesReader.term);
+ didIndexNext = false;
+ }
+
+ // Target term is after current term
+ if (!didIndexNext) {
+ if (indexEnum.next() == -1) {
+ nextIndexTerm = null;
+ } else {
+ nextIndexTerm = indexEnum.term();
+ }
+ //System.out.println(" now do index next() nextIndexTerm=" + (nextIndexTerm == null ? "null" : nextIndexTerm.utf8ToString()));
+ didIndexNext = true;
+ }
- if (cmp < 0 &&
- fieldIndexReader.nextIndexTerm(state.ord, indexResult) &&
- termComp.compare(indexResult.term, term) > 0) {
- // Optimization: requested term is within the
- // same index block we are now in; skip seeking
- // (but do scanning):
- doSeek = false;
+ if (nextIndexTerm == null || termComp.compare(term, nextIndexTerm) < 0) {
+ // Optimization: requested term is within the
+ // same index block we are now in; skip seeking
+ // (but do scanning):
+ doSeek = false;
+ //System.out.println(" skip seek: nextIndexTerm=" + nextIndexTerm);
+ }
}
}
- // Used only for assert:
- final long startOrd;
-
if (doSeek) {
- // As index to find biggest index term that's <=
- // our text:
- fieldIndexReader.getIndexOffset(term, indexResult);
+ positioned = true;
- in.seek(indexResult.offset);
+ // Ask terms index to find biggest index term that's <=
+ // our text:
+ in.seek(indexEnum.seek(term));
+ didIndexNext = false;
+ if (doOrd) {
+ state.ord = indexEnum.ord()-1;
+ }
seekPending = false;
// NOTE: the first next() after an index seek is
@@ -374,21 +426,21 @@ public class PrefixCodedTermsReader exte
// those bytes in the primary file, but then when
// scanning over an index term we'd have to
// special case it:
- bytesReader.reset(indexResult.term);
-
- state.ord = indexResult.position-1;
- assert state.ord >= -1: "ord=" + state.ord + " pos=" + indexResult.position;
-
- startOrd = indexResult.position;
+ bytesReader.reset(indexEnum.term());
+ //System.out.println(" doSeek term=" + indexEnum.term().utf8ToString() + " vs target=" + term.utf8ToString());
} else {
- startOrd = -1;
+ //System.out.println(" skip seek");
}
+ assert startSeek();
+
// Now scan:
- while(next() != null) {
+ while (next() != null) {
final int cmp = termComp.compare(bytesReader.term, term);
if (cmp == 0) {
- if (doSeek && useCache) {
+
+ // Done!
+ if (useCache) {
// Store in cache
FieldAndTerm entryKey = new FieldAndTerm(fieldTerm);
cachedState = (TermState) state.clone();
@@ -396,93 +448,62 @@ public class PrefixCodedTermsReader exte
cachedState.filePointer = in.getFilePointer();
termsCache.put(entryKey, cachedState);
}
+
return SeekStatus.FOUND;
} else if (cmp > 0) {
return SeekStatus.NOT_FOUND;
}
+
// The purpose of the terms dict index is to seek
// the enum to the closest index term before the
// term we are looking for. So, we should never
// cross another index term (besides the first
// one) while we are scanning:
- assert state.ord == startOrd || !fieldIndexReader.isIndexTerm(state.ord, state.docFreq, true): "state.ord=" + state.ord + " startOrd=" + startOrd + " ir.isIndexTerm=" + fieldIndexReader.isIndexTerm(state.ord, state.docFreq, true) + " state.docFreq=" + state.docFreq;
+ assert checkSeekScan();
}
+ positioned = false;
return SeekStatus.END;
}
@Override
- public SeekStatus seek(long ord) throws IOException {
-
- // TODO: should we cache term lookup by ord as well...?
-
- if (ord >= numTerms) {
- state.ord = numTerms-1;
- return SeekStatus.END;
- }
-
- fieldIndexReader.getIndexOffset(ord, indexResult);
- in.seek(indexResult.offset);
- seekPending = false;
-
- // NOTE: the first next() after an index seek is
- // wasteful, since it redundantly reads the same
- // bytes into the buffer
- bytesReader.reset(indexResult.term);
-
- state.ord = indexResult.position-1;
- assert state.ord >= -1: "ord=" + state.ord;
-
- // Now, scan:
- int left = (int) (ord - state.ord);
- while(left > 0) {
- final BytesRef term = next();
- assert term != null;
- left--;
- }
-
- // always found
- return SeekStatus.FOUND;
- }
-
- @Override
public BytesRef term() {
return bytesReader.term;
}
@Override
- public long ord() {
- return state.ord;
- }
-
- @Override
public BytesRef next() throws IOException {
if (seekPending) {
seekPending = false;
in.seek(state.filePointer);
+ indexEnum.seek(bytesReader.term);
+ didIndexNext = false;
}
- if (state.ord >= numTerms-1) {
+ if (!bytesReader.read()) {
+ //System.out.println("te.next end!");
+ positioned = false;
return null;
}
- bytesReader.read();
- state.docFreq = in.readVInt();
+ final byte b = in.readByte();
+ isIndexTerm = (b & 0x80) != 0;
+
+ if ((b & 0x40) == 0) {
+ // Fast case -- docFreq fits in 6 bits
+ state.docFreq = b & 0x3F;
+ } else {
+ state.docFreq = (in.readVInt() << 6) | (b & 0x3F);
+ }
- // TODO: would be cleaner, but space-wasting, to
- // simply record a bit into each index entry as to
- // whether it's an index entry or not, rather than
- // re-compute that information... or, possibly store
- // a "how many terms until next index entry" in each
- // index entry, but that'd require some tricky
- // lookahead work when writing the index
postingsReader.readTerm(in,
fieldInfo, state,
- fieldIndexReader.isIndexTerm(1+state.ord, state.docFreq, false));
-
+ isIndexTerm);
state.ord++;
+ positioned = true;
+ //System.out.println("te.next term=" + bytesReader.term.utf8ToString());
return bytesReader.term;
}
@@ -512,6 +533,50 @@ public class PrefixCodedTermsReader exte
return postingsReader.docsAndPositions(fieldInfo, state, skipDocs, reuse);
}
}
+
+ @Override
+ public SeekStatus seek(long ord) throws IOException {
+
+ if (indexEnum == null) {
+ throw new IllegalStateException("terms index was not loaded");
+ }
+
+ if (ord >= numTerms) {
+ state.ord = numTerms-1;
+ return SeekStatus.END;
+ }
+
+ in.seek(indexEnum.seek(ord));
+ seekPending = false;
+ positioned = true;
+
+ // NOTE: the first next() after an index seek is
+ // wasteful, since it redundantly reads the same
+ // bytes into the buffer
+ bytesReader.reset(indexEnum.term());
+
+ state.ord = indexEnum.ord()-1;
+ assert state.ord >= -1: "ord=" + state.ord;
+
+ // Now, scan:
+ int left = (int) (ord - state.ord);
+ while(left > 0) {
+ final BytesRef term = next();
+ assert term != null;
+ left--;
+ }
+
+ // always found
+ return SeekStatus.FOUND;
+ }
+
+ @Override
+ public long ord() {
+ if (!doOrd) {
+ throw new UnsupportedOperationException();
+ }
+ return state.ord;
+ }
}
}
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java Thu Jan 13 02:09:33 2011
@@ -74,7 +74,6 @@ public class PrefixCodedTermsWriter exte
this.termComp = termComp;
out = state.directory.createOutput(termsFileName);
termsIndexWriter.setTermsOutput(out);
- state.flushedFiles.add(termsFileName);
fieldInfos = state.fieldInfos;
writeHeader(out);
@@ -93,7 +92,7 @@ public class PrefixCodedTermsWriter exte
}
@Override
- public TermsConsumer addField(FieldInfo field) {
+ public TermsConsumer addField(FieldInfo field) throws IOException {
assert currentField == null || currentField.name.compareTo(field.name) < 0;
currentField = field;
TermsIndexWriterBase.FieldWriter fieldIndexWriter = termsIndexWriter.addField(field);
@@ -173,12 +172,25 @@ public class PrefixCodedTermsWriter exte
public void finishTerm(BytesRef text, int numDocs) throws IOException {
assert numDocs > 0;
+ //System.out.println("finishTerm term=" + fieldInfo.name + ":" + text.utf8ToString() + " fp=" + out.getFilePointer());
final boolean isIndexTerm = fieldIndexWriter.checkIndexTerm(text, numDocs);
termWriter.write(text);
- out.writeVInt(numDocs);
+ final int highBit = isIndexTerm ? 0x80 : 0;
+ //System.out.println(" isIndex=" + isIndexTerm);
+ // This is a vInt, except, we steal top bit to record
+ // whether this was an indexed term:
+ if ((numDocs & ~0x3F) == 0) {
+ // Fast case -- docFreq fits in 6 bits
+ out.writeByte((byte) (highBit | numDocs));
+ } else {
+ // Write bottom 6 bits of docFreq, then write the
+ // remainder as vInt:
+ out.writeByte((byte) (highBit | 0x40 | (numDocs & 0x3F)));
+ out.writeVInt(numDocs >>> 6);
+ }
postingsWriter.finishTerm(numDocs, isIndexTerm);
numTerms++;
}
@@ -186,6 +198,8 @@ public class PrefixCodedTermsWriter exte
// Finishes all terms in this field
@Override
public void finish() throws IOException {
+ // EOF marker:
+ out.writeVInt(DeltaBytesWriter.TERM_EOF);
fieldIndexWriter.finish();
}
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java Thu Jan 13 02:09:33 2011
@@ -21,6 +21,7 @@ import org.apache.lucene.index.FieldInfo
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
+import java.io.Closeable;
import java.util.Collection;
@@ -38,39 +39,37 @@ import java.util.Collection;
* text.
* @lucene.experimental */
-public abstract class TermsIndexReaderBase {
+public abstract class TermsIndexReaderBase implements Closeable {
- static class TermsIndexResult {
- long position;
- final BytesRef term = new BytesRef();
- long offset;
- };
-
- public abstract class FieldReader {
- /** Returns position of "largest" index term that's <=
- * text. Returned TermsIndexResult may be reused
- * across calls. This resets internal state, and
- * expects that you'll then scan the file and
- * sequentially call isIndexTerm for each term
- * encountered. */
- public abstract void getIndexOffset(BytesRef term, TermsIndexResult result) throws IOException;
-
- public abstract void getIndexOffset(long ord, TermsIndexResult result) throws IOException;
-
- /** Call this sequentially for each term encountered,
- * after calling {@link #getIndexOffset}. */
- public abstract boolean isIndexTerm(long ord, int docFreq, boolean onlyLoaded) throws IOException;
-
- /** Finds the next index term, after the specified
- * ord. Returns true if one exists. */
- public abstract boolean nextIndexTerm(long ord, TermsIndexResult result) throws IOException;
- }
-
- public abstract FieldReader getField(FieldInfo fieldInfo);
+ public abstract FieldIndexEnum getFieldEnum(FieldInfo fieldInfo);
public abstract void loadTermsIndex(int indexDivisor) throws IOException;
public abstract void close() throws IOException;
public abstract void getExtensions(Collection<String> extensions);
-}
\ No newline at end of file
+
+ public abstract boolean supportsOrd();
+
+ public abstract int getDivisor();
+
+ // Similar to TermsEnum, except, the only "metadata" it
+ // reports for a given indexed term is the long fileOffset
+ // into the main terms dict (_X.tis) file:
+ public static abstract class FieldIndexEnum {
+
+ /** Seeks to "largest" indexed term that's <=
+ * term; retruns file pointer index (into the main
+ * terms index file) for that term */
+ public abstract long seek(BytesRef term) throws IOException;
+
+ /** Returns -1 at end */
+ public abstract long next() throws IOException;
+
+ public abstract BytesRef term();
+
+ // Only impl'd if supportsOrd() returns true!
+ public abstract long seek(long ord) throws IOException;
+ public abstract long ord();
+ }
+}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java Thu Jan 13 02:09:33 2011
@@ -32,7 +32,7 @@ public abstract class TermsIndexWriterBa
public abstract void finish() throws IOException;
}
- public abstract FieldWriter addField(FieldInfo fieldInfo);
+ public abstract FieldWriter addField(FieldInfo fieldInfo) throws IOException;
public abstract void close() throws IOException;
-}
\ No newline at end of file
+}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java Thu Jan 13 02:09:33 2011
@@ -230,8 +230,11 @@ public final class PulsingPostingsWriter
}
@Override
- public void finishDoc() {
+ public void finishDoc() throws IOException {
assert omitTF || currentDoc.numPositions == currentDoc.termDocFreq;
+ if (pulsed) {
+ wrappedPostingsWriter.finishDoc();
+ }
}
boolean pendingIsIndexTerm;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java Thu Jan 13 02:09:33 2011
@@ -699,6 +699,7 @@ public class SepPostingsReaderImpl exten
final int code = nextPosInt();
+ assert code >= 0;
if (storePayloads) {
if ((code & 1) != 0) {
// Payload length has changed
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java Thu Jan 13 02:09:33 2011
@@ -85,24 +85,20 @@ public final class SepPostingsWriterImpl
super();
final String docFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, DOC_EXTENSION);
- state.flushedFiles.add(docFileName);
docOut = factory.createOutput(state.directory, docFileName);
docIndex = docOut.index();
if (state.fieldInfos.hasProx()) {
final String frqFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, FREQ_EXTENSION);
- state.flushedFiles.add(frqFileName);
freqOut = factory.createOutput(state.directory, frqFileName);
freqIndex = freqOut.index();
final String posFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, POS_EXTENSION);
posOut = factory.createOutput(state.directory, posFileName);
- state.flushedFiles.add(posFileName);
posIndex = posOut.index();
// TODO: -- only if at least one field stores payloads?
final String payloadFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, PAYLOAD_EXTENSION);
- state.flushedFiles.add(payloadFileName);
payloadOut = state.directory.createOutput(payloadFileName);
} else {
@@ -114,7 +110,6 @@ public final class SepPostingsWriterImpl
}
final String skipFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, SKIP_EXTENSION);
- state.flushedFiles.add(skipFileName);
skipOut = state.directory.createOutput(skipFileName);
totalNumDocs = state.numDocs;
@@ -216,6 +211,7 @@ public final class SepPostingsWriterImpl
assert !omitTF;
final int delta = position - lastPosition;
+ assert delta > 0 || position == 0: "position=" + position + " lastPosition=" + lastPosition; // not quite right (if pos=0 is repeated twice we don't catch it)
lastPosition = position;
if (storePayloads) {
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java Thu Jan 13 02:09:33 2011
@@ -131,9 +131,8 @@ class SimpleTextFieldsReader extends Fie
public SeekStatus seek(BytesRef text, boolean useCache /* ignored */) throws IOException {
- fstEnum.reset();
//System.out.println("seek to text=" + text.utf8ToString());
- final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,Long>> result = fstEnum.advance(text);
+ final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,Long>> result = fstEnum.seekCeil(text);
if (result == null) {
//System.out.println(" end");
return SeekStatus.END;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java Thu Jan 13 02:09:33 2011
@@ -46,7 +46,6 @@ class SimpleTextFieldsWriter extends Fie
public SimpleTextFieldsWriter(SegmentWriteState state) throws IOException {
final String fileName = SimpleTextCodec.getPostingsFileName(state.segmentName, state.codecId);
out = state.directory.createOutput(fileName);
- state.flushedFiles.add(fileName);
}
private void write(String s) throws IOException {
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java Thu Jan 13 02:09:33 2011
@@ -31,8 +31,8 @@ import org.apache.lucene.index.codecs.Po
import org.apache.lucene.index.codecs.PostingsReaderBase;
import org.apache.lucene.index.codecs.TermsIndexWriterBase;
import org.apache.lucene.index.codecs.TermsIndexReaderBase;
-import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter;
-import org.apache.lucene.index.codecs.FixedGapTermsIndexReader;
+import org.apache.lucene.index.codecs.VariableGapTermsIndexWriter;
+import org.apache.lucene.index.codecs.VariableGapTermsIndexReader;
import org.apache.lucene.index.codecs.PrefixCodedTermsWriter;
import org.apache.lucene.index.codecs.PrefixCodedTermsReader;
import org.apache.lucene.store.Directory;
@@ -56,7 +56,7 @@ public class StandardCodec extends Codec
TermsIndexWriterBase indexWriter;
boolean success = false;
try {
- indexWriter = new FixedGapTermsIndexWriter(state);
+ indexWriter = new VariableGapTermsIndexWriter(state, new VariableGapTermsIndexWriter.EveryNTermSelector(state.termIndexInterval));
success = true;
} finally {
if (!success) {
@@ -89,12 +89,11 @@ public class StandardCodec extends Codec
boolean success = false;
try {
- indexReader = new FixedGapTermsIndexReader(state.dir,
- state.fieldInfos,
- state.segmentInfo.name,
- state.termsIndexDivisor,
- BytesRef.getUTF8SortedAsUnicodeComparator(),
- state.codecId);
+ indexReader = new VariableGapTermsIndexReader(state.dir,
+ state.fieldInfos,
+ state.segmentInfo.name,
+ state.termsIndexDivisor,
+ state.codecId);
success = true;
} finally {
if (!success) {
@@ -136,7 +135,7 @@ public class StandardCodec extends Codec
public void files(Directory dir, SegmentInfo segmentInfo, String id, Set<String> files) throws IOException {
StandardPostingsReader.files(dir, segmentInfo, id, files);
PrefixCodedTermsReader.files(dir, segmentInfo, id, files);
- FixedGapTermsIndexReader.files(dir, segmentInfo, id, files);
+ VariableGapTermsIndexReader.files(dir, segmentInfo, id, files);
}
@Override
@@ -148,6 +147,6 @@ public class StandardCodec extends Codec
extensions.add(FREQ_EXTENSION);
extensions.add(PROX_EXTENSION);
PrefixCodedTermsReader.getExtensions(extensions);
- FixedGapTermsIndexReader.getIndexExtensions(extensions);
+ VariableGapTermsIndexReader.getIndexExtensions(extensions);
}
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java Thu Jan 13 02:09:33 2011
@@ -61,14 +61,12 @@ public final class StandardPostingsWrite
public StandardPostingsWriter(SegmentWriteState state) throws IOException {
super();
String fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.FREQ_EXTENSION);
- state.flushedFiles.add(fileName);
freqOut = state.directory.createOutput(fileName);
if (state.fieldInfos.hasProx()) {
// At least one field does not omit TF, so create the
// prox file
fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.PROX_EXTENSION);
- state.flushedFiles.add(fileName);
proxOut = state.directory.createOutput(fileName);
} else {
// Every field omits TF so we will write no prox file
@@ -157,7 +155,7 @@ public final class StandardPostingsWrite
final int delta = position - lastPosition;
- assert delta > 0 || position == 0 || position == -1: "position=" + position + " lastPosition=" + lastPosition; // not quite right (if pos=0 is repeated twice we don't catch it)
+ assert delta > 0 || position == 0: "position=" + position + " lastPosition=" + lastPosition; // not quite right (if pos=0 is repeated twice we don't catch it)
lastPosition = position;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanQuery.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanQuery.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanQuery.java Thu Jan 13 02:09:33 2011
@@ -18,6 +18,7 @@ package org.apache.lucene.search;
*/
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.search.BooleanClause.Occur;
@@ -62,10 +63,12 @@ public class BooleanQuery extends Query
}
private ArrayList<BooleanClause> clauses = new ArrayList<BooleanClause>();
- private boolean disableCoord;
+ private final boolean disableCoord;
/** Constructs an empty boolean query. */
- public BooleanQuery() {}
+ public BooleanQuery() {
+ disableCoord = false;
+ }
/** Constructs an empty boolean query.
*
@@ -86,22 +89,6 @@ public class BooleanQuery extends Query
*/
public boolean isCoordDisabled() { return disableCoord; }
- // Implement coord disabling.
- // Inherit javadoc.
- @Override
- public Similarity getSimilarity(Searcher searcher) {
- Similarity result = super.getSimilarity(searcher);
- if (disableCoord) { // disable coord as requested
- result = new SimilarityDelegator(result) {
- @Override
- public float coord(int overlap, int maxOverlap) {
- return 1.0f;
- }
- };
- }
- return result;
- }
-
/**
* Specifies a minimum number of the optional BooleanClauses
* which must be satisfied.
@@ -178,10 +165,12 @@ public class BooleanQuery extends Query
protected Similarity similarity;
protected ArrayList<Weight> weights;
protected int maxCoord; // num optional + num required
+ private final boolean disableCoord;
- public BooleanWeight(Searcher searcher)
+ public BooleanWeight(IndexSearcher searcher, boolean disableCoord)
throws IOException {
this.similarity = getSimilarity(searcher);
+ this.disableCoord = disableCoord;
weights = new ArrayList<Weight>(clauses.size());
for (int i = 0 ; i < clauses.size(); i++) {
BooleanClause c = clauses.get(i);
@@ -223,7 +212,7 @@ public class BooleanQuery extends Query
}
@Override
- public Explanation explain(IndexReader reader, int doc)
+ public Explanation explain(AtomicReaderContext context, int doc)
throws IOException {
final int minShouldMatch =
BooleanQuery.this.getMinimumNumberShouldMatch();
@@ -237,7 +226,7 @@ public class BooleanQuery extends Query
for (Iterator<Weight> wIter = weights.iterator(); wIter.hasNext();) {
Weight w = wIter.next();
BooleanClause c = cIter.next();
- if (w.scorer(reader, true, true) == null) {
+ if (w.scorer(context, true, true) == null) {
if (c.isRequired()) {
fail = true;
Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")");
@@ -245,7 +234,7 @@ public class BooleanQuery extends Query
}
continue;
}
- Explanation e = w.explain(reader, doc);
+ Explanation e = w.explain(context, doc);
if (e.isMatch()) {
if (!c.isProhibited()) {
sumExpl.addDetail(e);
@@ -284,10 +273,10 @@ public class BooleanQuery extends Query
sumExpl.setMatch(0 < coord ? Boolean.TRUE : Boolean.FALSE);
sumExpl.setValue(sum);
- float coordFactor = similarity.coord(coord, maxCoord);
- if (coordFactor == 1.0f) // coord is no-op
+ final float coordFactor = disableCoord ? 1.0f : similarity.coord(coord, maxCoord);
+ if (coordFactor == 1.0f) {
return sumExpl; // eliminate wrapper
- else {
+ } else {
ComplexExplanation result = new ComplexExplanation(sumExpl.isMatch(),
sum*coordFactor,
"product of:");
@@ -299,7 +288,7 @@ public class BooleanQuery extends Query
}
@Override
- public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer)
+ public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer)
throws IOException {
List<Scorer> required = new ArrayList<Scorer>();
List<Scorer> prohibited = new ArrayList<Scorer>();
@@ -307,7 +296,7 @@ public class BooleanQuery extends Query
Iterator<BooleanClause> cIter = clauses.iterator();
for (Weight w : weights) {
BooleanClause c = cIter.next();
- Scorer subScorer = w.scorer(reader, true, false);
+ Scorer subScorer = w.scorer(context, true, false);
if (subScorer == null) {
if (c.isRequired()) {
return null;
@@ -323,7 +312,7 @@ public class BooleanQuery extends Query
// Check if we can return a BooleanScorer
if (!scoreDocsInOrder && topScorer && required.size() == 0 && prohibited.size() < 32) {
- return new BooleanScorer(this, similarity, minNrShouldMatch, optional, prohibited, maxCoord);
+ return new BooleanScorer(this, disableCoord, similarity, minNrShouldMatch, optional, prohibited, maxCoord);
}
if (required.size() == 0 && optional.size() == 0) {
@@ -337,7 +326,7 @@ public class BooleanQuery extends Query
}
// Return a BooleanScorer2
- return new BooleanScorer2(this, similarity, minNrShouldMatch, required, prohibited, optional, maxCoord);
+ return new BooleanScorer2(this, disableCoord, similarity, minNrShouldMatch, required, prohibited, optional, maxCoord);
}
@Override
@@ -362,8 +351,8 @@ public class BooleanQuery extends Query
}
@Override
- public Weight createWeight(Searcher searcher) throws IOException {
- return new BooleanWeight(searcher);
+ public Weight createWeight(IndexSearcher searcher) throws IOException {
+ return new BooleanWeight(searcher, disableCoord);
}
@Override
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanScorer.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanScorer.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanScorer.java Thu Jan 13 02:09:33 2011
@@ -197,9 +197,9 @@ final class BooleanScorer extends Scorer
private Bucket current;
private int doc = -1;
- BooleanScorer(Weight weight, Similarity similarity, int minNrShouldMatch,
+ BooleanScorer(Weight weight, boolean disableCoord, Similarity similarity, int minNrShouldMatch,
List<Scorer> optionalScorers, List<Scorer> prohibitedScorers, int maxCoord) throws IOException {
- super(similarity, weight);
+ super(null, weight); // Similarity not used
this.minNrShouldMatch = minNrShouldMatch;
if (optionalScorers != null && optionalScorers.size() > 0) {
@@ -222,15 +222,14 @@ final class BooleanScorer extends Scorer
}
coordFactors = new float[optionalScorers.size() + 1];
- Similarity sim = getSimilarity();
for (int i = 0; i < coordFactors.length; i++) {
- coordFactors[i] = sim.coord(i, maxCoord);
+ coordFactors[i] = disableCoord ? 1.0f : similarity.coord(i, maxCoord);
}
}
// firstDocID is ignored since nextDoc() initializes 'current'
@Override
- protected boolean score(Collector collector, int max, int firstDocID) throws IOException {
+ public boolean score(Collector collector, int max, int firstDocID) throws IOException {
boolean more;
Bucket tmp;
BucketScorer bs = new BucketScorer();
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java Thu Jan 13 02:09:33 2011
@@ -42,14 +42,12 @@ class BooleanScorer2 extends Scorer {
int maxCoord = 0; // to be increased for each non prohibited scorer
int nrMatchers; // to be increased by score() of match counting scorers.
- void init() { // use after all scorers have been added.
+ void init(Similarity sim, boolean disableCoord) { // use after all scorers have been added.
coordFactors = new float[optionalScorers.size() + requiredScorers.size() + 1];
- Similarity sim = getSimilarity();
for (int i = 0; i < coordFactors.length; i++) {
- coordFactors[i] = sim.coord(i, maxCoord);
+ coordFactors[i] = disableCoord ? 1.0f : sim.coord(i, maxCoord);
}
}
-
}
private final Coordinator coordinator;
@@ -82,9 +80,9 @@ class BooleanScorer2 extends Scorer {
* @param optional
* the list of optional scorers.
*/
- public BooleanScorer2(Weight weight, Similarity similarity, int minNrShouldMatch,
+ public BooleanScorer2(Weight weight, boolean disableCoord, Similarity similarity, int minNrShouldMatch,
List<Scorer> required, List<Scorer> prohibited, List<Scorer> optional, int maxCoord) throws IOException {
- super(similarity, weight);
+ super(null, weight); // Similarity not used
if (minNrShouldMatch < 0) {
throw new IllegalArgumentException("Minimum number of optional scorers should not be negative");
}
@@ -96,8 +94,8 @@ class BooleanScorer2 extends Scorer {
requiredScorers = required;
prohibitedScorers = prohibited;
- coordinator.init();
- countingSumScorer = makeCountingSumScorer();
+ coordinator.init(similarity, disableCoord);
+ countingSumScorer = makeCountingSumScorer(disableCoord, similarity);
}
/** Count a scorer as a single match. */
@@ -109,7 +107,7 @@ class BooleanScorer2 extends Scorer {
private float lastDocScore = Float.NaN;
SingleMatchScorer(Scorer scorer) {
- super(scorer.getSimilarity());
+ super(null); // No similarity used.
this.scorer = scorer;
}
@@ -164,12 +162,12 @@ class BooleanScorer2 extends Scorer {
};
}
- private static final Similarity defaultSimilarity = Similarity.getDefault();
-
- private Scorer countingConjunctionSumScorer(List<Scorer> requiredScorers) throws IOException {
+ private Scorer countingConjunctionSumScorer(boolean disableCoord,
+ Similarity similarity,
+ List<Scorer> requiredScorers) throws IOException {
// each scorer from the list counted as a single matcher
final int requiredNrMatchers = requiredScorers.size();
- return new ConjunctionScorer(defaultSimilarity, requiredScorers) {
+ return new ConjunctionScorer(disableCoord ? 1.0f : similarity.coord(requiredScorers.size(), requiredScorers.size()), requiredScorers) {
private int lastScoredDoc = -1;
// Save the score of lastScoredDoc, so that we don't compute it more than
// once in score().
@@ -192,8 +190,10 @@ class BooleanScorer2 extends Scorer {
};
}
- private Scorer dualConjunctionSumScorer(Scorer req1, Scorer req2) throws IOException { // non counting.
- return new ConjunctionScorer(defaultSimilarity, req1, req2);
+ private Scorer dualConjunctionSumScorer(boolean disableCoord,
+ Similarity similarity,
+ Scorer req1, Scorer req2) throws IOException { // non counting.
+ return new ConjunctionScorer(disableCoord ? 1.0f : similarity.coord(2, 2), req1, req2);
// All scorers match, so defaultSimilarity always has 1 as
// the coordination factor.
// Therefore the sum of the scores of two scorers
@@ -203,13 +203,14 @@ class BooleanScorer2 extends Scorer {
/** Returns the scorer to be used for match counting and score summing.
* Uses requiredScorers, optionalScorers and prohibitedScorers.
*/
- private Scorer makeCountingSumScorer() throws IOException { // each scorer counted as a single matcher
+ private Scorer makeCountingSumScorer(boolean disableCoord,
+ Similarity similarity) throws IOException { // each scorer counted as a single matcher
return (requiredScorers.size() == 0)
- ? makeCountingSumScorerNoReq()
- : makeCountingSumScorerSomeReq();
+ ? makeCountingSumScorerNoReq(disableCoord, similarity)
+ : makeCountingSumScorerSomeReq(disableCoord, similarity);
}
- private Scorer makeCountingSumScorerNoReq() throws IOException { // No required scorers
+ private Scorer makeCountingSumScorerNoReq(boolean disableCoord, Similarity similarity) throws IOException { // No required scorers
// minNrShouldMatch optional scorers are required, but at least 1
int nrOptRequired = (minNrShouldMatch < 1) ? 1 : minNrShouldMatch;
Scorer requiredCountingSumScorer;
@@ -217,24 +218,27 @@ class BooleanScorer2 extends Scorer {
requiredCountingSumScorer = countingDisjunctionSumScorer(optionalScorers, nrOptRequired);
else if (optionalScorers.size() == 1)
requiredCountingSumScorer = new SingleMatchScorer(optionalScorers.get(0));
- else
- requiredCountingSumScorer = countingConjunctionSumScorer(optionalScorers);
+ else {
+ requiredCountingSumScorer = countingConjunctionSumScorer(disableCoord, similarity, optionalScorers);
+ }
return addProhibitedScorers(requiredCountingSumScorer);
}
- private Scorer makeCountingSumScorerSomeReq() throws IOException { // At least one required scorer.
+ private Scorer makeCountingSumScorerSomeReq(boolean disableCoord, Similarity similarity) throws IOException { // At least one required scorer.
if (optionalScorers.size() == minNrShouldMatch) { // all optional scorers also required.
ArrayList<Scorer> allReq = new ArrayList<Scorer>(requiredScorers);
allReq.addAll(optionalScorers);
- return addProhibitedScorers(countingConjunctionSumScorer(allReq));
+ return addProhibitedScorers(countingConjunctionSumScorer(disableCoord, similarity, allReq));
} else { // optionalScorers.size() > minNrShouldMatch, and at least one required scorer
Scorer requiredCountingSumScorer =
requiredScorers.size() == 1
? new SingleMatchScorer(requiredScorers.get(0))
- : countingConjunctionSumScorer(requiredScorers);
+ : countingConjunctionSumScorer(disableCoord, similarity, requiredScorers);
if (minNrShouldMatch > 0) { // use a required disjunction scorer over the optional scorers
return addProhibitedScorers(
dualConjunctionSumScorer( // non counting
+ disableCoord,
+ similarity,
requiredCountingSumScorer,
countingDisjunctionSumScorer(
optionalScorers,
@@ -276,7 +280,7 @@ class BooleanScorer2 extends Scorer {
}
@Override
- protected boolean score(Collector collector, int max, int firstDocID) throws IOException {
+ public boolean score(Collector collector, int max, int firstDocID) throws IOException {
doc = firstDocID;
collector.setScorer(this);
while (doc < max) {
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BoostAttribute.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BoostAttribute.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BoostAttribute.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BoostAttribute.java Thu Jan 13 02:09:33 2011
@@ -21,7 +21,7 @@ import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeSource; // javadocs only
import org.apache.lucene.index.TermsEnum; // javadocs only
-/** Add this {@link Attribute} to a {@link TermsEnum} returned by {@link MultiTermQuery#getTermsEnum(IndexReader,AttributeSource)}
+/** Add this {@link Attribute} to a {@link TermsEnum} returned by {@link MultiTermQuery#getTermsEnum(Terms,AttributeSource)}
* and update the boost on each returned term. This enables to control the boost factor
* for each matching term in {@link MultiTermQuery#SCORING_BOOLEAN_QUERY_REWRITE} or
* {@link TopTermsRewrite} mode.
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java Thu Jan 13 02:09:33 2011
@@ -17,7 +17,7 @@ package org.apache.lucene.search;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.util.Bits;
import java.io.IOException;
@@ -61,8 +61,8 @@ public class CachingSpanFilter extends S
}
@Override
- public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
- SpanFilterResult result = getCachedResult(reader);
+ public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
+ SpanFilterResult result = getCachedResult(context.reader);
return result != null ? result.getDocIdSet() : null;
}
@@ -72,7 +72,7 @@ public class CachingSpanFilter extends S
private SpanFilterResult getCachedResult(IndexReader reader) throws IOException {
final Object coreKey = reader.getCoreCacheKey();
- final Object delCoreKey = reader.hasDeletions() ? MultiFields.getDeletedDocs(reader) : coreKey;
+ final Object delCoreKey = reader.hasDeletions() ? reader.getDeletedDocs() : coreKey;
SpanFilterResult result = cache.get(reader, coreKey, delCoreKey);
if (result != null) {
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java Thu Jan 13 02:09:33 2011
@@ -23,7 +23,7 @@ import java.util.Map;
import java.util.WeakHashMap;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.util.OpenBitSetDISI;
import org.apache.lucene.util.Bits;
@@ -38,6 +38,9 @@ import org.apache.lucene.util.Bits;
* {@link DeletesMode#DYNAMIC}).
*/
public class CachingWrapperFilter extends Filter {
+ // TODO: make this filter aware of ReaderContext. a cached filter could
+ // specify the actual readers key or something similar to indicate on which
+ // level of the readers hierarchy it should be cached.
Filter filter;
/**
@@ -105,7 +108,7 @@ public class CachingWrapperFilter extend
// deletions
value = cache.get(coreKey);
if (value != null) {
- final Bits delDocs = MultiFields.getDeletedDocs(reader);
+ final Bits delDocs = reader.getDeletedDocs();
if (delDocs != null) {
value = mergeDeletes(delDocs, value);
}
@@ -192,10 +195,10 @@ public class CachingWrapperFilter extend
int hitCount, missCount;
@Override
- public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
-
+ public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
+ final IndexReader reader = context.reader;
final Object coreKey = reader.getCoreCacheKey();
- final Object delCoreKey = reader.hasDeletions() ? MultiFields.getDeletedDocs(reader) : coreKey;
+ final Object delCoreKey = reader.hasDeletions() ? reader.getDeletedDocs() : coreKey;
DocIdSet docIdSet = cache.get(reader, coreKey, delCoreKey);
if (docIdSet != null) {
@@ -206,7 +209,7 @@ public class CachingWrapperFilter extend
missCount++;
// cache miss
- docIdSet = docIdSetToCache(filter.getDocIdSet(reader), reader);
+ docIdSet = docIdSetToCache(filter.getDocIdSet(context), reader);
if (docIdSet != null) {
cache.put(coreKey, delCoreKey, docIdSet);
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/Collector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/Collector.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/Collector.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/Collector.java Thu Jan 13 02:09:33 2011
@@ -136,7 +136,7 @@ public abstract class Collector {
*
* <p>
* Note: This is called in an inner search loop. For good search performance,
- * implementations of this method should not call {@link Searcher#doc(int)} or
+ * implementations of this method should not call {@link IndexSearcher#doc(int)} or
* {@link org.apache.lucene.index.IndexReader#document(int)} on every hit.
* Doing so can slow searches by an order of magnitude or more.
*/
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java Thu Jan 13 02:09:33 2011
@@ -29,14 +29,14 @@ class ConjunctionScorer extends Scorer {
private final float coord;
private int lastDoc = -1;
- public ConjunctionScorer(Similarity similarity, Collection<Scorer> scorers) throws IOException {
- this(similarity, scorers.toArray(new Scorer[scorers.size()]));
+ public ConjunctionScorer(float coord, Collection<Scorer> scorers) throws IOException {
+ this(coord, scorers.toArray(new Scorer[scorers.size()]));
}
- public ConjunctionScorer(Similarity similarity, Scorer... scorers) throws IOException {
- super(similarity);
+ public ConjunctionScorer(float coord, Scorer... scorers) throws IOException {
+ super(null);
this.scorers = scorers;
- coord = similarity.coord(scorers.length, scorers.length);
+ this.coord = coord;
for (int i = 0; i < scorers.length; i++) {
if (scorers[i].nextDoc() == NO_MORE_DOCS) {
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java Thu Jan 13 02:09:33 2011
@@ -103,7 +103,7 @@ class ConstantScoreAutoRewrite extends T
addClause(bq, placeholderTerm.createTerm(pendingTerms.get(sort[i], new BytesRef())), 1, 1.0f);
}
// Strip scores
- final Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq));
+ final Query result = new ConstantScoreQuery(bq);
result.setBoost(query.getBoost());
query.incTotalNumberOfTerms(size);
return result;