You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/08/23 16:07:19 UTC
svn commit: r1160700 [7/22] - in /lucene/dev/branches/flexscoring: ./
dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/lucene/contrib/
dev-tools/idea/lucene/contrib/demo/
dev-tools/idea/lucene/contrib/highlighter/ dev-tools/idea/lucene/contrib/q...
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java Tue Aug 23 14:06:58 2011
@@ -106,7 +106,7 @@ public class BlockTermsReader extends Fi
}
}
- //private String segment;
+ // private String segment;
public BlockTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, String segment, PostingsReaderBase postingsReader, IOContext context,
int termsCacheSize, int codecId)
@@ -115,7 +115,7 @@ public class BlockTermsReader extends Fi
this.postingsReader = postingsReader;
termsCache = new DoubleBarrelLRUCache<FieldAndTerm,BlockTermState>(termsCacheSize);
- //this.segment = segment;
+ // this.segment = segment;
in = dir.openInput(IndexFileNames.segmentFileName(segment, codecId, BlockTermsWriter.TERMS_EXTENSION),
context);
@@ -165,11 +165,6 @@ public class BlockTermsReader extends Fi
}
@Override
- public void loadTermsIndex(int indexDivisor) throws IOException {
- indexReader.loadTermsIndex(indexDivisor);
- }
-
- @Override
public void close() throws IOException {
try {
try {
@@ -326,6 +321,9 @@ public class BlockTermsReader extends Fi
/* Common prefix used for all terms in this block. */
private int termBlockPrefix;
+ /* How many terms in current block */
+ private int blockTermCount;
+
private byte[] docFreqBytes;
private final ByteArrayDataInput freqReader = new ByteArrayDataInput();
private int metaDataUpto;
@@ -363,16 +361,14 @@ public class BlockTermsReader extends Fi
throw new IllegalStateException("terms index was not loaded");
}
- /*
- System.out.println("BTR.seek seg=" + segment + " target=" + fieldInfo.name + ":" + target.utf8ToString() + " " + target + " current=" + term().utf8ToString() + " " + term() + " useCache=" + useCache + " indexIsCurrent=" + indexIsCurrent + " didIndexNext=" + didIndexNext + " seekPending=" + seekPending + " divisor=" + indexReader.getDivisor() + " this=" + this);
+ //System.out.println("BTR.seek seg=" + segment + " target=" + fieldInfo.name + ":" + target.utf8ToString() + " " + target + " current=" + term().utf8ToString() + " " + term() + " useCache=" + useCache + " indexIsCurrent=" + indexIsCurrent + " didIndexNext=" + didIndexNext + " seekPending=" + seekPending + " divisor=" + indexReader.getDivisor() + " this=" + this);
if (didIndexNext) {
if (nextIndexTerm == null) {
- System.out.println(" nextIndexTerm=null");
+ //System.out.println(" nextIndexTerm=null");
} else {
- System.out.println(" nextIndexTerm=" + nextIndexTerm.utf8ToString());
+ //System.out.println(" nextIndexTerm=" + nextIndexTerm.utf8ToString());
}
}
- */
// Check cache
if (useCache) {
@@ -449,7 +445,7 @@ public class BlockTermsReader extends Fi
//System.out.println(" seek: term=" + term.utf8ToString());
} else {
//System.out.println(" skip seek");
- if (state.termCount == state.blockTermCount && !nextBlock()) {
+ if (state.termBlockOrd == blockTermCount && !nextBlock()) {
indexIsCurrent = false;
return SeekStatus.END;
}
@@ -485,9 +481,9 @@ public class BlockTermsReader extends Fi
// but it could be in next block. We
// must scan to end-of-block to set common
// prefix for next block:
- if (state.termCount < state.blockTermCount) {
- while(state.termCount < state.blockTermCount-1) {
- state.termCount++;
+ if (state.termBlockOrd < blockTermCount) {
+ while(state.termBlockOrd < blockTermCount-1) {
+ state.termBlockOrd++;
state.ord++;
termSuffixesReader.skipBytes(termSuffixesReader.readVInt());
}
@@ -510,7 +506,7 @@ public class BlockTermsReader extends Fi
// Target's prefix is before the common prefix
// of this block, so we position to start of
// block and return NOT_FOUND:
- assert state.termCount == 0;
+ assert state.termBlockOrd == 0;
final int suffix = termSuffixesReader.readVInt();
term.length = termBlockPrefix + suffix;
@@ -528,7 +524,7 @@ public class BlockTermsReader extends Fi
// Test every term in this block
while (true) {
- state.termCount++;
+ state.termBlockOrd++;
state.ord++;
final int suffix = termSuffixesReader.readVInt();
@@ -586,7 +582,7 @@ public class BlockTermsReader extends Fi
}
}
- if (state.termCount == state.blockTermCount) {
+ if (state.termBlockOrd == blockTermCount) {
// Must pre-fill term for next block's common prefix
term.length = termBlockPrefix + suffix;
if (term.bytes.length < term.length) {
@@ -618,7 +614,7 @@ public class BlockTermsReader extends Fi
@Override
public BytesRef next() throws IOException {
- //System.out.println("BTR.next() seekPending=" + seekPending + " pendingSeekCount=" + state.termCount);
+ //System.out.println("BTR.next() seekPending=" + seekPending + " pendingSeekCount=" + state.termBlockOrd);
// If seek was previously called and the term was cached,
// usually caller is just going to pull a D/&PEnum or get
@@ -628,7 +624,7 @@ public class BlockTermsReader extends Fi
if (seekPending) {
assert !indexIsCurrent;
in.seek(state.blockFilePointer);
- final int pendingSeekCount = state.termCount;
+ final int pendingSeekCount = state.termBlockOrd;
boolean result = nextBlock();
final long savOrd = state.ord;
@@ -638,7 +634,7 @@ public class BlockTermsReader extends Fi
// on a real term:
assert result;
- while(state.termCount < pendingSeekCount) {
+ while(state.termBlockOrd < pendingSeekCount) {
BytesRef nextResult = _next();
assert nextResult != null;
}
@@ -652,8 +648,8 @@ public class BlockTermsReader extends Fi
metadata, ie docFreq, totalTermFreq or pulls a D/&PEnum, we then (lazily)
decode all metadata up to the current term. */
private BytesRef _next() throws IOException {
- //System.out.println("BTR._next seg=" + segment + " this=" + this + " termCount=" + state.termCount + " (vs " + state.blockTermCount + ")");
- if (state.termCount == state.blockTermCount && !nextBlock()) {
+ //System.out.println("BTR._next seg=" + segment + " this=" + this + " termCount=" + state.termBlockOrd + " (vs " + blockTermCount + ")");
+ if (state.termBlockOrd == blockTermCount && !nextBlock()) {
//System.out.println(" eof");
indexIsCurrent = false;
return null;
@@ -668,12 +664,12 @@ public class BlockTermsReader extends Fi
term.grow(term.length);
}
termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
- state.termCount++;
+ state.termBlockOrd++;
// NOTE: meaningless in the non-ord case
state.ord++;
- //System.out.println(" return term=" + fieldInfo.name + ":" + term.utf8ToString() + " " + term);
+ //System.out.println(" return term=" + fieldInfo.name + ":" + term.utf8ToString() + " " + term + " tbOrd=" + state.termBlockOrd);
return term;
}
@@ -700,9 +696,10 @@ public class BlockTermsReader extends Fi
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
//System.out.println("BTR.docs this=" + this);
decodeMetaData();
- //System.out.println(" state.docFreq=" + state.docFreq);
+ //System.out.println("BTR.docs: state.docFreq=" + state.docFreq);
final DocsEnum docsEnum = postingsReader.docs(fieldInfo, state, liveDocs, reuse);
assert docsEnum != null;
+ //System.out.println("BTR.docs: return docsEnum=" + docsEnum);
return docsEnum;
}
@@ -721,7 +718,7 @@ public class BlockTermsReader extends Fi
@Override
public void seekExact(BytesRef target, TermState otherState) throws IOException {
- //System.out.println("BTR.seek termState target=" + target.utf8ToString() + " " + target + " this=" + this);
+ //System.out.println("BTR.seekExact termState target=" + target.utf8ToString() + " " + target + " this=" + this);
assert otherState != null && otherState instanceof BlockTermState;
assert !doOrd || ((BlockTermState) otherState).ord < numTerms;
state.copyFrom(otherState);
@@ -805,9 +802,9 @@ public class BlockTermsReader extends Fi
//System.out.println("BTR.nextBlock() fp=" + in.getFilePointer() + " this=" + this);
state.blockFilePointer = in.getFilePointer();
- state.blockTermCount = in.readVInt();
- //System.out.println(" blockTermCount=" + state.blockTermCount);
- if (state.blockTermCount == 0) {
+ blockTermCount = in.readVInt();
+ //System.out.println(" blockTermCount=" + blockTermCount);
+ if (blockTermCount == 0) {
return false;
}
termBlockPrefix = in.readVInt();
@@ -831,7 +828,7 @@ public class BlockTermsReader extends Fi
freqReader.reset(docFreqBytes, 0, len);
metaDataUpto = 0;
- state.termCount = 0;
+ state.termBlockOrd = 0;
postingsReader.readTermsBlock(in, fieldInfo, state);
@@ -843,7 +840,7 @@ public class BlockTermsReader extends Fi
}
private void decodeMetaData() throws IOException {
- //System.out.println("BTR.decodeMetadata mdUpto=" + metaDataUpto + " vs termCount=" + state.termCount + " state=" + state);
+ //System.out.println("BTR.decodeMetadata mdUpto=" + metaDataUpto + " vs termCount=" + state.termBlockOrd + " state=" + state);
if (!seekPending) {
// TODO: cutover to random-access API
// here.... really stupid that we have to decode N
@@ -851,10 +848,10 @@ public class BlockTermsReader extends Fi
// that we really need...
// lazily catch up on metadata decode:
- final int limit = state.termCount;
+ final int limit = state.termBlockOrd;
// We must set/incr state.termBlockOrd because
// postings impl can look at this
- state.termCount = metaDataUpto;
+ state.termBlockOrd = metaDataUpto;
// TODO: better API would be "jump straight to term=N"???
while (metaDataUpto < limit) {
//System.out.println(" decode mdUpto=" + metaDataUpto);
@@ -875,9 +872,9 @@ public class BlockTermsReader extends Fi
postingsReader.nextTerm(fieldInfo, state);
metaDataUpto++;
- state.termCount++;
+ state.termBlockOrd++;
}
- //} else {
+ } else {
//System.out.println(" skip! seekPending");
}
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java Tue Aug 23 14:06:58 2011
@@ -66,7 +66,7 @@ public class BlockTermsWriter extends Fi
private final TermsIndexWriterBase termsIndexWriter;
private final List<TermsWriter> fields = new ArrayList<TermsWriter>();
- //private final String segment;
+ // private final String segment;
public BlockTermsWriter(TermsIndexWriterBase termsIndexWriter,
SegmentWriteState state, PostingsWriterBase postingsWriter)
@@ -80,7 +80,7 @@ public class BlockTermsWriter extends Fi
writeHeader(out);
currentField = null;
this.postingsWriter = postingsWriter;
- //segment = state.segmentName;
+ // segment = state.segmentName;
//System.out.println("BTW.init seg=" + state.segmentName);
@@ -188,7 +188,7 @@ public class BlockTermsWriter extends Fi
@Override
public PostingsConsumer startTerm(BytesRef text) throws IOException {
- //System.out.println("BTW.startTerm term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text + " seg=" + segment);
+ //System.out.println("BTW: startTerm term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text + " seg=" + segment);
postingsWriter.startTerm();
return postingsWriter;
}
@@ -199,7 +199,7 @@ public class BlockTermsWriter extends Fi
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
assert stats.docFreq > 0;
- //System.out.println("BTW.finishTerm term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text + " seg=" + segment + " df=" + stats.docFreq);
+ //System.out.println("BTW: finishTerm term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text + " seg=" + segment + " df=" + stats.docFreq);
final boolean isIndexTerm = fieldIndexWriter.checkIndexTerm(text, stats);
@@ -308,7 +308,7 @@ public class BlockTermsWriter extends Fi
bytesWriter.writeTo(out);
bytesWriter.reset();
- postingsWriter.flushTermsBlock();
+ postingsWriter.flushTermsBlock(pendingCount, pendingCount);
lastPrevTerm.copy(pendingTerms[pendingCount-1].term);
pendingCount = 0;
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java Tue Aug 23 14:06:58 2011
@@ -84,7 +84,7 @@ public class CodecProvider {
public synchronized Codec lookup(String name) {
final Codec codec = codecs.get(name);
if (codec == null) {
- throw new IllegalArgumentException("required codec '" + name + "' not found");
+ throw new IllegalArgumentException("required codec '" + name + "' not found; known codecs: " + codecs.keySet());
}
return codec;
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/DocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/DocValuesConsumer.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/DocValuesConsumer.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/DocValuesConsumer.java Tue Aug 23 14:06:58 2011
@@ -103,21 +103,20 @@ public abstract class DocValuesConsumer
// TODO we need some kind of compatibility notation for values such
// that two slightly different segments can be merged eg. fixed vs.
// variable byte len or float32 vs. float64
- int docBase = 0;
boolean merged = false;
/*
* We ignore the given DocValues here and merge from the subReaders directly
* to support bulk copies on the DocValues Writer level. If this gets merged
* with MultiDocValues, the writer cannot optimize for bulk-copyable data.
*/
- for (final IndexReader reader : mergeState.readers) {
- final IndexDocValues r = reader.docValues(mergeState.fieldInfo.name);
+ for(int readerIDX=0;readerIDX<mergeState.readers.size();readerIDX++) {
+ final org.apache.lucene.index.codecs.MergeState.IndexReaderAndLiveDocs reader = mergeState.readers.get(readerIDX);
+ final IndexDocValues r = reader.reader.docValues(mergeState.fieldInfo.name);
if (r != null) {
merged = true;
- merge(new Writer.MergeState(r, docBase, reader.maxDoc(),
- reader.getLiveDocs()));
+ merge(new Writer.MergeState(r, mergeState.docBase[readerIDX], reader.reader.maxDoc(),
+ reader.liveDocs));
}
- docBase += reader.numDocs();
}
if (merged) {
finish(mergeState.mergedDocCount);
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/FieldsProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/FieldsProducer.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/FieldsProducer.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/FieldsProducer.java Tue Aug 23 14:06:58 2011
@@ -34,7 +34,6 @@ import org.apache.lucene.index.Terms;
public abstract class FieldsProducer extends Fields implements Closeable {
public abstract void close() throws IOException;
- public abstract void loadTermsIndex(int indexDivisor) throws IOException;
public static final FieldsProducer EMPTY = new FieldsProducer() {
@@ -47,12 +46,7 @@ public abstract class FieldsProducer ext
public FieldsEnum iterator() throws IOException {
return FieldsEnum.EMPTY;
}
-
- @Override
- public void loadTermsIndex(int indexDivisor) throws IOException {
-
- }
-
+
@Override
public void close() throws IOException {
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java Tue Aug 23 14:06:58 2011
@@ -20,7 +20,6 @@ package org.apache.lucene.index.codecs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.IOContext.Context;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentInfo;
@@ -31,7 +30,6 @@ import org.apache.lucene.util.PagedBytes
import org.apache.lucene.util.packed.PackedInts;
import java.util.HashMap;
-import java.util.Iterator;
import java.util.Collection;
import java.util.Comparator;
import java.io.IOException;
@@ -75,6 +73,8 @@ public class FixedGapTermsIndexReader ex
this.termComp = termComp;
+ assert indexDivisor == -1 || indexDivisor > 0;
+
in = dir.openInput(IndexFileNames.segmentFileName(segment, codecId, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION), context);
boolean success = false;
@@ -251,7 +251,7 @@ public class FixedGapTermsIndexReader ex
}
}
- public void loadTermsIndex() throws IOException {
+ private void loadTermsIndex() throws IOException {
if (coreIndex == null) {
coreIndex = new CoreFieldIndex(indexStart, termsStart, packedIndexStart, packedOffsetsStart, numIndexTerms);
}
@@ -375,29 +375,6 @@ public class FixedGapTermsIndexReader ex
}
}
- // Externally synced in IndexWriter
- @Override
- public void loadTermsIndex(int indexDivisor) throws IOException {
- if (!indexLoaded) {
-
- if (indexDivisor < 0) {
- this.indexDivisor = -indexDivisor;
- } else {
- this.indexDivisor = indexDivisor;
- }
- this.totalIndexInterval = indexInterval * this.indexDivisor;
-
- Iterator<FieldIndexData> it = fields.values().iterator();
- while(it.hasNext()) {
- it.next().loadTermsIndex();
- }
-
- indexLoaded = true;
- in.close();
- termBytesReader = termBytes.freeze(true);
- }
- }
-
@Override
public FieldIndexEnum getFieldEnum(FieldInfo fieldInfo) {
final FieldIndexData fieldData = fields.get(fieldInfo);
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/MergeState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/MergeState.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/MergeState.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/MergeState.java Tue Aug 23 14:06:58 2011
@@ -32,14 +32,23 @@ import org.apache.lucene.util.Bits;
*
* @lucene.experimental */
public class MergeState {
+
+ public static class IndexReaderAndLiveDocs {
+ public final IndexReader reader;
+ public final Bits liveDocs;
+
+ public IndexReaderAndLiveDocs(IndexReader reader, Bits liveDocs) {
+ this.reader = reader;
+ this.liveDocs = liveDocs;
+ }
+ }
+
public FieldInfos fieldInfos;
- public List<IndexReader> readers; // Readers being merged
+ public List<IndexReaderAndLiveDocs> readers; // Readers & liveDocs being merged
public int readerCount; // Number of readers being merged
public int[][] docMaps; // Maps docIDs around deletions
- public int[] delCounts; // Deletion count per reader
public int[] docBase; // New docID base per reader
public int mergedDocCount; // Total # merged docs
- public Bits multiLiveDocs;
public CheckAbort checkAbort;
// Updated per field;
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java Tue Aug 23 14:06:58 2011
@@ -26,9 +26,8 @@ import org.apache.lucene.index.FieldInfo
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
-import org.apache.lucene.index.codecs.standard.StandardPostingsWriter; // javadocs
-
-/** BlockTermsReader interacts with a single instance
+/** The core terms dictionaries (BlockTermsReader,
+ * BlockTreeTermsReader) interact with a single instance
* of this class to manage creation of {@link DocsEnum} and
* {@link DocsAndPositionsEnum} instances. It provides an
* IndexInput (termsIn) where this class may read any
@@ -49,11 +48,11 @@ public abstract class PostingsReaderBase
/** Must fully consume state, since after this call that
* TermState may be reused. */
- public abstract DocsEnum docs(FieldInfo fieldInfo, BlockTermState state, Bits liveDocs, DocsEnum reuse) throws IOException;
+ public abstract DocsEnum docs(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsEnum reuse) throws IOException;
/** Must fully consume state, since after this call that
* TermState may be reused. */
- public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState state, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException;
+ public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException;
public abstract void close() throws IOException;
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/PostingsWriterBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/PostingsWriterBase.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/PostingsWriterBase.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/PostingsWriterBase.java Tue Aug 23 14:06:58 2011
@@ -33,7 +33,11 @@ public abstract class PostingsWriterBase
public abstract void startTerm() throws IOException;
- public abstract void flushTermsBlock() throws IOException;
+ /** Flushes {@code count} terms as a single block, beginning
+ * {@code start} terms back from the end of the terms stack.
+ * {@code start} is counted backwards from the end of the stack,
+ * i.e. a bigger {@code start} means further back in the stack. */
+ public abstract void flushTermsBlock(int start, int count) throws IOException;
/** Finishes the current term */
public abstract void finishTerm(TermStats stats) throws IOException;
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java Tue Aug 23 14:06:58 2011
@@ -69,7 +69,9 @@ public abstract class TermsConsumer {
MultiDocsEnum docsEnumIn = null;
while((term = termsEnum.next()) != null) {
- docsEnumIn = (MultiDocsEnum) termsEnum.docs(mergeState.multiLiveDocs, docsEnumIn);
+ // We can pass null for liveDocs, because the
+ // mapping enum will skip the non-live docs:
+ docsEnumIn = (MultiDocsEnum) termsEnum.docs(null, docsEnumIn);
if (docsEnumIn != null) {
docsEnum.reset(docsEnumIn);
final PostingsConsumer postingsConsumer = startTerm(term);
@@ -93,7 +95,9 @@ public abstract class TermsConsumer {
postingsEnum.setMergeState(mergeState);
MultiDocsAndPositionsEnum postingsEnumIn = null;
while((term = termsEnum.next()) != null) {
- postingsEnumIn = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(mergeState.multiLiveDocs, postingsEnumIn);
+ // We can pass null for liveDocs, because the
+ // mapping enum will skip the non-live docs:
+ postingsEnumIn = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(null, postingsEnumIn);
if (postingsEnumIn != null) {
postingsEnum.reset(postingsEnumIn);
// set PayloadProcessor
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java Tue Aug 23 14:06:58 2011
@@ -43,8 +43,6 @@ public abstract class TermsIndexReaderBa
public abstract FieldIndexEnum getFieldEnum(FieldInfo fieldInfo);
- public abstract void loadTermsIndex(int indexDivisor) throws IOException;
-
public abstract void close() throws IOException;
public abstract void getExtensions(Collection<String> extensions);
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java Tue Aug 23 14:06:58 2011
@@ -23,7 +23,6 @@ import java.io.OutputStreamWriter; // fo
import java.io.Writer; // for toDot
import java.util.Collection;
import java.util.HashMap;
-import java.util.Iterator;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@@ -63,6 +62,7 @@ public class VariableGapTermsIndexReader
in = dir.openInput(IndexFileNames.segmentFileName(segment, codecId, VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION), new IOContext(context, true));
this.segment = segment;
boolean success = false;
+ assert indexDivisor == -1 || indexDivisor > 0;
try {
@@ -170,7 +170,7 @@ public class VariableGapTermsIndexReader
}
}
- public void loadTermsIndex() throws IOException {
+ private void loadTermsIndex() throws IOException {
if (fst == null) {
IndexInput clone = (IndexInput) in.clone();
clone.seek(indexStart);
@@ -205,27 +205,6 @@ public class VariableGapTermsIndexReader
}
}
- // Externally synced in IndexWriter
- @Override
- public void loadTermsIndex(int indexDivisor) throws IOException {
- if (!indexLoaded) {
-
- if (indexDivisor < 0) {
- this.indexDivisor = -indexDivisor;
- } else {
- this.indexDivisor = indexDivisor;
- }
-
- Iterator<FieldIndexData> it = fields.values().iterator();
- while(it.hasNext()) {
- it.next().loadTermsIndex();
- }
-
- indexLoaded = true;
- in.close();
- }
- }
-
@Override
public FieldIndexEnum getFieldEnum(FieldInfo fieldInfo) {
final FieldIndexData fieldData = fields.get(fieldInfo);
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java Tue Aug 23 14:06:58 2011
@@ -68,10 +68,14 @@ public abstract class FixedIntBlockIndex
}
@Override
- public void set(IntIndexOutput.Index other) throws IOException {
+ public void copyFrom(IntIndexOutput.Index other, boolean copyLast) throws IOException {
Index idx = (Index) other;
- lastFP = fp = idx.fp;
- lastUpto = upto = idx.upto;
+ fp = idx.fp;
+ upto = idx.upto;
+ if (copyLast) {
+ lastFP = fp;
+ lastUpto = upto;
+ }
}
@Override
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java Tue Aug 23 14:06:58 2011
@@ -77,10 +77,14 @@ public abstract class VariableIntBlockIn
}
@Override
- public void set(IntIndexOutput.Index other) throws IOException {
+ public void copyFrom(IntIndexOutput.Index other, boolean copyLast) throws IOException {
Index idx = (Index) other;
- lastFP = fp = idx.fp;
- lastUpto = upto = idx.upto;
+ fp = idx.fp;
+ upto = idx.upto;
+ if (copyLast) {
+ lastFP = fp;
+ lastUpto = upto;
+ }
}
@Override
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/memory/MemoryCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/memory/MemoryCodec.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/memory/MemoryCodec.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/memory/MemoryCodec.java Tue Aug 23 14:06:58 2011
@@ -767,11 +767,6 @@ public class MemoryCodec extends Codec {
}
@Override
- public void loadTermsIndex(int indexDivisor) {
- // no op
- }
-
- @Override
public void close() {
// Drop ref to FST:
for(TermsReader termsReader : fields.values()) {
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java Tue Aug 23 14:06:58 2011
@@ -171,30 +171,6 @@ public class PreFlexFields extends Field
}
@Override
- synchronized public void loadTermsIndex(int indexDivisor) throws IOException {
- if (tis == null) {
- Directory dir0;
- if (si.getUseCompoundFile()) {
- // In some cases, we were originally opened when CFS
- // was not used, but then we are asked to open the
- // terms reader with index, the segment has switched
- // to CFS
-
- if (!(dir instanceof CompoundFileDirectory)) {
- dir0 = cfsReader = dir.openCompoundInput(IndexFileNames.segmentFileName(si.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context);
- } else {
- dir0 = dir;
- }
- dir0 = cfsReader;
- } else {
- dir0 = dir;
- }
-
- tis = new TermInfosReader(dir0, si.name, fieldInfos, context, indexDivisor);
- }
- }
-
- @Override
public void close() throws IOException {
if (tis != null) {
tis.close();
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java Tue Aug 23 14:06:58 2011
@@ -22,28 +22,23 @@ import java.util.Set;
import org.apache.lucene.index.PerDocWriteState;
import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SegmentReadState;
-import org.apache.lucene.index.codecs.Codec;
-import org.apache.lucene.index.codecs.PostingsWriterBase;
-import org.apache.lucene.index.codecs.standard.StandardPostingsWriter;
+import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.codecs.PostingsReaderBase;
-import org.apache.lucene.index.codecs.standard.StandardPostingsReader;
+import org.apache.lucene.index.codecs.PostingsWriterBase;
+import org.apache.lucene.index.codecs.BlockTreeTermsReader;
+import org.apache.lucene.index.codecs.BlockTreeTermsWriter;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.DefaultDocValuesConsumer;
import org.apache.lucene.index.codecs.DefaultDocValuesProducer;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.index.codecs.PerDocConsumer;
-import org.apache.lucene.index.codecs.DefaultDocValuesConsumer;
import org.apache.lucene.index.codecs.PerDocValues;
-import org.apache.lucene.index.codecs.VariableGapTermsIndexReader;
-import org.apache.lucene.index.codecs.VariableGapTermsIndexWriter;
-import org.apache.lucene.index.codecs.BlockTermsReader;
-import org.apache.lucene.index.codecs.BlockTermsWriter;
-import org.apache.lucene.index.codecs.TermsIndexReaderBase;
-import org.apache.lucene.index.codecs.TermsIndexWriterBase;
import org.apache.lucene.index.codecs.standard.StandardCodec;
+import org.apache.lucene.index.codecs.standard.StandardPostingsReader;
+import org.apache.lucene.index.codecs.standard.StandardPostingsWriter;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.IOUtils;
/** This codec "inlines" the postings for terms that have
* low docFreq. It wraps another codec, which is used for
@@ -56,59 +51,52 @@ import org.apache.lucene.util.IOUtils;
public class PulsingCodec extends Codec {
private final int freqCutoff;
+ private final int minBlockSize;
+ private final int maxBlockSize;
- /**
- * Creates a {@link PulsingCodec} with <tt>freqCutoff = 1</tt>
- *
- * @see PulsingCodec#PulsingCodec(int)
- */
public PulsingCodec() {
this(1);
}
+ public PulsingCodec(int freqCutoff) {
+ this(freqCutoff, BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
+ }
+
/** Terms with freq <= freqCutoff are inlined into terms
* dict. */
- public PulsingCodec(int freqCutoff) {
+ public PulsingCodec(int freqCutoff, int minBlockSize, int maxBlockSize) {
super("Pulsing");
this.freqCutoff = freqCutoff;
+ this.minBlockSize = minBlockSize;
+ assert minBlockSize > 1;
+ this.maxBlockSize = maxBlockSize;
}
@Override
public String toString() {
- return name + "(freqCutoff=" + freqCutoff + ")";
+ return name + "(freqCutoff=" + freqCutoff + " minBlockSize=" + minBlockSize + " maxBlockSize=" + maxBlockSize + ")";
}
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
- // We wrap StandardPostingsWriter, but any StandardPostingsWriter
+ // We wrap StandardPostingsWriter, but any PostingsWriterBase
// will work:
+
PostingsWriterBase docsWriter = new StandardPostingsWriter(state);
// Terms that have <= freqCutoff number of docs are
// "pulsed" (inlined):
- PostingsWriterBase pulsingWriter = new PulsingPostingsWriterImpl(freqCutoff, docsWriter);
-
- // Terms dict index
- TermsIndexWriterBase indexWriter;
- boolean success = false;
- try {
- indexWriter = new VariableGapTermsIndexWriter(state, new VariableGapTermsIndexWriter.EveryNTermSelector(state.termIndexInterval));
- success = true;
- } finally {
- if (!success) {
- IOUtils.closeSafely(true, pulsingWriter);
- }
- }
+ PostingsWriterBase pulsingWriter = new PulsingPostingsWriter(freqCutoff, docsWriter);
// Terms dict
- success = false;
+ boolean success = false;
try {
- FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, pulsingWriter);
+ FieldsConsumer ret = new BlockTreeTermsWriter(state, pulsingWriter, minBlockSize, maxBlockSize);
success = true;
return ret;
} finally {
if (!success) {
- IOUtils.closeSafely(true, pulsingWriter, indexWriter);
+ pulsingWriter.close();
}
}
}
@@ -119,53 +107,34 @@ public class PulsingCodec extends Codec
// We wrap StandardPostingsReader, but any StandardPostingsReader
// will work:
PostingsReaderBase docsReader = new StandardPostingsReader(state.dir, state.segmentInfo, state.context, state.codecId);
- PostingsReaderBase pulsingReader = new PulsingPostingsReaderImpl(docsReader);
-
- // Terms dict index reader
- TermsIndexReaderBase indexReader;
+ PostingsReaderBase pulsingReader = new PulsingPostingsReader(docsReader);
boolean success = false;
try {
- indexReader = new VariableGapTermsIndexReader(state.dir,
- state.fieldInfos,
- state.segmentInfo.name,
- state.termsIndexDivisor,
- state.codecId, state.context);
+ FieldsProducer ret = new BlockTreeTermsReader(
+ state.dir, state.fieldInfos, state.segmentInfo.name,
+ pulsingReader,
+ state.context,
+ state.codecId,
+ state.termsIndexDivisor);
success = true;
+ return ret;
} finally {
if (!success) {
pulsingReader.close();
}
}
+ }
- // Terms dict reader
- success = false;
- try {
- FieldsProducer ret = new BlockTermsReader(indexReader,
- state.dir, state.fieldInfos, state.segmentInfo.name,
- pulsingReader,
- state.context,
- StandardCodec.TERMS_CACHE_SIZE,
- state.codecId);
- success = true;
- return ret;
- } finally {
- if (!success) {
- try {
- pulsingReader.close();
- } finally {
- indexReader.close();
- }
- }
- }
+ public int getFreqCutoff() {
+ return freqCutoff;
}
@Override
- public void files(Directory dir, SegmentInfo segmentInfo, int id, Set<String> files) throws IOException {
- StandardPostingsReader.files(dir, segmentInfo, id, files);
- BlockTermsReader.files(dir, segmentInfo, id, files);
- VariableGapTermsIndexReader.files(dir, segmentInfo, id, files);
- DefaultDocValuesConsumer.files(dir, segmentInfo, id, files, getDocValuesUseCFS());
+ public void files(Directory dir, SegmentInfo segmentInfo, int codecID, Set<String> files) throws IOException {
+ StandardPostingsReader.files(dir, segmentInfo, codecID, files);
+ BlockTreeTermsReader.files(dir, segmentInfo, codecID, files);
+ DefaultDocValuesConsumer.files(dir, segmentInfo, codecID, files, getDocValuesUseCFS());
}
@Override
@@ -173,7 +142,7 @@ public class PulsingCodec extends Codec
StandardCodec.getStandardExtensions(extensions);
DefaultDocValuesConsumer.getDocValuesExtensions(extensions, getDocValuesUseCFS());
}
-
+
@Override
public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
return new DefaultDocValuesConsumer(state, getDocValuesSortComparator(), getDocValuesUseCFS());
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java Tue Aug 23 14:06:58 2011
@@ -44,7 +44,7 @@ public abstract class IntIndexOutput imp
public abstract void mark() throws IOException;
/** Copies index from other */
- public abstract void set(Index other) throws IOException;
+ public abstract void copyFrom(Index other, boolean copyLast) throws IOException;
/** Writes "location" of current output pointer of primary
* output to different output (out) */
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/sep/SepSkipListWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/sep/SepSkipListWriter.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/sep/SepSkipListWriter.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/sep/SepSkipListWriter.java Tue Aug 23 14:06:58 2011
@@ -124,12 +124,12 @@ class SepSkipListWriter extends MultiLev
Arrays.fill(lastSkipDoc, 0);
Arrays.fill(lastSkipPayloadLength, -1); // we don't have to write the first length in the skip list
for(int i=0;i<numberOfSkipLevels;i++) {
- docIndex[i].set(topDocIndex);
+ docIndex[i].copyFrom(topDocIndex, true);
if (freqOutput != null) {
- freqIndex[i].set(topFreqIndex);
+ freqIndex[i].copyFrom(topFreqIndex, true);
}
if (posOutput != null) {
- posIndex[i].set(topPosIndex);
+ posIndex[i].copyFrom(topPosIndex, true);
}
}
if (payloadOutput != null) {
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java Tue Aug 23 14:06:58 2011
@@ -593,10 +593,6 @@ class SimpleTextFieldsReader extends Fie
}
@Override
- public void loadTermsIndex(int indexDivisor) {
- }
-
- @Override
public void close() throws IOException {
in.close();
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java Tue Aug 23 14:06:58 2011
@@ -22,31 +22,37 @@ import java.util.Set;
import org.apache.lucene.index.PerDocWriteState;
import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.codecs.PostingsReaderBase;
+import org.apache.lucene.index.codecs.PostingsWriterBase;
+import org.apache.lucene.index.codecs.BlockTreeTermsReader;
+import org.apache.lucene.index.codecs.BlockTreeTermsWriter;
import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.DefaultDocValuesConsumer;
+import org.apache.lucene.index.codecs.DefaultDocValuesProducer;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.index.codecs.PerDocConsumer;
-import org.apache.lucene.index.codecs.DefaultDocValuesConsumer;
import org.apache.lucene.index.codecs.PerDocValues;
-import org.apache.lucene.index.codecs.PostingsWriterBase;
-import org.apache.lucene.index.codecs.PostingsReaderBase;
-import org.apache.lucene.index.codecs.TermsIndexWriterBase;
-import org.apache.lucene.index.codecs.TermsIndexReaderBase;
-import org.apache.lucene.index.codecs.VariableGapTermsIndexWriter;
-import org.apache.lucene.index.codecs.VariableGapTermsIndexReader;
-import org.apache.lucene.index.codecs.BlockTermsWriter;
-import org.apache.lucene.index.codecs.BlockTermsReader;
-import org.apache.lucene.index.codecs.DefaultDocValuesProducer;
import org.apache.lucene.store.Directory;
/** Default codec.
* @lucene.experimental */
public class StandardCodec extends Codec {
+ private final int minBlockSize;
+ private final int maxBlockSize;
+
public StandardCodec() {
+ this(BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
+ }
+
+ public StandardCodec(int minBlockSize, int maxBlockSize) {
super("Standard");
+ this.minBlockSize = minBlockSize;
+ assert minBlockSize > 1;
+ this.maxBlockSize = maxBlockSize;
}
@Override
@@ -57,29 +63,14 @@ public class StandardCodec extends Codec
// pluggable? Ie so that this codec would record which
// index impl was used, and switch on loading?
// Or... you must make a new Codec for this?
- TermsIndexWriterBase indexWriter;
boolean success = false;
try {
- indexWriter = new VariableGapTermsIndexWriter(state, new VariableGapTermsIndexWriter.EveryNTermSelector(state.termIndexInterval));
- success = true;
- } finally {
- if (!success) {
- docs.close();
- }
- }
-
- success = false;
- try {
- FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, docs);
+ FieldsConsumer ret = new BlockTreeTermsWriter(state, docs, minBlockSize, maxBlockSize);
success = true;
return ret;
} finally {
if (!success) {
- try {
- docs.close();
- } finally {
- indexWriter.close();
- }
+ docs.close();
}
}
}
@@ -89,41 +80,22 @@ public class StandardCodec extends Codec
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
PostingsReaderBase postings = new StandardPostingsReader(state.dir, state.segmentInfo, state.context, state.codecId);
- TermsIndexReaderBase indexReader;
boolean success = false;
try {
- indexReader = new VariableGapTermsIndexReader(state.dir,
+ FieldsProducer ret = new BlockTreeTermsReader(
+ state.dir,
state.fieldInfos,
state.segmentInfo.name,
- state.termsIndexDivisor,
- state.codecId, state.context);
- success = true;
- } finally {
- if (!success) {
- postings.close();
- }
- }
-
- success = false;
- try {
- FieldsProducer ret = new BlockTermsReader(indexReader,
- state.dir,
- state.fieldInfos,
- state.segmentInfo.name,
- postings,
- state.context,
- TERMS_CACHE_SIZE,
- state.codecId);
+ postings,
+ state.context,
+ state.codecId,
+ state.termsIndexDivisor);
success = true;
return ret;
} finally {
if (!success) {
- try {
- postings.close();
- } finally {
- indexReader.close();
- }
+ postings.close();
}
}
}
@@ -135,11 +107,10 @@ public class StandardCodec extends Codec
static final String PROX_EXTENSION = "prx";
@Override
- public void files(Directory dir, SegmentInfo segmentInfo, int id, Set<String> files) throws IOException {
- StandardPostingsReader.files(dir, segmentInfo, id, files);
- BlockTermsReader.files(dir, segmentInfo, id, files);
- VariableGapTermsIndexReader.files(dir, segmentInfo, id, files);
- DefaultDocValuesConsumer.files(dir, segmentInfo, id, files, getDocValuesUseCFS());
+ public void files(Directory dir, SegmentInfo segmentInfo, int codecID, Set<String> files) throws IOException {
+ StandardPostingsReader.files(dir, segmentInfo, codecID, files);
+ BlockTreeTermsReader.files(dir, segmentInfo, codecID, files);
+ DefaultDocValuesConsumer.files(dir, segmentInfo, codecID, files, getDocValuesUseCFS());
}
@Override
@@ -151,8 +122,12 @@ public class StandardCodec extends Codec
public static void getStandardExtensions(Set<String> extensions) {
extensions.add(FREQ_EXTENSION);
extensions.add(PROX_EXTENSION);
- BlockTermsReader.getExtensions(extensions);
- VariableGapTermsIndexReader.getIndexExtensions(extensions);
+ BlockTreeTermsReader.getExtensions(extensions);
+ }
+
+ @Override
+ public String toString() {
+ return name + "(minBlockSize=" + minBlockSize + " maxBlockSize=" + maxBlockSize + ")";
}
@Override
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java Tue Aug 23 14:06:58 2011
@@ -27,8 +27,8 @@ import org.apache.lucene.index.FieldInfo
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.TermState;
-import org.apache.lucene.index.codecs.BlockTermState;
import org.apache.lucene.index.codecs.PostingsReaderBase;
+import org.apache.lucene.index.codecs.BlockTermState;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
@@ -46,22 +46,23 @@ public class StandardPostingsReader exte
private final IndexInput freqIn;
private final IndexInput proxIn;
+ // public static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
int skipInterval;
int maxSkipLevels;
int skipMinimum;
- //private String segment;
+ // private String segment;
- public StandardPostingsReader(Directory dir, SegmentInfo segmentInfo, IOContext context, int codecId) throws IOException {
+ public StandardPostingsReader(Directory dir, SegmentInfo segmentInfo, IOContext ioContext, int codecId) throws IOException {
freqIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, StandardCodec.FREQ_EXTENSION),
- context);
- //this.segment = segmentInfo.name;
+ ioContext);
+ // this.segment = segmentInfo.name;
if (segmentInfo.getHasProx()) {
boolean success = false;
try {
proxIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, StandardCodec.PROX_EXTENSION),
- context);
+ ioContext);
success = true;
} finally {
if (!success) {
@@ -73,10 +74,10 @@ public class StandardPostingsReader exte
}
}
- public static void files(Directory dir, SegmentInfo segmentInfo, int id, Collection<String> files) throws IOException {
- files.add(IndexFileNames.segmentFileName(segmentInfo.name, id, StandardCodec.FREQ_EXTENSION));
+ public static void files(Directory dir, SegmentInfo segmentInfo, int codecID, Collection<String> files) throws IOException {
+ files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecID, StandardCodec.FREQ_EXTENSION));
if (segmentInfo.getHasProx()) {
- files.add(IndexFileNames.segmentFileName(segmentInfo.name, id, StandardCodec.PROX_EXTENSION));
+ files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecID, StandardCodec.PROX_EXTENSION));
}
}
@@ -100,7 +101,7 @@ public class StandardPostingsReader exte
// Only used by the "primary" TermState -- clones don't
// copy this (basically they are "transient"):
- ByteArrayDataInput bytesReader;
+ ByteArrayDataInput bytesReader; // TODO: should this NOT be in the TermState...?
byte[] bytes;
@Override
@@ -155,7 +156,8 @@ public class StandardPostingsReader exte
final StandardTermState termState = (StandardTermState) _termState;
final int len = termsIn.readVInt();
- //System.out.println("SPR.readTermsBlock termsIn.fp=" + termsIn.getFilePointer());
+
+ // if (DEBUG) System.out.println(" SPR.readTermsBlock bytes=" + len + " ts=" + _termState);
if (termState.bytes == null) {
termState.bytes = new byte[ArrayUtil.oversize(len, 1)];
termState.bytesReader = new ByteArrayDataInput();
@@ -171,21 +173,25 @@ public class StandardPostingsReader exte
public void nextTerm(FieldInfo fieldInfo, BlockTermState _termState)
throws IOException {
final StandardTermState termState = (StandardTermState) _termState;
- //System.out.println("StandardR.nextTerm seg=" + segment);
- final boolean isFirstTerm = termState.termCount == 0;
+ // if (DEBUG) System.out.println("SPR: nextTerm seg=" + segment + " tbOrd=" + termState.termBlockOrd + " bytesReader.fp=" + termState.bytesReader.getPosition());
+ final boolean isFirstTerm = termState.termBlockOrd == 0;
if (isFirstTerm) {
termState.freqOffset = termState.bytesReader.readVLong();
} else {
termState.freqOffset += termState.bytesReader.readVLong();
}
- //System.out.println(" dF=" + termState.docFreq);
- //System.out.println(" freqFP=" + termState.freqOffset);
+ /*
+ if (DEBUG) {
+ System.out.println(" dF=" + termState.docFreq);
+ System.out.println(" freqFP=" + termState.freqOffset);
+ }
+ */
assert termState.freqOffset < freqIn.length();
if (termState.docFreq >= skipMinimum) {
termState.skipOffset = termState.bytesReader.readVInt();
- //System.out.println(" skipOffset=" + termState.skipOffset + " vs freqIn.length=" + freqIn.length());
+ // if (DEBUG) System.out.println(" skipOffset=" + termState.skipOffset + " vs freqIn.length=" + freqIn.length());
assert termState.freqOffset + termState.skipOffset < freqIn.length();
} else {
// undefined
@@ -197,7 +203,7 @@ public class StandardPostingsReader exte
} else {
termState.proxOffset += termState.bytesReader.readVLong();
}
- //System.out.println(" proxFP=" + termState.proxOffset);
+ // if (DEBUG) System.out.println(" proxFP=" + termState.proxOffset);
}
}
@@ -215,6 +221,7 @@ public class StandardPostingsReader exte
docsEnum = new SegmentDocsEnum(freqIn);
}
}
+ // if (DEBUG) System.out.println("SPR.docs ts=" + termState);
return docsEnum.reset(fieldInfo, (StandardTermState) termState, liveDocs);
}
@@ -300,7 +307,7 @@ public class StandardPostingsReader exte
assert limit > 0;
ord = 0;
doc = 0;
- //System.out.println(" sde limit=" + limit + " freqFP=" + freqOffset);
+ // if (DEBUG) System.out.println(" sde limit=" + limit + " freqFP=" + freqOffset);
skipped = false;
@@ -309,8 +316,10 @@ public class StandardPostingsReader exte
@Override
public int nextDoc() throws IOException {
+ //if (DEBUG) System.out.println(" stpr.nextDoc seg=" + segment + " fp=" + freqIn.getFilePointer());
while(true) {
if (ord == limit) {
+ //if (DEBUG) System.out.println(" return doc=" + NO_MORE_DOCS);
return doc = NO_MORE_DOCS;
}
@@ -318,6 +327,7 @@ public class StandardPostingsReader exte
// Decode next doc/freq pair
final int code = freqIn.readVInt();
+ // if (DEBUG) System.out.println(" code=" + code);
if (omitTF) {
doc += code;
} else {
@@ -334,6 +344,7 @@ public class StandardPostingsReader exte
}
}
+ //if (DEBUG) System.out.println(" stpr.nextDoc return doc=" + doc);
return doc;
}
@@ -480,16 +491,17 @@ public class StandardPostingsReader exte
freqOffset = termState.freqOffset;
proxOffset = termState.proxOffset;
skipOffset = termState.skipOffset;
- //System.out.println("StandardR.D&PE reset seg=" + segment + " limit=" + limit + " freqFP=" + freqOffset + " proxFP=" + proxOffset);
+ // if (DEBUG) System.out.println("StandardR.D&PE reset seg=" + segment + " limit=" + limit + " freqFP=" + freqOffset + " proxFP=" + proxOffset);
return this;
}
@Override
public int nextDoc() throws IOException {
+ // if (DEBUG) System.out.println("SPR.nextDoc seg=" + segment + " freqIn.fp=" + freqIn.getFilePointer());
while(true) {
if (ord == limit) {
- //System.out.println("StandardR.D&PE seg=" + segment + " nextDoc return doc=END");
+ // if (DEBUG) System.out.println(" return END");
return doc = NO_MORE_DOCS;
}
@@ -513,7 +525,7 @@ public class StandardPostingsReader exte
position = 0;
- //System.out.println("StandardR.D&PE nextDoc seg=" + segment + " return doc=" + doc);
+ // if (DEBUG) System.out.println(" return doc=" + doc);
return doc;
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java Tue Aug 23 14:06:58 2011
@@ -21,6 +21,8 @@ package org.apache.lucene.index.codecs.s
* index file format */
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsEnum;
@@ -34,18 +36,19 @@ import org.apache.lucene.store.IndexOutp
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
-import org.apache.lucene.util.IOUtils;
/** @lucene.experimental */
public final class StandardPostingsWriter extends PostingsWriterBase {
- final static String CODEC = "StandardPostingsWriterImpl";
+ final static String CODEC = "StandardPostingsWriter";
+
+ //private static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
// Increment version to change it:
final static int VERSION_START = 0;
final static int VERSION_CURRENT = VERSION_START;
- IndexOutput freqOut;
- IndexOutput proxOut;
+ final IndexOutput freqOut;
+ final IndexOutput proxOut;
final DefaultSkipListWriter skipListWriter;
/** Expert: The fraction of TermDocs entries stored in skip tables,
* used to accelerate {@link DocsEnum#advance(int)}. Larger values result in
@@ -70,52 +73,42 @@ public final class StandardPostingsWrite
IndexOptions indexOptions;
boolean storePayloads;
// Starts a new term
- long lastFreqStart;
long freqStart;
- long lastProxStart;
long proxStart;
FieldInfo fieldInfo;
int lastPayloadLength;
int lastPosition;
- private int pendingCount;
-
- //private String segment;
-
- private RAMOutputStream bytesWriter = new RAMOutputStream();
+ // private String segment;
public StandardPostingsWriter(SegmentWriteState state) throws IOException {
this(state, DEFAULT_SKIP_INTERVAL);
}
public StandardPostingsWriter(SegmentWriteState state, int skipInterval) throws IOException {
+ super();
this.skipInterval = skipInterval;
this.skipMinimum = skipInterval; /* set to the same for now */
- //this.segment = state.segmentName;
+ // this.segment = state.segmentName;
String fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.FREQ_EXTENSION);
freqOut = state.directory.createOutput(fileName, state.context);
- boolean success = false;
- try {
- if (state.fieldInfos.hasProx()) {
- // At least one field does not omit TF, so create the
- // prox file
- fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.PROX_EXTENSION);
- proxOut = state.directory.createOutput(fileName, state.context);
- } else {
- // Every field omits TF so we will write no prox file
- proxOut = null;
- }
-
- totalNumDocs = state.numDocs;
-
- skipListWriter = new DefaultSkipListWriter(skipInterval, maxSkipLevels,
- state.numDocs, freqOut, proxOut);
- success = true;
- } finally {
- if (!success) {
- IOUtils.closeSafely(true, freqOut, proxOut);
- }
+ if (state.fieldInfos.hasProx()) {
+ // At least one field does not omit TF, so create the
+ // prox file
+ fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.PROX_EXTENSION);
+ proxOut = state.directory.createOutput(fileName, state.context);
+ } else {
+ // Every field omits TF so we will write no prox file
+ proxOut = null;
}
+
+ totalNumDocs = state.numDocs;
+
+ skipListWriter = new DefaultSkipListWriter(skipInterval,
+ maxSkipLevels,
+ state.numDocs,
+ freqOut,
+ proxOut);
}
@Override
@@ -129,8 +122,8 @@ public final class StandardPostingsWrite
@Override
public void startTerm() {
- //System.out.println("StandardW: startTerm seg=" + segment + " pendingCount=" + pendingCount);
freqStart = freqOut.getFilePointer();
+ //if (DEBUG) System.out.println("SPW: startTerm freqOut.fp=" + freqStart);
if (proxOut != null) {
proxStart = proxOut.getFilePointer();
// force first payload to write its length
@@ -144,6 +137,13 @@ public final class StandardPostingsWrite
@Override
public void setField(FieldInfo fieldInfo) {
//System.out.println("SPW: setField");
+ /*
+ if (BlockTreeTermsWriter.DEBUG && fieldInfo.name.equals("id")) {
+ DEBUG = true;
+ } else {
+ DEBUG = false;
+ }
+ */
this.fieldInfo = fieldInfo;
indexOptions = fieldInfo.indexOptions;
storePayloads = fieldInfo.storePayloads;
@@ -158,7 +158,7 @@ public final class StandardPostingsWrite
* then we just skip consuming positions/payloads. */
@Override
public void startDoc(int docID, int termDocFreq) throws IOException {
- //System.out.println("StandardW: startDoc seg=" + segment + " docID=" + docID + " tf=" + termDocFreq);
+ // if (DEBUG) System.out.println("SPW: startDoc seg=" + segment + " docID=" + docID + " tf=" + termDocFreq + " freqOut.fp=" + freqOut.getFilePointer());
final int delta = docID - lastDocID;
@@ -189,13 +189,13 @@ public final class StandardPostingsWrite
/** Add a new position & payload */
@Override
public void addPosition(int position, BytesRef payload) throws IOException {
- //System.out.println("StandardW: addPos pos=" + position + " payload=" + (payload == null ? "null" : (payload.length + " bytes")) + " proxFP=" + proxOut.getFilePointer());
+ //if (DEBUG) System.out.println("SPW: addPos pos=" + position + " payload=" + (payload == null ? "null" : (payload.length + " bytes")) + " proxFP=" + proxOut.getFilePointer());
assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS: "invalid indexOptions: " + indexOptions;
assert proxOut != null;
final int delta = position - lastPosition;
-
- assert delta >= 0: "position=" + position + " lastPosition=" + lastPosition;
+
+ assert delta >= 0: "position=" + position + " lastPosition=" + lastPosition; // not quite right (if pos=0 is repeated twice we don't catch it)
lastPosition = position;
@@ -222,57 +222,104 @@ public final class StandardPostingsWrite
public void finishDoc() {
}
+ private static class PendingTerm {
+ public final long freqStart;
+ public final long proxStart;
+ public final int skipOffset;
+
+ public PendingTerm(long freqStart, long proxStart, int skipOffset) {
+ this.freqStart = freqStart;
+ this.proxStart = proxStart;
+ this.skipOffset = skipOffset;
+ }
+ }
+
+ private final List<PendingTerm> pendingTerms = new ArrayList<PendingTerm>();
+
/** Called when we are done adding docs to this term */
@Override
public void finishTerm(TermStats stats) throws IOException {
- //System.out.println("StandardW.finishTerm seg=" + segment);
+
+ // if (DEBUG) System.out.println("SPW: finishTerm seg=" + segment + " freqStart=" + freqStart);
assert stats.docFreq > 0;
// TODO: wasteful we are counting this (counting # docs
// for this term) in two places?
assert stats.docFreq == df;
- final boolean isFirstTerm = pendingCount == 0;
- //System.out.println(" isFirstTerm=" + isFirstTerm);
-
- //System.out.println(" freqFP=" + freqStart);
- if (isFirstTerm) {
- bytesWriter.writeVLong(freqStart);
+ final int skipOffset;
+ if (df >= skipMinimum) {
+ skipOffset = (int) (skipListWriter.writeSkip(freqOut)-freqStart);
} else {
- bytesWriter.writeVLong(freqStart-lastFreqStart);
+ skipOffset = -1;
}
- lastFreqStart = freqStart;
- if (df >= skipMinimum) {
- bytesWriter.writeVInt((int) (skipListWriter.writeSkip(freqOut)-freqStart));
- }
+ pendingTerms.add(new PendingTerm(freqStart, proxStart, skipOffset));
- if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
- //System.out.println(" proxFP=" + proxStart);
- if (isFirstTerm) {
- bytesWriter.writeVLong(proxStart);
- } else {
- bytesWriter.writeVLong(proxStart - lastProxStart);
- }
- lastProxStart = proxStart;
- }
-
lastDocID = 0;
df = 0;
- pendingCount++;
}
+ private final RAMOutputStream bytesWriter = new RAMOutputStream();
+
@Override
- public void flushTermsBlock() throws IOException {
- //System.out.println("SPW.flushBlock pendingCount=" + pendingCount);
+ public void flushTermsBlock(int start, int count) throws IOException {
+ //if (DEBUG) System.out.println("SPW: flushTermsBlock start=" + start + " count=" + count + " left=" + (pendingTerms.size()-count) + " pendingTerms.size()=" + pendingTerms.size());
+
+ if (count == 0) {
+ termsOut.writeByte((byte) 0);
+ return;
+ }
+
+ assert start <= pendingTerms.size();
+ assert count <= start;
+
+ final int limit = pendingTerms.size() - start + count;
+ final PendingTerm firstTerm = pendingTerms.get(limit - count);
+ // First term in block is abs coded:
+ bytesWriter.writeVLong(firstTerm.freqStart);
+
+ if (firstTerm.skipOffset != -1) {
+ assert firstTerm.skipOffset > 0;
+ bytesWriter.writeVInt(firstTerm.skipOffset);
+ }
+ if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+ bytesWriter.writeVLong(firstTerm.proxStart);
+ }
+ long lastFreqStart = firstTerm.freqStart;
+ long lastProxStart = firstTerm.proxStart;
+ for(int idx=limit-count+1; idx<limit; idx++) {
+ final PendingTerm term = pendingTerms.get(idx);
+ //if (DEBUG) System.out.println(" write term freqStart=" + term.freqStart);
+ // The rest of the terms are delta coded:
+ bytesWriter.writeVLong(term.freqStart - lastFreqStart);
+ lastFreqStart = term.freqStart;
+ if (term.skipOffset != -1) {
+ assert term.skipOffset > 0;
+ bytesWriter.writeVInt(term.skipOffset);
+ }
+ if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+ bytesWriter.writeVLong(term.proxStart - lastProxStart);
+ lastProxStart = term.proxStart;
+ }
+ }
+
termsOut.writeVInt((int) bytesWriter.getFilePointer());
bytesWriter.writeTo(termsOut);
bytesWriter.reset();
- pendingCount = 0;
+
+ // Remove the terms we just wrote:
+ pendingTerms.subList(limit-count, limit).clear();
}
@Override
public void close() throws IOException {
- IOUtils.closeSafely(false, freqOut, proxOut);
+ try {
+ freqOut.close();
+ } finally {
+ if (proxOut != null) {
+ proxOut.close();
+ }
+ }
}
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/AutomatonQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/AutomatonQuery.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/AutomatonQuery.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/AutomatonQuery.java Tue Aug 23 14:06:58 2011
@@ -22,14 +22,11 @@ import java.io.IOException;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.AutomatonTermsEnum.CompiledAutomaton;
-import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.BasicAutomata;
import org.apache.lucene.util.automaton.BasicOperations;
-import org.apache.lucene.util.automaton.MinimizationOperations;
-import org.apache.lucene.util.automaton.SpecialOperations;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
/**
* A {@link Query} that will match terms against a finite-state machine.
@@ -41,7 +38,7 @@ import org.apache.lucene.util.automaton.
* the standard Lucene wildcard syntax with {@link WildcardQuery}.
* </p>
* <p>
- * When the query is executed, it will create an equivalent minimal DFA of the
+ * When the query is executed, it will create an equivalent DFA of the
* finite-state machine, and will enumerate the term dictionary in an
* intelligent way to reduce the number of comparisons. For example: the regular
* expression of <code>[dl]og?</code> will make approximately four comparisons:
@@ -52,20 +49,10 @@ import org.apache.lucene.util.automaton.
public class AutomatonQuery extends MultiTermQuery {
/** the automaton to match index terms against */
protected final Automaton automaton;
+ protected final CompiledAutomaton compiled;
/** term containing the field, and possibly some pattern structure */
protected final Term term;
- /**
- * abstraction for returning a termsenum:
- * in the ctor the query computes one of these, the actual
- * implementation depends upon the automaton's structure.
- */
- private abstract class TermsEnumFactory {
- protected abstract TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException;
- }
-
- private final TermsEnumFactory factory;
-
/**
* Create a new AutomatonQuery from an {@link Automaton}.
*
@@ -78,73 +65,12 @@ public class AutomatonQuery extends Mult
super(term.field());
this.term = term;
this.automaton = automaton;
- MinimizationOperations.minimize(automaton);
-
- if (BasicOperations.isEmpty(automaton)) {
- // matches nothing
- factory = new TermsEnumFactory() {
- @Override
- protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
- return TermsEnum.EMPTY;
- }
- };
- } else if (BasicOperations.isTotal(automaton)) {
- // matches all possible strings
- factory = new TermsEnumFactory() {
- @Override
- protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
- return terms.iterator();
- }
- };
- } else {
- final String singleton;
- final String commonPrefix;
-
- if (automaton.getSingleton() == null) {
- commonPrefix = SpecialOperations.getCommonPrefix(automaton);
- if (commonPrefix.length() > 0 && BasicOperations.sameLanguage(automaton, BasicAutomata.makeString(commonPrefix))) {
- singleton = commonPrefix;
- } else {
- singleton = null;
- }
- } else {
- commonPrefix = null;
- singleton = automaton.getSingleton();
- }
-
- if (singleton != null) {
- // matches a fixed string in singleton or expanded representation
- factory = new TermsEnumFactory() {
- @Override
- protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
- return new SingleTermsEnum(terms.iterator(), new Term(field, singleton));
- }
- };
- } else if (BasicOperations.sameLanguage(automaton, BasicOperations.concatenate(
- BasicAutomata.makeString(commonPrefix), BasicAutomata.makeAnyString()))) {
- // matches a constant prefix
- factory = new TermsEnumFactory() {
- @Override
- protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
- return new PrefixTermsEnum(terms.iterator(), new Term(field, commonPrefix));
- }
- };
- } else {
- final AutomatonTermsEnum.CompiledAutomaton compiled =
- new CompiledAutomaton(automaton, SpecialOperations.isFinite(automaton));
- factory = new TermsEnumFactory() {
- @Override
- protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
- return new AutomatonTermsEnum(terms.iterator(), compiled);
- }
- };
- }
- }
+ this.compiled = new CompiledAutomaton(automaton);
}
@Override
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
- return factory.getTermsEnum(terms, atts);
+ return compiled.getTermsEnum(terms);
}
@Override
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/BooleanQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/BooleanQuery.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/BooleanQuery.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/BooleanQuery.java Tue Aug 23 14:06:58 2011
@@ -18,11 +18,15 @@ package org.apache.lucene.search;
*/
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.ConjunctionTermScorer.DocsAndFreqs;
import org.apache.lucene.search.similarities.SimilarityProvider;
+import org.apache.lucene.search.similarities.Similarity.ExactDocScorer;
+import org.apache.lucene.search.TermQuery.TermWeight;
import java.io.IOException;
import java.util.*;
@@ -167,17 +171,24 @@ public class BooleanQuery extends Query
protected ArrayList<Weight> weights;
protected int maxCoord; // num optional + num required
private final boolean disableCoord;
+ private final boolean termConjunction;
public BooleanWeight(IndexSearcher searcher, boolean disableCoord)
throws IOException {
this.similarityProvider = searcher.getSimilarityProvider();
this.disableCoord = disableCoord;
weights = new ArrayList<Weight>(clauses.size());
+ boolean termConjunction = clauses.isEmpty() || minNrShouldMatch != 0 ? false : true;
for (int i = 0 ; i < clauses.size(); i++) {
BooleanClause c = clauses.get(i);
- weights.add(c.getQuery().createWeight(searcher));
+ Weight w = c.getQuery().createWeight(searcher);
+ if (!(c.isRequired() && (w instanceof TermWeight))) {
+ termConjunction = false;
+ }
+ weights.add(w);
if (!c.isProhibited()) maxCoord++;
}
+ this.termConjunction = termConjunction;
}
@Override
@@ -291,6 +302,10 @@ public class BooleanQuery extends Query
@Override
public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext)
throws IOException {
+ if (termConjunction) {
+ // specialized scorer for term conjunctions
+ return createConjunctionTermScorer(context);
+ }
List<Scorer> required = new ArrayList<Scorer>();
List<Scorer> prohibited = new ArrayList<Scorer>();
List<Scorer> optional = new ArrayList<Scorer>();
@@ -329,6 +344,23 @@ public class BooleanQuery extends Query
// Return a BooleanScorer2
return new BooleanScorer2(this, disableCoord, minNrShouldMatch, required, prohibited, optional, maxCoord);
}
+
+ private Scorer createConjunctionTermScorer(AtomicReaderContext context)
+ throws IOException {
+ final DocsAndFreqs[] docsAndFreqs = new DocsAndFreqs[weights.size()];
+ for (int i = 0; i < docsAndFreqs.length; i++) {
+ final TermWeight weight = (TermWeight) weights.get(i);
+ final TermsEnum termsEnum = weight.getTermsEnum(context);
+ if (termsEnum == null) {
+ return null;
+ }
+ final ExactDocScorer docScorer = weight.createDocScorer(context);
+ docsAndFreqs[i] = new DocsAndFreqs(termsEnum.docs(
+ context.reader.getLiveDocs(), null), termsEnum.docFreq(), docScorer);
+ }
+ return new ConjunctionTermScorer(this, disableCoord ? 1.0f : coord(
+ docsAndFreqs.length, docsAndFreqs.length), docsAndFreqs);
+ }
@Override
public boolean scoresDocsOutOfOrder() {
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/BooleanScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/BooleanScorer.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/BooleanScorer.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/BooleanScorer.java Tue Aug 23 14:06:58 2011
@@ -18,6 +18,8 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
import java.util.List;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
@@ -353,26 +355,11 @@ final class BooleanScorer extends Scorer
}
@Override
- protected void visitSubScorers(Query parent, Occur relationship, ScorerVisitor<Query, Query, Scorer> visitor) {
- super.visitSubScorers(parent, relationship, visitor);
- final Query q = weight.getQuery();
- SubScorer sub = scorers;
- while(sub != null) {
- // TODO: re-enable this if BQ ever sends us required
- //clauses
- //if (sub.required) {
- //relationship = Occur.MUST;
- if (!sub.prohibited) {
- relationship = Occur.SHOULD;
- } else {
- // TODO: maybe it's pointless to do this, but, it is
- // possible the doc may still be collected, eg foo
- // OR (bar -fee)
- relationship = Occur.MUST_NOT;
- }
- sub.scorer.visitSubScorers(q, relationship, visitor);
- sub = sub.next;
+ public Collection<ChildScorer> getChildren() {
+ List<ChildScorer> children = new ArrayList<ChildScorer>();
+ for (SubScorer sub = scorers; sub != null; sub = sub.next) {
+ children.add(new ChildScorer(sub.scorer, sub.prohibited ? Occur.MUST_NOT.toString() : Occur.SHOULD.toString()));
}
+ return children;
}
-
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java Tue Aug 23 14:06:58 2011
@@ -19,11 +19,13 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery.BooleanWeight;
import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.Scorer.ChildScorer;
/* See the description in BooleanScorer.java, comparing
* BooleanScorer & BooleanScorer2 */
@@ -319,17 +321,17 @@ class BooleanScorer2 extends Scorer {
}
@Override
- protected void visitSubScorers(Query parent, Occur relationship, ScorerVisitor<Query, Query, Scorer> visitor) {
- super.visitSubScorers(parent, relationship, visitor);
- final Query q = weight.getQuery();
+ public Collection<ChildScorer> getChildren() {
+ ArrayList<ChildScorer> children = new ArrayList<ChildScorer>();
for (Scorer s : optionalScorers) {
- s.visitSubScorers(q, Occur.SHOULD, visitor);
+ children.add(new ChildScorer(s, Occur.SHOULD.toString()));
}
for (Scorer s : prohibitedScorers) {
- s.visitSubScorers(q, Occur.MUST_NOT, visitor);
+ children.add(new ChildScorer(s, Occur.MUST_NOT.toString()));
}
for (Scorer s : requiredScorers) {
- s.visitSubScorers(q, Occur.MUST, visitor);
+ children.add(new ChildScorer(s, Occur.MUST.toString()));
}
+ return children;
}
}