You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/01/14 14:09:42 UTC
svn commit: r1058995 [1/2] - in /lucene/dev/branches/bulkpostings: ./ lucene/
lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/
lucene/contrib/memory/src/java/org/apache/lucene/index/memory/
lucene/src/java/org/apache/lucene/in...
Author: rmuir
Date: Fri Jan 14 13:09:40 2011
New Revision: 1058995
URL: http://svn.apache.org/viewvc?rev=1058995&view=rev
Log:
merge revision 1058328 (see my nocommits, we need to integrate bulkpostingsenum and termstate still)
Added:
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/OrdTermState.java
- copied unchanged from r1058328, lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/OrdTermState.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermState.java
- copied unchanged from r1058328, lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermState.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermState.java
- copied unchanged from r1058328, lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermState.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/PerReaderTermState.java
- copied unchanged from r1058328, lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/PerReaderTermState.java
Removed:
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermState.java
Modified:
lucene/dev/branches/bulkpostings/ (props changed)
lucene/dev/branches/bulkpostings/lucene/ (props changed)
lucene/dev/branches/bulkpostings/lucene/CHANGES.txt
lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java
lucene/dev/branches/bulkpostings/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexReader.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiReader.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/Terms.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermsEnum.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ScoringRewrite.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermCollectingRewrite.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermQuery.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/QueryUtils.java
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java
lucene/dev/branches/bulkpostings/solr/ (props changed)
lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/request/UnInvertedField.java
Modified: lucene/dev/branches/bulkpostings/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/CHANGES.txt?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/bulkpostings/lucene/CHANGES.txt Fri Jan 14 13:09:40 2011
@@ -359,6 +359,11 @@ Optimizations
not seek backwards when a sub-range has no terms. It now only seeks
when the current term is less than the next sub-range's lower end.
(Uwe Schindler, Mike McCandless)
+
+* LUCENE-2694: Optimize MultiTermQuery to be single pass for Term lookups.
+ MultiTermQuery now stores TermState per leaf reader during rewrite to re-
+ seek the term dictionary in TermQuery / TermWeight.
+ (Simon Willnauer, Mike McCandless, Robert Muir)
Documentation
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java Fri Jan 14 13:09:40 2011
@@ -18,11 +18,15 @@ package org.apache.lucene.store.instanti
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.index.OrdTermState;
import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.BulkPostingsEnum;
+
+import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
@@ -92,10 +96,6 @@ public class InstantiatedTermsEnum exten
}
@Override
- public void cacheCurrentTerm() {
- }
-
- @Override
public BytesRef term() {
return br;
}
@@ -141,5 +141,18 @@ public class InstantiatedTermsEnum exten
public Comparator<BytesRef> getComparator() {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
+
+ @Override
+ public TermState termState() throws IOException {
+ final OrdTermState state = new OrdTermState();
+ state.ord = upto - start;
+ return state;
+ }
+
+ @Override
+ public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+ assert state != null && state instanceof OrdTermState;
+ return seek(((OrdTermState)state).ord); // just use the ord for simplicity
+ }
}
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Fri Jan 14 13:09:40 2011
@@ -39,6 +39,8 @@ import org.apache.lucene.document.FieldS
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.OrdTermState;
+import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.FieldsEnum;
@@ -885,10 +887,6 @@ public class MemoryIndex implements Seri
}
@Override
- public void cacheCurrentTerm() {
- }
-
- @Override
public long ord() {
return termUpto;
}
@@ -929,8 +927,21 @@ public class MemoryIndex implements Seri
public Comparator<BytesRef> getComparator() {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
- }
+ @Override
+ public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+ assert state != null;
+ return this.seek(((OrdTermState)state).ord);
+ }
+
+ @Override
+ public TermState termState() throws IOException {
+ OrdTermState ts = new OrdTermState();
+ ts.ord = termUpto;
+ return ts;
+ }
+ }
+
private class MemoryDocsEnum extends DocsEnum {
private ArrayIntList positions;
private boolean hasNext;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java Fri Jan 14 13:09:40 2011
@@ -372,7 +372,6 @@ class BufferedDeletes {
Query query = entry.getKey();
int limit = entry.getValue().intValue();
Weight weight = query.weight(searcher);
-
Scorer scorer = weight.scorer(readerContext, true, false);
if (scorer != null) {
while(true) {
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java Fri Jan 14 13:09:40 2011
@@ -131,11 +131,6 @@ public class FilterIndexReader extends I
}
@Override
- public void cacheCurrentTerm() throws IOException {
- in.cacheCurrentTerm();
- }
-
- @Override
public SeekStatus seek(long ord) throws IOException {
return in.seek(ord);
}
@@ -179,6 +174,16 @@ public class FilterIndexReader extends I
public Comparator<BytesRef> getComparator() throws IOException {
return in.getComparator();
}
+
+ @Override
+ public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+ return in.seek(term, state);
+ }
+
+ @Override
+ public TermState termState() throws IOException {
+ return in.termState();
+ }
}
// nocommit need FilteredBulkDocsEnum
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexReader.java?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexReader.java Fri Jan 14 13:09:40 2011
@@ -1086,6 +1086,47 @@ public abstract class IndexReader implem
return null;
}
}
+
+ /**
+ * Returns {@link DocsEnum} for the specified field and
+ * {@link TermState}. This may return null, if either the field or the term
+ * does not exist or the {@link TermState} is invalid for the underlying
+ * implementation.*/
+ public DocsEnum termDocsEnum(Bits skipDocs, String field, BytesRef term, TermState state) throws IOException {
+ assert state != null;
+ assert field != null;
+ final Fields fields = fields();
+ if (fields == null) {
+ return null;
+ }
+ final Terms terms = fields.terms(field);
+ if (terms != null) {
+ return terms.docs(skipDocs, term, state, null);
+ } else {
+ return null;
+ }
+ }
+
+ /**
+ * Returns {@link DocsAndPositionsEnum} for the specified field and
+ * {@link TermState}. This may return null, if either the field or the term
+ * does not exist, the {@link TermState} is invalid for the underlying
+ * implementation, or positions were not stored for this term.*/
+ public DocsAndPositionsEnum termPositionsEnum(Bits skipDocs, String field, BytesRef term, TermState state) throws IOException {
+ assert state != null;
+ assert field != null;
+ final Fields fields = fields();
+ if (fields == null) {
+ return null;
+ }
+ final Terms terms = fields.terms(field);
+ if (terms != null) {
+ return terms.docsAndPositions(skipDocs, term, state, null);
+ } else {
+ return null;
+ }
+ }
+
/** Deletes the document numbered <code>docNum</code>. Once a document is
* deleted it will not appear in TermDocs or TermPositions enumerations.
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiReader.java?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiReader.java Fri Jan 14 13:09:40 2011
@@ -19,7 +19,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Collection;
-import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.document.Document;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java Fri Jan 14 13:09:40 2011
@@ -95,13 +95,6 @@ public final class MultiTermsEnum extend
}
@Override
- public void cacheCurrentTerm() throws IOException {
- for(int i=0;i<numTop;i++) {
- top[i].terms.cacheCurrentTerm();
- }
- }
-
- @Override
public Comparator<BytesRef> getComparator() {
return termComp;
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/Terms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/Terms.java?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/Terms.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/Terms.java Fri Jan 14 13:09:40 2011
@@ -89,11 +89,57 @@ public abstract class Terms {
}
}
+ /**
+ * Expert: Get {@link DocsEnum} for the specified {@link TermState}.
+ * This method may return <code>null</code> if the term does not exist.
+ *
+ * @see TermsEnum#termState()
+ * @see TermsEnum#seek(BytesRef, TermState) */
+ public DocsEnum docs(Bits skipDocs, BytesRef term, TermState termState, DocsEnum reuse) throws IOException {
+ final TermsEnum termsEnum = getThreadTermsEnum();
+ if (termsEnum.seek(term, termState) == TermsEnum.SeekStatus.FOUND) {
+ return termsEnum.docs(skipDocs, reuse);
+ } else {
+ return null;
+ }
+ }
+
+ /**
+ * Get {@link DocsAndPositionsEnum} for the specified {@link TermState}. This
+ * method may return <code>null</code> if the term does not exist, or positions were
+ * not indexed.
+ *
+ * @see TermsEnum#termState()
+ * @see TermsEnum#seek(BytesRef, TermState) */
+ public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, BytesRef term, TermState termState, DocsAndPositionsEnum reuse) throws IOException {
+ final TermsEnum termsEnum = getThreadTermsEnum();
+ if (termsEnum.seek(term, termState) == TermsEnum.SeekStatus.FOUND) {
+ return termsEnum.docsAndPositions(skipDocs, reuse);
+ } else {
+ return null;
+ }
+ }
+
public long getUniqueTermCount() throws IOException {
throw new UnsupportedOperationException("this reader does not implement getUniqueTermCount()");
}
- protected TermsEnum getThreadTermsEnum() throws IOException {
+ /**
+ * Returns a thread-private {@link TermsEnum} instance. Obtaining
+ * {@link TermsEnum} from this method might be more efficient than using
+ * {@link #iterator()} directly since this method doesn't necessarily create a
+ * new {@link TermsEnum} instance.
+ * <p>
+ * NOTE: {@link TermsEnum} instances obtained from this method must not be
+ * shared across threads. The enum should only be used within a local context
+ * where other threads can't access it.
+ *
+ * @return a thread-private {@link TermsEnum} instance
+ * @throws IOException
+ * if an IOException occurs
+ * @lucene.internal
+ */
+ public TermsEnum getThreadTermsEnum() throws IOException {
TermsEnum termsEnum = threadEnums.get();
if (termsEnum == null) {
termsEnum = iterator();
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermsEnum.java?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermsEnum.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermsEnum.java Fri Jan 14 13:09:40 2011
@@ -73,7 +73,34 @@ public abstract class TermsEnum {
* may be before or after the current ord. See {@link
* #seek(BytesRef)}. */
public abstract SeekStatus seek(long ord) throws IOException;
-
+
+ /**
+ * Expert: Seeks a specific position by {@link TermState} previously obtained
+ * from {@link #termState()}. Callers should maintain the {@link TermState} to
+ * use this method. Low-level implementations may position the TermsEnum
+ * without re-seeking the term dictionary.
+ * <p>
+ * Seeking by {@link TermState} should only be used iff the enum the state was
+ * obtained from and the enum the state is used for seeking are obtained from
+ * the same {@link IndexReader}, otherwise a {@link #seek(BytesRef, TermState)} call can
+ * leave the enum in undefined state.
+ * <p>
+ * NOTE: Using this method with an incompatible {@link TermState} might leave
+ * this {@link TermsEnum} in undefined state. On a segment level
+ * {@link TermState} instances are compatible only iff the source and the
+ * target {@link TermsEnum} operate on the same field. If operating on segment
+ * level, TermState instances must not be used across segments.
+ * <p>
+ * NOTE: A seek by {@link TermState} might not restore the
+ * {@link AttributeSource}'s state. {@link AttributeSource} states must be
+ * maintained separately if this method is used.
+ * @param term the term the TermState corresponds to
+ * @param state the {@link TermState}
+ * */
+ public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+ return seek(term);
+ }
+
/** Increments the enumeration to the next element.
* Returns the resulting term, or null if the end was
* hit. The returned BytesRef may be re-used across calls
@@ -98,7 +125,7 @@ public abstract class TermsEnum {
* first time, after next() returns null or seek returns
* {@link SeekStatus#END}.*/
public abstract int docFreq();
-
+
/** Get {@link DocsEnum} for the current term. Do not
* call this before calling {@link #next} or {@link
* #seek} for the first time. This method will not
@@ -121,6 +148,25 @@ public abstract class TermsEnum {
* the postings by this codec. */
public abstract DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException;
+ /**
+ * Expert: Returns the TermsEnum's internal state to position the TermsEnum
+ * without re-seeking the term dictionary.
+ * <p>
+ * NOTE: A seek by {@link TermState} might not capture the
+ * {@link AttributeSource}'s state. Callers must maintain the
+ * {@link AttributeSource} states separately
+ *
+ * @see TermState
+ * @see #seek(BytesRef, TermState)
+ */
+ public TermState termState() throws IOException {
+ return new TermState() {
+ @Override
+ public void copyFrom(TermState other) {
+ }
+ };
+ }
+
/** Return the {@link BytesRef} Comparator used to sort
* terms provided by the iterator. This may return
* null if there are no terms. Callers may invoke this
@@ -128,10 +174,6 @@ public abstract class TermsEnum {
* instance & reuse it. */
public abstract Comparator<BytesRef> getComparator() throws IOException;
- /** Optional optimization hint: informs the codec that the
- * current term is likely to be re-seek'd-to soon. */
- public abstract void cacheCurrentTerm() throws IOException;
-
/** An empty TermsEnum for quickly returning an empty instance e.g.
* in {@link org.apache.lucene.search.MultiTermQuery}
* <p><em>Please note:</em> This enum should be unmodifiable,
@@ -147,9 +189,6 @@ public abstract class TermsEnum {
public SeekStatus seek(long ord) { return SeekStatus.END; }
@Override
- public void cacheCurrentTerm() {}
-
- @Override
public BytesRef term() {
throw new IllegalStateException("this method should never be called");
}
@@ -188,5 +227,15 @@ public abstract class TermsEnum {
public synchronized AttributeSource attributes() {
return super.attributes();
}
+
+ @Override
+ public TermState termState() throws IOException {
+ throw new IllegalStateException("this method should never be called");
+ }
+
+ @Override
+ public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+ throw new IllegalStateException("this method should never be called");
+ }
};
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java Fri Jan 14 13:09:40 2011
@@ -24,6 +24,7 @@ import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.BulkPostingsEnum;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.TermState;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
@@ -43,13 +44,13 @@ public abstract class PostingsReaderBase
public abstract void init(IndexInput termsIn) throws IOException;
/** Return a newly created empty TermState */
- public abstract TermState newTermState() throws IOException;
+ public abstract PrefixCodedTermState newTermState() throws IOException;
- public abstract void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState state, boolean isIndexTerm) throws IOException;
+ public abstract void readTerm(IndexInput termsIn, FieldInfo fieldInfo, PrefixCodedTermState state, boolean isIndexTerm) throws IOException;
/** Must fully consume state, since after this call that
* TermState may be reused. */
- public abstract DocsEnum docs(FieldInfo fieldInfo, TermState state, Bits skipDocs, DocsEnum reuse) throws IOException;
+ public abstract DocsEnum docs(FieldInfo fieldInfo, PrefixCodedTermState state, Bits skipDocs, DocsEnum reuse) throws IOException;
// nocommit jdocs
// nocommit make abstract
@@ -59,7 +60,7 @@ public abstract class PostingsReaderBase
/** Must fully consume state, since after this call that
* TermState may be reused. */
- public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermState state, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException;
+ public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, PrefixCodedTermState state, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException;
public abstract void close() throws IOException;
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java Fri Jan 14 13:09:40 2011
@@ -32,6 +32,7 @@ import org.apache.lucene.index.FieldInfo
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
@@ -69,7 +70,7 @@ public class PrefixCodedTermsReader exte
private final Comparator<BytesRef> termComp;
// Caches the most recently looked-up field + terms:
- private final DoubleBarrelLRUCache<FieldAndTerm,TermState> termsCache;
+ private final DoubleBarrelLRUCache<FieldAndTerm,PrefixCodedTermState> termsCache;
// Reads the terms index
private TermsIndexReaderBase indexReader;
@@ -85,11 +86,6 @@ public class PrefixCodedTermsReader exte
public FieldAndTerm() {
}
- public FieldAndTerm(String field, BytesRef term) {
- this.field = field;
- this.term = new BytesRef(term);
- }
-
public FieldAndTerm(FieldAndTerm other) {
field = other.field;
term = new BytesRef(other.term);
@@ -117,7 +113,7 @@ public class PrefixCodedTermsReader exte
throws IOException {
this.postingsReader = postingsReader;
- termsCache = new DoubleBarrelLRUCache<FieldAndTerm,TermState>(termsCacheSize);
+ termsCache = new DoubleBarrelLRUCache<FieldAndTerm,PrefixCodedTermState>(termsCacheSize);
this.termComp = termComp;
@@ -279,10 +275,10 @@ public class PrefixCodedTermsReader exte
}
// Iterates through terms in this field, not supporting ord()
- private class SegmentTermsEnum extends TermsEnum {
+ private final class SegmentTermsEnum extends TermsEnum {
private final IndexInput in;
private final DeltaBytesReader bytesReader;
- private final TermState state;
+ private final PrefixCodedTermState state;
private boolean seekPending;
private final FieldAndTerm fieldTerm = new FieldAndTerm();
private final TermsIndexReaderBase.FieldIndexEnum indexEnum;
@@ -308,14 +304,6 @@ public class PrefixCodedTermsReader exte
return termComp;
}
- @Override
- public void cacheCurrentTerm() {
- TermState stateCopy = (TermState) state.clone();
- stateCopy.filePointer = in.getFilePointer();
- termsCache.put(new FieldAndTerm(fieldInfo.name, bytesReader.term),
- stateCopy);
- }
-
// called only from assert
private boolean first;
private int indexTermCount;
@@ -343,7 +331,7 @@ public class PrefixCodedTermsReader exte
* is found, SeekStatus.NOT_FOUND if a different term
* was found, SeekStatus.END if we hit EOF */
@Override
- public SeekStatus seek(BytesRef term, boolean useCache) throws IOException {
+ public SeekStatus seek(final BytesRef term, final boolean useCache) throws IOException {
if (indexEnum == null) {
throw new IllegalStateException("terms index was not loaded");
@@ -358,9 +346,8 @@ public class PrefixCodedTermsReader exte
cachedState = termsCache.get(fieldTerm);
if (cachedState != null) {
state.copyFrom(cachedState);
- seekPending = true;
+ setTermState(term, state);
positioned = false;
- bytesReader.term.copy(term);
//System.out.println(" cached!");
return SeekStatus.FOUND;
}
@@ -441,12 +428,7 @@ public class PrefixCodedTermsReader exte
// Done!
if (useCache) {
- // Store in cache
- FieldAndTerm entryKey = new FieldAndTerm(fieldTerm);
- cachedState = (TermState) state.clone();
- // this is fp after current term
- cachedState.filePointer = in.getFilePointer();
- termsCache.put(entryKey, cachedState);
+ cacheTerm(fieldTerm);
}
return SeekStatus.FOUND;
@@ -466,6 +448,23 @@ public class PrefixCodedTermsReader exte
return SeekStatus.END;
}
+ private final void setTermState(BytesRef term, final TermState termState) {
+ assert termState != null && termState instanceof PrefixCodedTermState;
+ state.copyFrom(termState);
+ seekPending = true;
+ bytesReader.term.copy(term);
+ }
+
+ private final void cacheTerm(FieldAndTerm other) {
+ // Store in cache
+ final FieldAndTerm entryKey = new FieldAndTerm(other);
+ final PrefixCodedTermState cachedState = (PrefixCodedTermState) state.clone();
+ // this is fp after current term
+ cachedState.filePointer = in.getFilePointer();
+ termsCache.put(entryKey, cachedState);
+ }
+
+
@Override
public BytesRef term() {
return bytesReader.term;
@@ -500,7 +499,9 @@ public class PrefixCodedTermsReader exte
postingsReader.readTerm(in,
fieldInfo, state,
isIndexTerm);
- state.ord++;
+ if (doOrd) {
+ state.ord++;
+ }
positioned = true;
//System.out.println("te.next term=" + bytesReader.term.utf8ToString());
@@ -514,7 +515,7 @@ public class PrefixCodedTermsReader exte
@Override
public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {
- DocsEnum docsEnum = postingsReader.docs(fieldInfo, state, skipDocs, reuse);
+ final DocsEnum docsEnum = postingsReader.docs(fieldInfo, state, skipDocs, reuse);
assert docsEnum != null;
return docsEnum;
}
@@ -535,6 +536,23 @@ public class PrefixCodedTermsReader exte
}
@Override
+ public SeekStatus seek(BytesRef term, TermState otherState) throws IOException {
+ assert otherState != null && otherState instanceof PrefixCodedTermState;
+ assert otherState.getClass() == this.state.getClass() : "Illegal TermState type " + otherState.getClass();
+ assert ((PrefixCodedTermState)otherState).ord < numTerms;
+ setTermState(term, otherState);
+ positioned = false;
+ return SeekStatus.FOUND;
+ }
+
+ @Override
+ public TermState termState() throws IOException {
+ final PrefixCodedTermState newTermState = (PrefixCodedTermState) state.clone();
+ newTermState.filePointer = in.getFilePointer();
+ return newTermState;
+ }
+
+ @Override
public SeekStatus seek(long ord) throws IOException {
if (indexEnum == null) {
@@ -570,7 +588,6 @@ public class PrefixCodedTermsReader exte
return SeekStatus.FOUND;
}
- @Override
public long ord() {
if (!doOrd) {
throw new UnsupportedOperationException();
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java Fri Jan 14 13:09:40 2011
@@ -34,6 +34,7 @@ import org.apache.lucene.index.FieldsEnu
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.CompoundFileReader;
@@ -744,11 +745,6 @@ public class PreFlexFields extends Field
}
@Override
- public void cacheCurrentTerm() throws IOException {
- getTermsDict().cacheCurrentTerm(termEnum);
- }
-
- @Override
public SeekStatus seek(long ord) throws IOException {
throw new UnsupportedOperationException();
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java Fri Jan 14 13:09:40 2011
@@ -23,8 +23,9 @@ import org.apache.lucene.index.DocsAndPo
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.BulkPostingsEnum;
import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.TermState;
import org.apache.lucene.index.codecs.PostingsReaderBase;
-import org.apache.lucene.index.codecs.TermState;
+import org.apache.lucene.index.codecs.PrefixCodedTermState;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
@@ -56,10 +57,10 @@ public class PulsingPostingsReaderImpl e
wrappedPostingsReader.init(termsIn);
}
- private static class PulsingTermState extends TermState {
+ private static class PulsingTermState extends PrefixCodedTermState {
private byte[] postings;
private int postingsSize; // -1 if this term was not inlined
- private TermState wrappedTermState;
+ private PrefixCodedTermState wrappedTermState;
private boolean pendingIndexTerm;
@Override
@@ -71,7 +72,7 @@ public class PulsingPostingsReaderImpl e
System.arraycopy(postings, 0, clone.postings, 0, postingsSize);
} else {
assert wrappedTermState != null;
- clone.wrappedTermState = (TermState) wrappedTermState.clone();
+ clone.wrappedTermState = (PrefixCodedTermState) wrappedTermState.clone();
}
return clone;
}
@@ -102,15 +103,14 @@ public class PulsingPostingsReaderImpl e
}
@Override
- public TermState newTermState() throws IOException {
+ public PrefixCodedTermState newTermState() throws IOException {
PulsingTermState state = new PulsingTermState();
state.wrappedTermState = wrappedPostingsReader.newTermState();
return state;
}
@Override
- public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState _termState, boolean isIndexTerm) throws IOException {
-
+ public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, PrefixCodedTermState _termState, boolean isIndexTerm) throws IOException {
PulsingTermState termState = (PulsingTermState) _termState;
termState.pendingIndexTerm |= isIndexTerm;
@@ -137,7 +137,7 @@ public class PulsingPostingsReaderImpl e
// TODO: we could actually reuse, by having TL that
// holds the last wrapped reuse, and vice-versa
@Override
- public DocsEnum docs(FieldInfo field, TermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException {
+ public DocsEnum docs(FieldInfo field, PrefixCodedTermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException {
PulsingTermState termState = (PulsingTermState) _termState;
if (termState.postingsSize != -1) {
PulsingDocsEnum postings;
@@ -187,7 +187,7 @@ public class PulsingPostingsReaderImpl e
// TODO: -- not great that we can't always reuse
@Override
- public DocsAndPositionsEnum docsAndPositions(FieldInfo field, TermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
+ public DocsAndPositionsEnum docsAndPositions(FieldInfo field, PrefixCodedTermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
if (field.omitTermFreqAndPositions) {
return null;
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java Fri Jan 14 13:09:40 2011
@@ -26,8 +26,9 @@ import org.apache.lucene.index.DocsAndPo
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.TermState;
import org.apache.lucene.index.codecs.PostingsReaderBase;
-import org.apache.lucene.index.codecs.TermState;
+import org.apache.lucene.index.codecs.PrefixCodedTermState;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
@@ -130,12 +131,13 @@ public class SepPostingsReaderImpl exten
}
}
- private static class SepTermState extends TermState {
+ private static final class SepTermState extends PrefixCodedTermState {
// We store only the seek point to the docs file because
// the rest of the info (freqIndex, posIndex, etc.) is
// stored in the docs file:
IntIndexInput.Index docIndex;
-
+
+ @Override
public Object clone() {
SepTermState other = (SepTermState) super.clone();
other.docIndex = (IntIndexInput.Index) docIndex.clone();
@@ -155,19 +157,19 @@ public class SepPostingsReaderImpl exten
}
@Override
- public TermState newTermState() throws IOException {
+ public PrefixCodedTermState newTermState() throws IOException {
final SepTermState state = new SepTermState();
state.docIndex = docIn.index();
return state;
}
@Override
- public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState termState, boolean isIndexTerm) throws IOException {
+ public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, PrefixCodedTermState termState, boolean isIndexTerm) throws IOException {
((SepTermState) termState).docIndex.read(termsIn, isIndexTerm);
}
@Override
- public DocsEnum docs(FieldInfo fieldInfo, TermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException {
+ public DocsEnum docs(FieldInfo fieldInfo, PrefixCodedTermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException {
final SepTermState termState = (SepTermState) _termState;
SepDocsEnum docsEnum;
if (reuse == null || !(reuse instanceof SepDocsEnum) || !((SepDocsEnum) reuse).canReuse(docIn)) {
@@ -201,7 +203,7 @@ public class SepPostingsReaderImpl exten
}
@Override
- public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
+ public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, PrefixCodedTermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
assert !fieldInfo.omitTermFreqAndPositions;
final SepTermState termState = (SepTermState) _termState;
SepDocsAndPositionsEnum postingsEnum;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java Fri Jan 14 13:09:40 2011
@@ -21,6 +21,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.FieldsEnum;
+import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.DocsAndPositionsEnum;
@@ -153,10 +154,6 @@ class SimpleTextFieldsReader extends Fie
}
@Override
- public void cacheCurrentTerm() {
- }
-
- @Override
public BytesRef next() throws IOException {
assert !ended;
final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,Long>> result = fstEnum.next();
@@ -215,7 +212,7 @@ class SimpleTextFieldsReader extends Fie
}
return docsAndPositionsEnum.reset(docsStart, skipDocs);
}
-
+
@Override
public BulkPostingsEnum bulkPostings(BulkPostingsEnum reuse, boolean doFreq, boolean doPositions) throws IOException {
SimpleTextBulkPostingsEnum bulkPostingsEnum;
@@ -635,7 +632,6 @@ class SimpleTextFieldsReader extends Fie
}
private class SimpleTextTerms extends Terms {
- private final String field;
private final long termsStart;
private final boolean omitTF;
private FST<PairOutputs.Pair<Long,Long>> fst;
@@ -643,7 +639,6 @@ class SimpleTextFieldsReader extends Fie
private final BytesRef scratch = new BytesRef(10);
public SimpleTextTerms(String field, long termsStart) throws IOException {
- this.field = StringHelper.intern(field);
this.termsStart = termsStart;
omitTF = fieldInfos.fieldInfo(field).omitTermFreqAndPositions;
loadTerms();
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java Fri Jan 14 13:09:40 2011
@@ -27,8 +27,9 @@ import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.BulkPostingsEnum;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.TermState;
import org.apache.lucene.index.codecs.PostingsReaderBase;
-import org.apache.lucene.index.codecs.TermState;
+import org.apache.lucene.index.codecs.PrefixCodedTermState;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -84,20 +85,20 @@ public class StandardPostingsReader exte
}
// Must keep final because we do non-standard clone
- private final static class DocTermState extends TermState {
+ private final static class StandardTermState extends PrefixCodedTermState {
long freqOffset;
long proxOffset;
int skipOffset;
public Object clone() {
- DocTermState other = new DocTermState();
+ StandardTermState other = new StandardTermState();
other.copyFrom(this);
return other;
}
public void copyFrom(TermState _other) {
super.copyFrom(_other);
- DocTermState other = (DocTermState) _other;
+ StandardTermState other = (StandardTermState) _other;
freqOffset = other.freqOffset;
proxOffset = other.proxOffset;
skipOffset = other.skipOffset;
@@ -109,8 +110,8 @@ public class StandardPostingsReader exte
}
@Override
- public TermState newTermState() {
- return new DocTermState();
+ public PrefixCodedTermState newTermState() {
+ return new StandardTermState();
}
@Override
@@ -127,10 +128,9 @@ public class StandardPostingsReader exte
}
@Override
- public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState termState, boolean isIndexTerm)
+ public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, PrefixCodedTermState termState, boolean isIndexTerm)
throws IOException {
-
- final DocTermState docTermState = (DocTermState) termState;
+ final StandardTermState docTermState = (StandardTermState) termState;
if (isIndexTerm) {
docTermState.freqOffset = termsIn.readVLong();
@@ -154,7 +154,7 @@ public class StandardPostingsReader exte
}
@Override
- public DocsEnum docs(FieldInfo fieldInfo, TermState termState, Bits skipDocs, DocsEnum reuse) throws IOException {
+ public DocsEnum docs(FieldInfo fieldInfo, PrefixCodedTermState termState, Bits skipDocs, DocsEnum reuse) throws IOException {
SegmentDocsEnum docsEnum;
if (reuse == null || !(reuse instanceof SegmentDocsEnum)) {
docsEnum = new SegmentDocsEnum(freqIn);
@@ -167,7 +167,7 @@ public class StandardPostingsReader exte
docsEnum = new SegmentDocsEnum(freqIn);
}
}
- return docsEnum.reset(fieldInfo, (DocTermState) termState, skipDocs);
+ return docsEnum.reset(fieldInfo, (StandardTermState) termState, skipDocs);
}
private SegmentBulkPostingsEnum lastBulkEnum;
@@ -179,7 +179,7 @@ public class StandardPostingsReader exte
if (lastBulkEnum != null && reuse == lastBulkEnum) {
// fastpath
- return lastBulkEnum.reset((DocTermState) termState);
+ return lastBulkEnum.reset((StandardTermState) termState);
} else {
final SegmentBulkPostingsEnum postingsEnum;
if (reuse == null || !(reuse instanceof SegmentBulkPostingsEnum) || !((SegmentBulkPostingsEnum) reuse).canReuse(fieldInfo, freqIn, doFreqs, doPositions)) {
@@ -188,12 +188,12 @@ public class StandardPostingsReader exte
postingsEnum = (SegmentBulkPostingsEnum) reuse;
}
this.lastBulkEnum = postingsEnum;
- return postingsEnum.reset((DocTermState) termState);
+ return postingsEnum.reset((StandardTermState) termState);
}
}
@Override
- public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermState termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
+ public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, PrefixCodedTermState termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
if (fieldInfo.omitTermFreqAndPositions) {
return null;
}
@@ -212,7 +212,7 @@ public class StandardPostingsReader exte
docsEnum = new SegmentDocsAndPositionsAndPayloadsEnum(freqIn, proxIn);
}
}
- return docsEnum.reset(fieldInfo, (DocTermState) termState, skipDocs);
+ return docsEnum.reset(fieldInfo, (StandardTermState) termState, skipDocs);
} else {
SegmentDocsAndPositionsEnum docsEnum;
if (reuse == null || !(reuse instanceof SegmentDocsAndPositionsEnum)) {
@@ -226,7 +226,7 @@ public class StandardPostingsReader exte
docsEnum = new SegmentDocsAndPositionsEnum(freqIn, proxIn);
}
}
- return docsEnum.reset(fieldInfo, (DocTermState) termState, skipDocs);
+ return docsEnum.reset(fieldInfo, (StandardTermState) termState, skipDocs);
}
}
@@ -256,7 +256,7 @@ public class StandardPostingsReader exte
this.freqIn = (IndexInput) freqIn.clone();
}
- public SegmentDocsEnum reset(FieldInfo fieldInfo, DocTermState termState, Bits skipDocs) throws IOException {
+ public SegmentDocsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits skipDocs) throws IOException {
omitTF = fieldInfo.omitTermFreqAndPositions;
if (omitTF) {
freq = 1;
@@ -399,7 +399,7 @@ public class StandardPostingsReader exte
this.proxIn = (IndexInput) proxIn.clone();
}
- public SegmentDocsAndPositionsEnum reset(FieldInfo fieldInfo, DocTermState termState, Bits skipDocs) throws IOException {
+ public SegmentDocsAndPositionsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits skipDocs) throws IOException {
assert !fieldInfo.omitTermFreqAndPositions;
assert !fieldInfo.storePayloads;
@@ -587,7 +587,7 @@ public class StandardPostingsReader exte
this.proxIn = (IndexInput) proxIn.clone();
}
- public SegmentDocsAndPositionsAndPayloadsEnum reset(FieldInfo fieldInfo, DocTermState termState, Bits skipDocs) throws IOException {
+ public SegmentDocsAndPositionsAndPayloadsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits skipDocs) throws IOException {
assert !fieldInfo.omitTermFreqAndPositions;
assert fieldInfo.storePayloads;
if (payload == null) {
@@ -1049,7 +1049,7 @@ public class StandardPostingsReader exte
return positionDeltasReader;
}
- public SegmentBulkPostingsEnum reset(DocTermState termState) throws IOException {
+ public SegmentBulkPostingsEnum reset(StandardTermState termState) throws IOException {
freqOffset = termState.freqOffset;
freqIn.seek(freqOffset);
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java Fri Jan 14 13:09:40 2011
@@ -21,9 +21,15 @@ import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.PerReaderTermState;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
class ConstantScoreAutoRewrite extends TermCollectingRewrite<BooleanQuery> {
@@ -71,8 +77,8 @@ class ConstantScoreAutoRewrite extends T
}
@Override
- protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost /*ignored*/) {
- topLevel.add(new TermQuery(term, docFreq), BooleanClause.Occur.SHOULD);
+ protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost /*ignored*/, PerReaderTermState states) {
+ topLevel.add(new TermQuery(term, states), BooleanClause.Occur.SHOULD);
}
@Override
@@ -98,9 +104,10 @@ class ConstantScoreAutoRewrite extends T
final BytesRefHash pendingTerms = col.pendingTerms;
final int sort[] = pendingTerms.sort(col.termsEnum.getComparator());
for(int i = 0; i < size; i++) {
+ final int pos = sort[i];
// docFreq is not used for constant score here, we pass 1
// to explicitly set a fake value, so it's not calculated
- addClause(bq, placeholderTerm.createTerm(pendingTerms.get(sort[i], new BytesRef())), 1, 1.0f);
+ addClause(bq, placeholderTerm.createTerm(pendingTerms.get(pos, new BytesRef())), 1, 1.0f, col.array.termState[pos]);
}
// Strip scores
final Query result = new ConstantScoreQuery(bq);
@@ -123,12 +130,21 @@ class ConstantScoreAutoRewrite extends T
@Override
public boolean collect(BytesRef bytes) throws IOException {
- pendingTerms.add(bytes);
+ int pos = pendingTerms.add(bytes);
docVisitCount += termsEnum.docFreq();
if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) {
hasCutOff = true;
return false;
}
+
+ final TermState termState = termsEnum.termState();
+ assert termState != null;
+ if (pos < 0) {
+ pos = (-pos)-1;
+ array.termState[pos].register(termState, readerContext.ord, termsEnum.docFreq());
+ } else {
+ array.termState[pos] = new PerReaderTermState(topReaderContext, termState, readerContext.ord, termsEnum.docFreq());
+ }
return true;
}
@@ -137,7 +153,8 @@ class ConstantScoreAutoRewrite extends T
TermsEnum termsEnum;
final int docCountCutoff, termCountLimit;
- final BytesRefHash pendingTerms = new BytesRefHash();
+ final TermStateByteStart array = new TermStateByteStart(16);
+ final BytesRefHash pendingTerms = new BytesRefHash(new ByteBlockPool(new ByteBlockPool.DirectAllocator()), 16, array);
}
@Override
@@ -166,4 +183,40 @@ class ConstantScoreAutoRewrite extends T
return true;
}
+
+ /** Special implementation of BytesStartArray that keeps parallel arrays for {@link PerReaderTermState} */
+ static final class TermStateByteStart extends DirectBytesStartArray {
+ PerReaderTermState[] termState;
+
+ public TermStateByteStart(int initSize) {
+ super(initSize);
+ }
+
+ @Override
+ public int[] init() {
+ final int[] ord = super.init();
+ termState = new PerReaderTermState[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+ assert termState.length >= ord.length;
+ return ord;
+ }
+
+ @Override
+ public int[] grow() {
+ final int[] ord = super.grow();
+ if (termState.length < ord.length) {
+ PerReaderTermState[] tmpTermState = new PerReaderTermState[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+ System.arraycopy(termState, 0, tmpTermState, 0, termState.length);
+ termState = tmpTermState;
+ }
+ assert termState.length >= ord.length;
+ return ord;
+ }
+
+ @Override
+ public int[] clear() {
+ termState = null;
+ return super.clear();
+ }
+
+ }
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java Fri Jan 14 13:09:40 2011
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.BulkPostingsEnum;
@@ -162,12 +163,24 @@ public abstract class FilteredTermsEnum
public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse) throws IOException {
return tenum.docsAndPositions(bits, reuse);
}
-
+
+ /** This enum does not support seeking!
+ * @throws UnsupportedOperationException
+ */
@Override
- public void cacheCurrentTerm() throws IOException {
- tenum.cacheCurrentTerm();
+ public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+ throw new UnsupportedOperationException(getClass().getName()+" does not support seeking");
}
-
+
+ /**
+ * Returns the filtered enum's term state
+ */
+ @Override
+ public TermState termState() throws IOException {
+ assert tenum != null;
+ return tenum.termState();
+ }
+
@SuppressWarnings("fallthrough")
@Override
public BytesRef next() throws IOException {
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java Fri Jan 14 13:09:40 2011
@@ -20,6 +20,7 @@ package org.apache.lucene.search;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
@@ -245,11 +246,6 @@ public final class FuzzyTermsEnum extend
}
@Override
- public void cacheCurrentTerm() throws IOException {
- actualEnum.cacheCurrentTerm();
- }
-
- @Override
public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {
return actualEnum.docs(skipDocs, reuse);
}
@@ -260,6 +256,15 @@ public final class FuzzyTermsEnum extend
return actualEnum.docsAndPositions(skipDocs, reuse);
}
+ public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+ return actualEnum.seek(term, state);
+ }
+
+ @Override
+ public TermState termState() throws IOException {
+ return actualEnum.termState();
+ }
+
@Override
public Comparator<BytesRef> getComparator() throws IOException {
return actualEnum.getComparator();
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java Fri Jan 14 13:09:40 2011
@@ -26,6 +26,7 @@ import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.PerReaderTermState;
/**
* An abstract {@link Query} that matches documents
@@ -159,8 +160,8 @@ public abstract class MultiTermQuery ext
}
@Override
- protected void addClause(BooleanQuery topLevel, Term term, int docCount, float boost) {
- final TermQuery tq = new TermQuery(term, docCount);
+ protected void addClause(BooleanQuery topLevel, Term term, int docCount, float boost, PerReaderTermState states) {
+ final TermQuery tq = new TermQuery(term, states);
tq.setBoost(boost);
topLevel.add(tq, BooleanClause.Occur.SHOULD);
}
@@ -200,8 +201,8 @@ public abstract class MultiTermQuery ext
}
@Override
- protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost) {
- final Query q = new ConstantScoreQuery(new TermQuery(term, docFreq));
+ protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost, PerReaderTermState states) {
+ final Query q = new ConstantScoreQuery(new TermQuery(term, states));
q.setBoost(boost);
topLevel.add(q, BooleanClause.Occur.SHOULD);
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ScoringRewrite.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ScoringRewrite.java?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ScoringRewrite.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ScoringRewrite.java Fri Jan 14 13:09:40 2011
@@ -20,6 +20,7 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.MultiTermQuery.RewriteMethod;
@@ -27,6 +28,7 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.PerReaderTermState;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
@@ -53,8 +55,9 @@ public abstract class ScoringRewrite<Q e
}
@Override
- protected void addClause(BooleanQuery topLevel, Term term, int docCount, float boost) {
- final TermQuery tq = new TermQuery(term, docCount);
+ protected void addClause(BooleanQuery topLevel, Term term, int docCount,
+ float boost, PerReaderTermState states) {
+ final TermQuery tq = new TermQuery(term, states);
tq.setBoost(boost);
topLevel.add(tq, BooleanClause.Occur.SHOULD);
}
@@ -114,13 +117,13 @@ public abstract class ScoringRewrite<Q e
final int size = col.terms.size();
if (size > 0) {
final int sort[] = col.terms.sort(col.termsEnum.getComparator());
- final int[] docFreq = col.array.docFreq;
final float[] boost = col.array.boost;
+ final PerReaderTermState[] termStates = col.array.termState;
for (int i = 0; i < size; i++) {
final int pos = sort[i];
final Term term = placeholderTerm.createTerm(col.terms.get(pos, new BytesRef()));
- assert reader.docFreq(term) == docFreq[pos];
- addClause(result, term, docFreq[pos], query.getBoost() * boost[pos]);
+ assert reader.docFreq(term) == termStates[pos].docFreq();
+ addClause(result, term, termStates[pos].docFreq(), query.getBoost() * boost[pos], termStates[pos]);
}
}
query.incTotalNumberOfTerms(size);
@@ -143,15 +146,17 @@ public abstract class ScoringRewrite<Q e
@Override
public boolean collect(BytesRef bytes) throws IOException {
final int e = terms.add(bytes);
+ final TermState state = termsEnum.termState();
+ assert state != null;
if (e < 0 ) {
// duplicate term: update docFreq
final int pos = (-e)-1;
- array.docFreq[pos] += termsEnum.docFreq();
+ array.termState[pos].register(state, readerContext.ord, termsEnum.docFreq());
assert array.boost[pos] == boostAtt.getBoost() : "boost should be equal in all segment TermsEnums";
} else {
// new entry: we populate the entry initially
- array.docFreq[e] = termsEnum.docFreq();
array.boost[e] = boostAtt.getBoost();
+ array.termState[e] = new PerReaderTermState(topReaderContext, state, readerContext.ord, termsEnum.docFreq());
ScoringRewrite.this.checkMaxClauseCount(terms.size());
}
return true;
@@ -160,8 +165,8 @@ public abstract class ScoringRewrite<Q e
/** Special implementation of BytesStartArray that keeps parallel arrays for boost and docFreq */
static final class TermFreqBoostByteStart extends DirectBytesStartArray {
- int[] docFreq;
float[] boost;
+ PerReaderTermState[] termState;
public TermFreqBoostByteStart(int initSize) {
super(initSize);
@@ -171,24 +176,28 @@ public abstract class ScoringRewrite<Q e
public int[] init() {
final int[] ord = super.init();
boost = new float[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_FLOAT)];
- docFreq = new int[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_INT)];
- assert boost.length >= ord.length && docFreq.length >= ord.length;
+ termState = new PerReaderTermState[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+ assert termState.length >= ord.length && boost.length >= ord.length;
return ord;
}
@Override
public int[] grow() {
final int[] ord = super.grow();
- docFreq = ArrayUtil.grow(docFreq, ord.length);
boost = ArrayUtil.grow(boost, ord.length);
- assert boost.length >= ord.length && docFreq.length >= ord.length;
+ if (termState.length < ord.length) {
+ PerReaderTermState[] tmpTermState = new PerReaderTermState[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+ System.arraycopy(termState, 0, tmpTermState, 0, termState.length);
+ termState = tmpTermState;
+ }
+ assert termState.length >= ord.length && boost.length >= ord.length;
return ord;
}
@Override
public int[] clear() {
boost = null;
- docFreq = null;
+ termState = null;
return super.clear();
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermCollectingRewrite.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermCollectingRewrite.java?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermCollectingRewrite.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermCollectingRewrite.java Fri Jan 14 13:09:40 2011
@@ -18,8 +18,6 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
import java.util.Comparator;
import org.apache.lucene.index.Fields;
@@ -27,25 +25,33 @@ import org.apache.lucene.index.IndexRead
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.PerReaderTermState;
import org.apache.lucene.util.ReaderUtil;
abstract class TermCollectingRewrite<Q extends Query> extends MultiTermQuery.RewriteMethod {
+
/** Return a suitable top-level Query for holding all expanded terms. */
protected abstract Q getTopLevelQuery() throws IOException;
/** Add a MultiTermQuery term to the top-level query */
- protected abstract void addClause(Q topLevel, Term term, int docCount, float boost) throws IOException;
+ protected final void addClause(Q topLevel, Term term, int docCount, float boost) throws IOException {
+ addClause(topLevel, term, docCount, boost, null);
+ }
+
+ protected abstract void addClause(Q topLevel, Term term, int docCount, float boost, PerReaderTermState states) throws IOException;
+
protected final void collectTerms(IndexReader reader, MultiTermQuery query, TermCollector collector) throws IOException {
- final List<IndexReader> subReaders = new ArrayList<IndexReader>();
- ReaderUtil.gatherSubReaders(subReaders, reader);
+ ReaderContext topReaderContext = reader.getTopReaderContext();
Comparator<BytesRef> lastTermComp = null;
-
- for (IndexReader r : subReaders) {
- final Fields fields = r.fields();
+ final AtomicReaderContext[] leaves = ReaderUtil.leaves(topReaderContext);
+ for (AtomicReaderContext context : leaves) {
+ final Fields fields = context.reader.fields();
if (fields == null) {
// reader has no fields
continue;
@@ -68,11 +74,10 @@ abstract class TermCollectingRewrite<Q e
if (lastTermComp != null && newTermComp != null && newTermComp != lastTermComp)
throw new RuntimeException("term comparator should not change between segments: "+lastTermComp+" != "+newTermComp);
lastTermComp = newTermComp;
-
+ collector.setReaderContext(topReaderContext, context);
collector.setNextEnum(termsEnum);
BytesRef bytes;
while ((bytes = termsEnum.next()) != null) {
- termsEnum.cacheCurrentTerm();
if (!collector.collect(bytes))
return; // interrupt whole term collection, so also don't iterate other subReaders
}
@@ -80,6 +85,14 @@ abstract class TermCollectingRewrite<Q e
}
protected static abstract class TermCollector {
+
+ protected AtomicReaderContext readerContext;
+ protected ReaderContext topReaderContext;
+
+ public void setReaderContext(ReaderContext topReaderContext, AtomicReaderContext readerContext) {
+ this.readerContext = readerContext;
+ this.topReaderContext = topReaderContext;
+ }
/** attributes used for communication with the enum */
public final AttributeSource attributes = new AttributeSource();
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermQuery.java?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermQuery.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermQuery.java Fri Jan 14 13:09:40 2011
@@ -23,11 +23,15 @@ import java.util.Set;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.BulkPostingsEnum;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.Terms;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.BulkPostingsEnum.BlockReader;
import org.apache.lucene.search.Explanation.IDFExplanation;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.PerReaderTermState;
import org.apache.lucene.util.ToStringUtils;
/** A Query that matches documents containing a term.
@@ -35,20 +39,22 @@ import org.apache.lucene.util.ToStringUt
*/
public class TermQuery extends Query {
private final Term term;
- private final int docFreq;
+ private int docFreq;
+ private transient PerReaderTermState perReaderTermState;
private class TermWeight extends Weight {
private final Similarity similarity;
private float value;
- private float idf;
+ private final float idf;
private float queryNorm;
private float queryWeight;
- private IDFExplanation idfExp;
- private transient ReaderContext weightContext; // only set if -ea for assert in scorer()
+ private final IDFExplanation idfExp;
+ private transient PerReaderTermState termStates;
- public TermWeight(IndexSearcher searcher)
+ public TermWeight(IndexSearcher searcher, PerReaderTermState termStates, int docFreq)
throws IOException {
- assert setWeightContext(searcher);
+ assert termStates != null : "PerReaderTermState must not be null";
+ this.termStates = termStates;
this.similarity = getSimilarity(searcher);
if (docFreq != -1) {
idfExp = similarity.idfExplain(term, searcher, docFreq);
@@ -82,18 +88,27 @@ public class TermQuery extends Query {
@Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer) throws IOException {
- assert assertTopReaderContext(context);
+ final String field = term.field();
final IndexReader reader = context.reader;
- assert reader.getSequentialSubReaders() == null;
- BulkPostingsEnum docs = reader.bulkTermPostingsEnum(term.field(),
- term.bytes(),
- true,
- false);
- if (docs == null) {
+ assert assertTopReaderContext(termStates, context) : "The top-reader used to create Weight is not the same as the current reader's top-reader";
+ final TermState state = termStates
+ .get(context.ord);
+ if (state == null) { // term is not present in that reader
+ assert termNotInReader(reader, field, term.bytes()) : "no termstate found but term exists in reader";
return null;
}
- // nocommit: we need this docfreq from TermState, MTQ knows it... but tosses it away.
- final int docFreq = reader.docFreq(term.field(), term.bytes());
+
+ // nocommit: get bulkTermPostingsEnum with TermState
+ assert reader.getSequentialSubReaders() == null;
+ BulkPostingsEnum docs = reader.bulkTermPostingsEnum(term.field(),
+ term.bytes(),
+ true,
+ false);
+
+ assert docs != null; /* nocommit: shouldnt need this assert? termstate should catch this case above */
+
+ // nocommit: get the segment reader docfreq with TermState
+ final int docFreq = reader.docFreq(field, term.bytes());
final BlockReader docDeltas = docs.getDocDeltasReader();
final BlockReader frequencies = docs.getFreqsReader();
if (frequencies == null) {
@@ -105,18 +120,20 @@ public class TermQuery extends Query {
}
}
- private boolean assertTopReaderContext(ReaderContext context) {
- while (context.parent != null) {
+ private boolean termNotInReader(IndexReader reader, String field, BytesRef bytes) throws IOException {
+ // only called from assert
+ final Terms terms = reader.terms(field);
+ return terms == null || terms.docFreq(bytes) == 0;
+ }
+
+ private boolean assertTopReaderContext(PerReaderTermState state, ReaderContext context) {
+ while(context.parent != null) {
context = context.parent;
}
- return weightContext == context;
+ return state.topReaderContext == context;
}
- private boolean setWeightContext(IndexSearcher searcher) {
- weightContext = searcher.getTopReaderContext();
- return true;
- }
-
+
@Override
public Explanation explain(AtomicReaderContext context, int doc)
throws IOException {
@@ -169,7 +186,7 @@ public class TermQuery extends Query {
fieldExpl.addDetail(expl);
Explanation fieldNormExpl = new Explanation();
- byte[] fieldNorms = reader.norms(field);
+ final byte[] fieldNorms = reader.norms(field);
float fieldNorm =
fieldNorms!=null ? similarity.decodeNormValue(fieldNorms[doc]) : 1.0f;
fieldNormExpl.setValue(fieldNorm);
@@ -205,6 +222,17 @@ public class TermQuery extends Query {
public TermQuery(Term t, int docFreq) {
term = t;
this.docFreq = docFreq;
+ perReaderTermState = null;
+ }
+
+ /** Expert: constructs a TermQuery that takes its docFreq
+ * from the provided PerReaderTermState and reuses those states
+ * when they were built for the searcher's top-level reader context. */
+ public TermQuery(Term t, PerReaderTermState states) {
+ assert states != null;
+ term = t;
+ docFreq = states.docFreq();
+ perReaderTermState = states;
}
/** Returns the term of this query. */
@@ -212,7 +240,21 @@ public class TermQuery extends Query {
@Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
- return new TermWeight(searcher);
+ final ReaderContext context = searcher.getTopReaderContext();
+ final int weightDocFreq;
+ final PerReaderTermState termState;
+ if (perReaderTermState == null || perReaderTermState.topReaderContext != context) {
+ // make TermQuery single-pass if we don't have a PRTS or if the context differs!
+ termState = PerReaderTermState.build(context, term, true); // cache term lookups!
+ // we must not ignore the given docFreq - if set use the given value
+ weightDocFreq = docFreq == -1 ? termState.docFreq() : docFreq;
+ } else {
+ // the PerReaderTermState was pre-built for this IndexSearcher's top-level context
+ termState = this.perReaderTermState;
+ weightDocFreq = docFreq;
+ }
+
+ return new TermWeight(searcher, termState, weightDocFreq);
}
@Override
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java Fri Jan 14 13:09:40 2011
@@ -25,9 +25,11 @@ import java.util.Comparator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.PerReaderTermState;
/**
* Base rewrite method for collecting only the top terms
@@ -78,12 +80,12 @@ public abstract class TopTermsRewrite<Q
this.termComp = termsEnum.getComparator();
// lazy init the initial ScoreTerm because comparator is not known on ctor:
if (st == null)
- st = new ScoreTerm(this.termComp);
+ st = new ScoreTerm(this.termComp, new PerReaderTermState(topReaderContext));
boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class);
}
@Override
- public boolean collect(BytesRef bytes) {
+ public boolean collect(BytesRef bytes) throws IOException {
final float boost = boostAtt.getBoost();
// ignore uncompetitive hits
if (stQueue.size() == maxSize) {
@@ -94,23 +96,27 @@ public abstract class TopTermsRewrite<Q
return true;
}
ScoreTerm t = visitedTerms.get(bytes);
+ final TermState state = termsEnum.termState();
+ assert state != null;
if (t != null) {
// if the term is already in the PQ, only register this segment's TermState and docFreq on its entry
- t.docFreq += termsEnum.docFreq();
assert t.boost == boost : "boost should be equal in all segment TermsEnums";
+ t.termState.register(state, readerContext.ord, termsEnum.docFreq());
} else {
// add new entry in PQ, we must clone the term, else it may get overwritten!
st.bytes.copy(bytes);
st.boost = boost;
- st.docFreq = termsEnum.docFreq();
visitedTerms.put(st.bytes, st);
+ assert st.termState.docFreq() == 0;
+ st.termState.register(state, readerContext.ord, termsEnum.docFreq());
stQueue.offer(st);
// possibly drop entries from queue
if (stQueue.size() > maxSize) {
st = stQueue.poll();
visitedTerms.remove(st.bytes);
+ st.termState.clear(); // reset the termstate!
} else {
- st = new ScoreTerm(termComp);
+ st = new ScoreTerm(termComp, new PerReaderTermState(topReaderContext));
}
assert stQueue.size() <= maxSize : "the PQ size must be limited to maxSize";
// set maxBoostAtt with values to help FuzzyTermsEnum to optimize
@@ -120,6 +126,7 @@ public abstract class TopTermsRewrite<Q
maxBoostAtt.setCompetitiveTerm(t.bytes);
}
}
+
return true;
}
});
@@ -130,8 +137,8 @@ public abstract class TopTermsRewrite<Q
ArrayUtil.quickSort(scoreTerms, scoreTermSortByTermComp);
for (final ScoreTerm st : scoreTerms) {
final Term term = placeholderTerm.createTerm(st.bytes);
- assert reader.docFreq(term) == st.docFreq;
- addClause(q, term, st.docFreq, query.getBoost() * st.boost); // add to query
+ assert reader.docFreq(term) == st.termState.docFreq() : "reader DF is " + reader.docFreq(term) + " vs " + st.termState.docFreq();
+ addClause(q, term, st.termState.docFreq(), query.getBoost() * st.boost, st.termState); // add to query
}
query.incTotalNumberOfTerms(scoreTerms.length);
return q;
@@ -147,7 +154,7 @@ public abstract class TopTermsRewrite<Q
if (this == obj) return true;
if (obj == null) return false;
if (getClass() != obj.getClass()) return false;
- final TopTermsRewrite other = (TopTermsRewrite) obj;
+ final TopTermsRewrite<?> other = (TopTermsRewrite<?>) obj;
if (size != other.size) return false;
return true;
}
@@ -163,13 +170,12 @@ public abstract class TopTermsRewrite<Q
static final class ScoreTerm implements Comparable<ScoreTerm> {
public final Comparator<BytesRef> termComp;
-
public final BytesRef bytes = new BytesRef();
public float boost;
- public int docFreq;
-
- public ScoreTerm(Comparator<BytesRef> termComp) {
+ public final PerReaderTermState termState;
+ public ScoreTerm(Comparator<BytesRef> termComp, PerReaderTermState termState) {
this.termComp = termComp;
+ this.termState = termState;
}
public int compareTo(ScoreTerm other) {
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java Fri Jan 14 13:09:40 2011
@@ -24,8 +24,11 @@ import org.apache.lucene.index.DocsAndPo
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.OrdTermState;
+import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.codecs.PrefixCodedTermState;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldCache.DocTermsIndex;
import org.apache.lucene.util.ArrayUtil;
@@ -304,11 +307,6 @@ public class DocTermsIndexCreator extend
}
@Override
- public void cacheCurrentTerm() throws IOException {
- throw new UnsupportedOperationException();
- }
-
- @Override
public BytesRef term() throws IOException {
return term;
}
@@ -337,6 +335,19 @@ public class DocTermsIndexCreator extend
public Comparator<BytesRef> getComparator() throws IOException {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
+
+ @Override
+ public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+ assert state != null && state instanceof OrdTermState;
+ return this.seek(((OrdTermState)state).ord);
+ }
+
+ @Override
+ public TermState termState() throws IOException {
+ OrdTermState state = new OrdTermState();
+ state.ord = currentOrd;
+ return state;
+ }
}
}
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java Fri Jan 14 13:09:40 2011
@@ -26,6 +26,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopTermsRewrite;
import org.apache.lucene.search.ScoringRewrite;
import org.apache.lucene.search.BooleanClause.Occur; // javadocs only
+import org.apache.lucene.util.PerReaderTermState;
/**
* Wraps any {@link MultiTermQuery} as a {@link SpanQuery},
@@ -153,7 +154,7 @@ public class SpanMultiTermQueryWrapper<Q
}
@Override
- protected void addClause(SpanOrQuery topLevel, Term term, int docCount, float boost) {
+ protected void addClause(SpanOrQuery topLevel, Term term, int docCount, float boost, PerReaderTermState states) {
final SpanTermQuery q = new SpanTermQuery(term);
q.setBoost(boost);
topLevel.addClause(q);
@@ -202,7 +203,7 @@ public class SpanMultiTermQueryWrapper<Q
}
@Override
- protected void addClause(SpanOrQuery topLevel, Term term, int docFreq, float boost) {
+ protected void addClause(SpanOrQuery topLevel, Term term, int docFreq, float boost, PerReaderTermState states) {
final SpanTermQuery q = new SpanTermQuery(term);
q.setBoost(boost);
topLevel.addClause(q);
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestExternalCodecs.java?rev=1058995&r1=1058994&r2=1058995&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestExternalCodecs.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestExternalCodecs.java Fri Jan 14 13:09:40 2011
@@ -18,6 +18,7 @@ package org.apache.lucene;
*/
import org.apache.lucene.util.*;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.index.*;
import org.apache.lucene.document.*;
import org.apache.lucene.search.*;
@@ -330,10 +331,6 @@ public class TestExternalCodecs extends
}
@Override
- public void cacheCurrentTerm() {
- }
-
- @Override
public DocsEnum docs(Bits skipDocs, DocsEnum reuse) {
return new RAMDocsEnum(ramField.termToDocs.get(current), skipDocs);
}