You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2009/04/14 21:35:44 UTC
svn commit: r764930 - in /lucene/java/trunk: CHANGES.txt
src/java/org/apache/lucene/search/FilteredTermEnum.java
src/java/org/apache/lucene/search/MultiTermQuery.java
Author: mikemccand
Date: Tue Apr 14 19:35:43 2009
New Revision: 764930
URL: http://svn.apache.org/viewvc?rev=764930&view=rev
Log:
LUCENE-1603: some improvements to MultiTermQuery/FilteredTermEnum
Modified:
lucene/java/trunk/CHANGES.txt
lucene/java/trunk/src/java/org/apache/lucene/search/FilteredTermEnum.java
lucene/java/trunk/src/java/org/apache/lucene/search/MultiTermQuery.java
Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=764930&r1=764929&r2=764930&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Tue Apr 14 19:35:43 2009
@@ -278,6 +278,12 @@
turnaround than the normal approach of commiting the changes and
then reopening a reader. (Jason Rutherglen via Mike McCandless)
+21. LUCENE-1603: Some improvements to MultiTermQuery: return
+ DocIdSet.EMPTY_DOCIDSET if there are no terms in the enum; track
+ the total number of terms it visited during rewrite
+ (getTotalNumberOfTerms). Also, FilteredTermEnum is now more
+ friendly to subclasses. (Uwe Schindler via Mike McCandless)
+
Optimizations
1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing
Modified: lucene/java/trunk/src/java/org/apache/lucene/search/FilteredTermEnum.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/FilteredTermEnum.java?rev=764930&r1=764929&r2=764930&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/FilteredTermEnum.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/FilteredTermEnum.java Tue Apr 14 19:35:43 2009
@@ -26,8 +26,11 @@
<p>Term enumerations are always ordered by Term.compareTo(). Each term in
the enumeration is greater than all that precede it. */
public abstract class FilteredTermEnum extends TermEnum {
- private Term currentTerm = null;
- private TermEnum actualEnum = null;
+ /** the current term */
+ protected Term currentTerm = null;
+
+ /** the delegate enum - to set this member use {@link #setEnum} */
+ protected TermEnum actualEnum = null;
public FilteredTermEnum() {}
@@ -40,6 +43,10 @@
/** Indicates the end of the enumeration has been reached */
protected abstract boolean endEnum();
+ /**
+ * use this method to set the actual TermEnum (e.g. in ctor),
+ * it will be automatically positioned on the first matching term.
+ */
protected void setEnum(TermEnum actualEnum) throws IOException {
this.actualEnum = actualEnum;
// Find the first term that matches
@@ -54,7 +61,8 @@
* Returns -1 if no Term matches or all terms have been enumerated.
*/
public int docFreq() {
- if (actualEnum == null) return -1;
+ if (currentTerm == null) return -1;
+ assert actualEnum != null;
return actualEnum.docFreq();
}
@@ -85,7 +93,7 @@
/** Closes the enumeration to further activity, freeing resources. */
public void close() throws IOException {
- actualEnum.close();
+ if (actualEnum != null) actualEnum.close();
currentTerm = null;
actualEnum = null;
}
Modified: lucene/java/trunk/src/java/org/apache/lucene/search/MultiTermQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/MultiTermQuery.java?rev=764930&r1=764929&r2=764930&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/MultiTermQuery.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/MultiTermQuery.java Tue Apr 14 19:35:43 2009
@@ -45,6 +45,7 @@
public abstract class MultiTermQuery extends Query {
protected Term term;
protected boolean constantScoreRewrite = false;
+ transient int numberOfTerms = 0;
/** Constructs a query for terms matching <code>term</code>. */
public MultiTermQuery(Term term) {
@@ -67,6 +68,34 @@
protected abstract FilteredTermEnum getEnum(IndexReader reader)
throws IOException;
+ /**
+ * Expert: Return the number of unique terms visited during execution of the query.
+ * If there are many of them, you may consider using another query type
+ * or optimizing the total term count in your index.
+ * <p>This method is not thread safe, be sure to only call it when no query is running!
+ * If you re-use the same query instance for another
+ * search, be sure to first reset the term counter
+ * with {@link #clearTotalNumberOfTerms}.
+ * <p>On optimized indexes / no MultiReaders, you get the correct number of
+ * unique terms for the whole index. Use this number to compare different queries.
+ * For non-optimized indexes this number can also be achieved in
+ * non-constant-score mode. In constant-score mode you get the total number of
+ * terms seeked for all segments / sub-readers.
+ * @see #clearTotalNumberOfTerms
+ */
+ public int getTotalNumberOfTerms() {
+ return numberOfTerms;
+ }
+
+ /**
+ * Expert: Resets the counting of unique terms.
+ * Do this before executing the query/filter.
+ * @see #getTotalNumberOfTerms
+ */
+ public void clearTotalNumberOfTerms() {
+ numberOfTerms = 0;
+ }
+
protected Filter getFilter() {
return new MultiTermFilter(this);
}
@@ -79,6 +108,7 @@
do {
Term t = enumerator.term();
if (t != null) {
+ numberOfTerms++;
TermQuery tq = new TermQuery(t); // found a match
tq.setBoost(getBoost() * enumerator.difference()); // set the boost
query.add(tq, BooleanClause.Occur.SHOULD); // add to query
@@ -150,14 +180,14 @@
MultiTermQuery mtq;
abstract class TermGenerator {
- public void generate(IndexReader reader) throws IOException {
- TermEnum enumerator = mtq.getEnum(reader);
+ public void generate(IndexReader reader, TermEnum enumerator) throws IOException {
TermDocs termDocs = reader.termDocs();
try {
do {
Term term = enumerator.term();
if (term == null)
break;
+ mtq.numberOfTerms++;
termDocs.seek(term);
while (termDocs.next()) {
handleDoc(termDocs.doc());
@@ -165,7 +195,6 @@
} while (enumerator.next());
} finally {
termDocs.close();
- enumerator.close();
}
}
abstract public void handleDoc(int doc);
@@ -176,28 +205,40 @@
}
public BitSet bits(IndexReader reader) throws IOException {
- final BitSet bitSet = new BitSet(reader.maxDoc());
- new TermGenerator() {
- public void handleDoc(int doc) {
- bitSet.set(doc);
- }
- }.generate(reader);
- return bitSet;
+ final TermEnum enumerator = mtq.getEnum(reader);
+ try {
+ final BitSet bitSet = new BitSet(reader.maxDoc());
+ new TermGenerator() {
+ public void handleDoc(int doc) {
+ bitSet.set(doc);
+ }
+ }.generate(reader, enumerator);
+ return bitSet;
+ } finally {
+ enumerator.close();
+ }
}
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
- final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
- new TermGenerator() {
- public void handleDoc(int doc) {
- bitSet.set(doc);
- }
- }.generate(reader);
-
- return bitSet;
+ final TermEnum enumerator = mtq.getEnum(reader);
+ try {
+ // if current term in enum is null, the enum is empty -> shortcut
+ if (enumerator.term() == null)
+ return DocIdSet.EMPTY_DOCIDSET;
+ // else fill into a OpenBitSet
+ final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
+ new TermGenerator() {
+ public void handleDoc(int doc) {
+ bitSet.set(doc);
+ }
+ }.generate(reader, enumerator);
+ return bitSet;
+ } finally {
+ enumerator.close();
+ }
}
public boolean equals(Object o) {
-
if (this == o)
return true;
if (!(o instanceof MultiTermFilter))