You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by us...@apache.org on 2010/04/06 21:19:36 UTC
svn commit: r931278 [7/10] - in /lucene/dev/trunk: lucene/
lucene/backwards/src/ lucene/backwards/src/java/org/apache/lucene/index/
lucene/backwards/src/java/org/apache/lucene/index/codecs/
lucene/backwards/src/java/org/apache/lucene/search/ lucene/bac...
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java Tue Apr 6 19:19:27 2010
@@ -26,9 +26,12 @@ import java.util.Map;
import java.util.WeakHashMap;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.FieldCacheSanityChecker;
@@ -277,22 +280,29 @@ class FieldCacheImpl implements FieldCac
return wrapper.getBytes(reader, field, FieldCache.DEFAULT_BYTE_PARSER);
}
final byte[] retArray = new byte[reader.maxDoc()];
- TermDocs termDocs = reader.termDocs();
- TermEnum termEnum = reader.terms (new Term (field));
- try {
- do {
- Term term = termEnum.term();
- if (term==null || term.field() != field) break;
- byte termval = parser.parseByte(term.text());
- termDocs.seek (termEnum);
- while (termDocs.next()) {
- retArray[termDocs.doc()] = termval;
- }
- } while (termEnum.next());
- } catch (StopFillCacheException stop) {
- } finally {
- termDocs.close();
- termEnum.close();
+ Terms terms = MultiFields.getTerms(reader, field);
+ if (terms != null) {
+ final TermsEnum termsEnum = terms.iterator();
+ final Bits delDocs = MultiFields.getDeletedDocs(reader);
+ DocsEnum docs = null;
+ try {
+ while(true) {
+ final BytesRef term = termsEnum.next();
+ if (term == null) {
+ break;
+ }
+ final byte termval = parser.parseByte(term);
+ docs = termsEnum.docs(delDocs, docs);
+ while (true) {
+ final int docID = docs.nextDoc();
+ if (docID == DocsEnum.NO_MORE_DOCS) {
+ break;
+ }
+ retArray[docID] = termval;
+ }
+ }
+ } catch (StopFillCacheException stop) {
+ }
}
return retArray;
}
@@ -324,22 +334,29 @@ class FieldCacheImpl implements FieldCac
return wrapper.getShorts(reader, field, FieldCache.DEFAULT_SHORT_PARSER);
}
final short[] retArray = new short[reader.maxDoc()];
- TermDocs termDocs = reader.termDocs();
- TermEnum termEnum = reader.terms (new Term (field));
- try {
- do {
- Term term = termEnum.term();
- if (term==null || term.field() != field) break;
- short termval = parser.parseShort(term.text());
- termDocs.seek (termEnum);
- while (termDocs.next()) {
- retArray[termDocs.doc()] = termval;
- }
- } while (termEnum.next());
- } catch (StopFillCacheException stop) {
- } finally {
- termDocs.close();
- termEnum.close();
+ Terms terms = MultiFields.getTerms(reader, field);
+ if (terms != null) {
+ final TermsEnum termsEnum = terms.iterator();
+ final Bits delDocs = MultiFields.getDeletedDocs(reader);
+ DocsEnum docs = null;
+ try {
+ while(true) {
+ final BytesRef term = termsEnum.next();
+ if (term == null) {
+ break;
+ }
+ final short termval = parser.parseShort(term);
+ docs = termsEnum.docs(delDocs, docs);
+ while (true) {
+ final int docID = docs.nextDoc();
+ if (docID == DocsEnum.NO_MORE_DOCS) {
+ break;
+ }
+ retArray[docID] = termval;
+ }
+ }
+ } catch (StopFillCacheException stop) {
+ }
}
return retArray;
}
@@ -375,27 +392,41 @@ class FieldCacheImpl implements FieldCac
}
}
int[] retArray = null;
- TermDocs termDocs = reader.termDocs();
- TermEnum termEnum = reader.terms (new Term (field));
- try {
- do {
- Term term = termEnum.term();
- if (term==null || term.field() != field) break;
- int termval = parser.parseInt(term.text());
- if (retArray == null) // late init
- retArray = new int[reader.maxDoc()];
- termDocs.seek (termEnum);
- while (termDocs.next()) {
- retArray[termDocs.doc()] = termval;
- }
- } while (termEnum.next());
- } catch (StopFillCacheException stop) {
- } finally {
- termDocs.close();
- termEnum.close();
+
+ Terms terms = MultiFields.getTerms(reader, field);
+ if (terms != null) {
+ final TermsEnum termsEnum = terms.iterator();
+ final Bits delDocs = MultiFields.getDeletedDocs(reader);
+ DocsEnum docs = null;
+ try {
+ while(true) {
+ final BytesRef term = termsEnum.next();
+ if (term == null) {
+ break;
+ }
+ final int termval = parser.parseInt(term);
+ if (retArray == null) {
+ // late init so numeric fields don't double allocate
+ retArray = new int[reader.maxDoc()];
+ }
+
+ docs = termsEnum.docs(delDocs, docs);
+ while (true) {
+ final int docID = docs.nextDoc();
+ if (docID == DocsEnum.NO_MORE_DOCS) {
+ break;
+ }
+ retArray[docID] = termval;
+ }
+ }
+ } catch (StopFillCacheException stop) {
+ }
}
- if (retArray == null) // no values
+
+ if (retArray == null) {
+ // no values
retArray = new int[reader.maxDoc()];
+ }
return retArray;
}
}
@@ -431,29 +462,43 @@ class FieldCacheImpl implements FieldCac
} catch (NumberFormatException ne) {
return wrapper.getFloats(reader, field, NUMERIC_UTILS_FLOAT_PARSER);
}
- }
+ }
float[] retArray = null;
- TermDocs termDocs = reader.termDocs();
- TermEnum termEnum = reader.terms (new Term (field));
- try {
- do {
- Term term = termEnum.term();
- if (term==null || term.field() != field) break;
- float termval = parser.parseFloat(term.text());
- if (retArray == null) // late init
- retArray = new float[reader.maxDoc()];
- termDocs.seek (termEnum);
- while (termDocs.next()) {
- retArray[termDocs.doc()] = termval;
- }
- } while (termEnum.next());
- } catch (StopFillCacheException stop) {
- } finally {
- termDocs.close();
- termEnum.close();
+
+ Terms terms = MultiFields.getTerms(reader, field);
+ if (terms != null) {
+ final TermsEnum termsEnum = terms.iterator();
+ final Bits delDocs = MultiFields.getDeletedDocs(reader);
+ DocsEnum docs = null;
+ try {
+ while(true) {
+ final BytesRef term = termsEnum.next();
+ if (term == null) {
+ break;
+ }
+ final float termval = parser.parseFloat(term);
+ if (retArray == null) {
+ // late init so numeric fields don't double allocate
+ retArray = new float[reader.maxDoc()];
+ }
+
+ docs = termsEnum.docs(delDocs, docs);
+ while (true) {
+ final int docID = docs.nextDoc();
+ if (docID == DocsEnum.NO_MORE_DOCS) {
+ break;
+ }
+ retArray[docID] = termval;
+ }
+ }
+ } catch (StopFillCacheException stop) {
+ }
}
- if (retArray == null) // no values
+
+ if (retArray == null) {
+ // no values
retArray = new float[reader.maxDoc()];
+ }
return retArray;
}
}
@@ -487,27 +532,41 @@ class FieldCacheImpl implements FieldCac
}
}
long[] retArray = null;
- TermDocs termDocs = reader.termDocs();
- TermEnum termEnum = reader.terms (new Term(field));
- try {
- do {
- Term term = termEnum.term();
- if (term==null || term.field() != field) break;
- long termval = parser.parseLong(term.text());
- if (retArray == null) // late init
- retArray = new long[reader.maxDoc()];
- termDocs.seek (termEnum);
- while (termDocs.next()) {
- retArray[termDocs.doc()] = termval;
- }
- } while (termEnum.next());
- } catch (StopFillCacheException stop) {
- } finally {
- termDocs.close();
- termEnum.close();
+
+ Terms terms = MultiFields.getTerms(reader, field);
+ if (terms != null) {
+ final TermsEnum termsEnum = terms.iterator();
+ final Bits delDocs = MultiFields.getDeletedDocs(reader);
+ DocsEnum docs = null;
+ try {
+ while(true) {
+ final BytesRef term = termsEnum.next();
+ if (term == null) {
+ break;
+ }
+ final long termval = parser.parseLong(term);
+ if (retArray == null) {
+ // late init so numeric fields don't double allocate
+ retArray = new long[reader.maxDoc()];
+ }
+
+ docs = termsEnum.docs(delDocs, docs);
+ while (true) {
+ final int docID = docs.nextDoc();
+ if (docID == DocsEnum.NO_MORE_DOCS) {
+ break;
+ }
+ retArray[docID] = termval;
+ }
+ }
+ } catch (StopFillCacheException stop) {
+ }
}
- if (retArray == null) // no values
+
+ if (retArray == null) {
+ // no values
retArray = new long[reader.maxDoc()];
+ }
return retArray;
}
}
@@ -543,24 +602,35 @@ class FieldCacheImpl implements FieldCac
}
}
double[] retArray = null;
- TermDocs termDocs = reader.termDocs();
- TermEnum termEnum = reader.terms (new Term (field));
- try {
- do {
- Term term = termEnum.term();
- if (term==null || term.field() != field) break;
- double termval = parser.parseDouble(term.text());
- if (retArray == null) // late init
- retArray = new double[reader.maxDoc()];
- termDocs.seek (termEnum);
- while (termDocs.next()) {
- retArray[termDocs.doc()] = termval;
- }
- } while (termEnum.next());
- } catch (StopFillCacheException stop) {
- } finally {
- termDocs.close();
- termEnum.close();
+
+ Terms terms = MultiFields.getTerms(reader, field);
+ if (terms != null) {
+ final TermsEnum termsEnum = terms.iterator();
+ final Bits delDocs = MultiFields.getDeletedDocs(reader);
+ DocsEnum docs = null;
+ try {
+ while(true) {
+ final BytesRef term = termsEnum.next();
+ if (term == null) {
+ break;
+ }
+ final double termval = parser.parseDouble(term);
+ if (retArray == null) {
+ // late init so numeric fields don't double allocate
+ retArray = new double[reader.maxDoc()];
+ }
+
+ docs = termsEnum.docs(delDocs, docs);
+ while (true) {
+ final int docID = docs.nextDoc();
+ if (docID == DocsEnum.NO_MORE_DOCS) {
+ break;
+ }
+ retArray[docID] = termval;
+ }
+ }
+ } catch (StopFillCacheException stop) {
+ }
}
if (retArray == null) // no values
retArray = new double[reader.maxDoc()];
@@ -584,21 +654,27 @@ class FieldCacheImpl implements FieldCac
throws IOException {
String field = StringHelper.intern(entryKey.field);
final String[] retArray = new String[reader.maxDoc()];
- TermDocs termDocs = reader.termDocs();
- TermEnum termEnum = reader.terms (new Term (field));
- try {
- do {
- Term term = termEnum.term();
- if (term==null || term.field() != field) break;
- String termval = term.text();
- termDocs.seek (termEnum);
- while (termDocs.next()) {
- retArray[termDocs.doc()] = termval;
- }
- } while (termEnum.next());
- } finally {
- termDocs.close();
- termEnum.close();
+
+ Terms terms = MultiFields.getTerms(reader, field);
+ if (terms != null) {
+ final TermsEnum termsEnum = terms.iterator();
+ final Bits delDocs = MultiFields.getDeletedDocs(reader);
+ DocsEnum docs = null;
+ while(true) {
+ final BytesRef term = termsEnum.next();
+ if (term == null) {
+ break;
+ }
+ docs = termsEnum.docs(delDocs, docs);
+ final String termval = term.utf8ToString();
+ while (true) {
+ final int docID = docs.nextDoc();
+ if (docID == DocsEnum.NO_MORE_DOCS) {
+ break;
+ }
+ retArray[docID] = termval;
+ }
+ }
}
return retArray;
}
@@ -621,8 +697,10 @@ class FieldCacheImpl implements FieldCac
String field = StringHelper.intern(entryKey.field);
final int[] retArray = new int[reader.maxDoc()];
String[] mterms = new String[reader.maxDoc()+1];
- TermDocs termDocs = reader.termDocs();
- TermEnum termEnum = reader.terms (new Term (field));
+
+ //System.out.println("FC: getStringIndex field=" + field);
+ Terms terms = MultiFields.getTerms(reader, field);
+
int t = 0; // current term number
// an entry for documents that have no terms in this field
@@ -631,24 +709,31 @@ class FieldCacheImpl implements FieldCac
// needs to change as well.
mterms[t++] = null;
- try {
- do {
- Term term = termEnum.term();
- if (term==null || term.field() != field) break;
+ if (terms != null) {
+ final TermsEnum termsEnum = terms.iterator();
+ final Bits delDocs = MultiFields.getDeletedDocs(reader);
+ DocsEnum docs = null;
+ while(true) {
+ final BytesRef term = termsEnum.next();
+ if (term == null) {
+ break;
+ }
// store term text
- mterms[t] = term.text();
+ mterms[t] = term.utf8ToString();
+ //System.out.println("FC: ord=" + t + " term=" + term.toBytesString());
- termDocs.seek (termEnum);
- while (termDocs.next()) {
- retArray[termDocs.doc()] = t;
+ docs = termsEnum.docs(delDocs, docs);
+ while (true) {
+ final int docID = docs.nextDoc();
+ if (docID == DocsEnum.NO_MORE_DOCS) {
+ break;
+ }
+ //System.out.println("FC: docID=" + docID);
+ retArray[docID] = t;
}
-
t++;
- } while (termEnum.next());
- } finally {
- termDocs.close();
- termEnum.close();
+ }
}
if (t == 0) {
@@ -658,16 +743,17 @@ class FieldCacheImpl implements FieldCac
} else if (t < mterms.length) {
// if there are less terms than documents,
// trim off the dead array space
- String[] terms = new String[t];
- System.arraycopy (mterms, 0, terms, 0, t);
- mterms = terms;
+ String[] newTerms = new String[t];
+ System.arraycopy (mterms, 0, newTerms, 0, t);
+ mterms = newTerms;
}
StringIndex value = new StringIndex (retArray, mterms);
+ //System.out.println("FC: done\n");
return value;
}
}
-
+
private volatile PrintStream infoStream;
public void setInfoStream(PrintStream stream) {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java Tue Apr 6 19:19:27 2010
@@ -19,8 +19,9 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.NumericUtils;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.document.NumericField; // for javadocs
/**
@@ -119,9 +120,9 @@ public abstract class FieldCacheRangeFil
assert inclusiveLowerPoint > 0 && inclusiveUpperPoint > 0;
- // for this DocIdSet, we never need to use TermDocs,
+ // for this DocIdSet, we can ignore deleted docs
// because deleted docs have an order of 0 (null entry in StringIndex)
- return new FieldCacheDocIdSet(reader, false) {
+ return new FieldCacheDocIdSet(reader, true) {
@Override
final boolean matchDoc(int doc) {
return fcsi.order[doc] >= inclusiveLowerPoint && fcsi.order[doc] <= inclusiveUpperPoint;
@@ -171,8 +172,8 @@ public abstract class FieldCacheRangeFil
return DocIdSet.EMPTY_DOCIDSET;
final byte[] values = FieldCache.DEFAULT.getBytes(reader, field, (FieldCache.ByteParser) parser);
- // we only request the usage of termDocs, if the range contains 0
- return new FieldCacheDocIdSet(reader, (inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) {
+ // we only respect deleted docs if the range contains 0
+ return new FieldCacheDocIdSet(reader, !(inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) {
@Override
boolean matchDoc(int doc) {
return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
@@ -222,8 +223,8 @@ public abstract class FieldCacheRangeFil
return DocIdSet.EMPTY_DOCIDSET;
final short[] values = FieldCache.DEFAULT.getShorts(reader, field, (FieldCache.ShortParser) parser);
- // we only request the usage of termDocs, if the range contains 0
- return new FieldCacheDocIdSet(reader, (inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) {
+ // ignore deleted docs if range doesn't contain 0
+ return new FieldCacheDocIdSet(reader, !(inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) {
@Override
boolean matchDoc(int doc) {
return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
@@ -273,8 +274,8 @@ public abstract class FieldCacheRangeFil
return DocIdSet.EMPTY_DOCIDSET;
final int[] values = FieldCache.DEFAULT.getInts(reader, field, (FieldCache.IntParser) parser);
- // we only request the usage of termDocs, if the range contains 0
- return new FieldCacheDocIdSet(reader, (inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) {
+ // ignore deleted docs if range doesn't contain 0
+ return new FieldCacheDocIdSet(reader, !(inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) {
@Override
boolean matchDoc(int doc) {
return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
@@ -324,8 +325,8 @@ public abstract class FieldCacheRangeFil
return DocIdSet.EMPTY_DOCIDSET;
final long[] values = FieldCache.DEFAULT.getLongs(reader, field, (FieldCache.LongParser) parser);
- // we only request the usage of termDocs, if the range contains 0
- return new FieldCacheDocIdSet(reader, (inclusiveLowerPoint <= 0L && inclusiveUpperPoint >= 0L)) {
+ // ignore deleted docs if range doesn't contain 0
+ return new FieldCacheDocIdSet(reader, !(inclusiveLowerPoint <= 0L && inclusiveUpperPoint >= 0L)) {
@Override
boolean matchDoc(int doc) {
return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
@@ -379,8 +380,8 @@ public abstract class FieldCacheRangeFil
return DocIdSet.EMPTY_DOCIDSET;
final float[] values = FieldCache.DEFAULT.getFloats(reader, field, (FieldCache.FloatParser) parser);
- // we only request the usage of termDocs, if the range contains 0
- return new FieldCacheDocIdSet(reader, (inclusiveLowerPoint <= 0.0f && inclusiveUpperPoint >= 0.0f)) {
+ // ignore deleted docs if range doesn't contain 0
+ return new FieldCacheDocIdSet(reader, !(inclusiveLowerPoint <= 0.0f && inclusiveUpperPoint >= 0.0f)) {
@Override
boolean matchDoc(int doc) {
return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
@@ -434,8 +435,8 @@ public abstract class FieldCacheRangeFil
return DocIdSet.EMPTY_DOCIDSET;
final double[] values = FieldCache.DEFAULT.getDoubles(reader, field, (FieldCache.DoubleParser) parser);
- // we only request the usage of termDocs, if the range contains 0
- return new FieldCacheDocIdSet(reader, (inclusiveLowerPoint <= 0.0 && inclusiveUpperPoint >= 0.0)) {
+ // ignore deleted docs if range doesn't contain 0
+ return new FieldCacheDocIdSet(reader, !(inclusiveLowerPoint <= 0.0 && inclusiveUpperPoint >= 0.0)) {
@Override
boolean matchDoc(int doc) {
return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
@@ -503,99 +504,81 @@ public abstract class FieldCacheRangeFil
static abstract class FieldCacheDocIdSet extends DocIdSet {
private final IndexReader reader;
- private boolean mayUseTermDocs;
-
- FieldCacheDocIdSet(IndexReader reader, boolean mayUseTermDocs) {
+ private boolean canIgnoreDeletedDocs;
+
+ FieldCacheDocIdSet(IndexReader reader, boolean canIgnoreDeletedDocs) {
this.reader = reader;
- this.mayUseTermDocs = mayUseTermDocs;
+ this.canIgnoreDeletedDocs = canIgnoreDeletedDocs;
}
-
- /** this method checks, if a doc is a hit, should throw AIOBE, when position invalid */
+
+ /**
+ * this method checks, if a doc is a hit, should throw AIOBE, when position
+ * invalid
+ */
abstract boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException;
-
- /** this DocIdSet is cacheable, if it works solely with FieldCache and no TermDocs */
+
+ /**
+ * this DocIdSet is cacheable, if it can ignore deletions
+ */
@Override
public boolean isCacheable() {
- return !(mayUseTermDocs && reader.hasDeletions());
+ return canIgnoreDeletedDocs || !reader.hasDeletions();
}
@Override
public DocIdSetIterator iterator() throws IOException {
// Synchronization needed because deleted docs BitVector
// can change after call to hasDeletions until TermDocs creation.
- // We only use an iterator with termDocs, when this was requested (e.g. range contains 0)
+ // We only use an iterator with termDocs, when this was requested (e.g.
+ // range contains 0)
// and the index has deletions
- final TermDocs termDocs;
- synchronized(reader) {
- termDocs = isCacheable() ? null : reader.termDocs(null);
+
+ final Bits skipDocs;
+ synchronized (reader) {
+ if (isCacheable()) {
+ skipDocs = null;
+ } else {
+ skipDocs = MultiFields.getDeletedDocs(reader);
+ }
}
- if (termDocs != null) {
- // a DocIdSetIterator using TermDocs to iterate valid docIds
- return new DocIdSetIterator() {
- private int doc = -1;
-
- @Override
- public int docID() {
- return doc;
- }
-
- @Override
- public int nextDoc() throws IOException {
+ final int maxDoc = reader.maxDoc();
+
+ // a DocIdSetIterator generating docIds by
+ // incrementing a variable & checking skipDocs -
+ return new DocIdSetIterator() {
+ private int doc = -1;
+ @Override
+ public int docID() {
+ return doc;
+ }
+
+ @Override
+ public int nextDoc() {
+ try {
do {
- if (!termDocs.next())
- return doc = NO_MORE_DOCS;
- } while (!matchDoc(doc = termDocs.doc()));
+ doc++;
+ } while ((skipDocs != null && doc < maxDoc && skipDocs.get(doc))
+ || !matchDoc(doc));
return doc;
+ } catch (ArrayIndexOutOfBoundsException e) {
+ return doc = NO_MORE_DOCS;
}
-
- @Override
- public int advance(int target) throws IOException {
- if (!termDocs.skipTo(target))
- return doc = NO_MORE_DOCS;
- while (!matchDoc(doc = termDocs.doc())) {
- if (!termDocs.next())
- return doc = NO_MORE_DOCS;
+ }
+
+ @Override
+ public int advance(int target) {
+ try {
+ doc = target;
+ while (!matchDoc(doc)) {
+ doc++;
}
return doc;
+ } catch (ArrayIndexOutOfBoundsException e) {
+ return doc = NO_MORE_DOCS;
}
- };
- } else {
- // a DocIdSetIterator generating docIds by incrementing a variable -
- // this one can be used if there are no deletions are on the index
- return new DocIdSetIterator() {
- private int doc = -1;
-
- @Override
- public int docID() {
- return doc;
- }
-
- @Override
- public int nextDoc() {
- try {
- do {
- doc++;
- } while (!matchDoc(doc));
- return doc;
- } catch (ArrayIndexOutOfBoundsException e) {
- return doc = NO_MORE_DOCS;
- }
- }
-
- @Override
- public int advance(int target) {
- try {
- doc = target;
- while (!matchDoc(doc)) {
- doc++;
- }
- return doc;
- } catch (ArrayIndexOutOfBoundsException e) {
- return doc = NO_MORE_DOCS;
- }
- }
- };
- }
+
+ }
+ };
}
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FilteredTermEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FilteredTermEnum.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FilteredTermEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FilteredTermEnum.java Tue Apr 6 19:19:27 2010
@@ -24,7 +24,11 @@ import org.apache.lucene.index.TermEnum;
/** Abstract class for enumerating a subset of all terms.
<p>Term enumerations are always ordered by Term.compareTo(). Each term in
- the enumeration is greater than all that precede it. */
+ the enumeration is greater than all that precede it.
+
+ @deprecated Switch to {@link FilteredTermsEnum} instead.
+*/
+@Deprecated
public abstract class FilteredTermEnum extends TermEnum {
/** the current term */
protected Term currentTerm = null;
@@ -37,7 +41,14 @@ public abstract class FilteredTermEnum e
/** Equality compare on the term */
protected abstract boolean termCompare(Term term);
- /** Equality measure on the term */
+ /** Equality measure on the term, it is in reality a boost
+ * factor and used like so in {@link MultiTermQuery},
+ * so the name is wrong.
+ * @deprecated Use {@link MultiTermQuery.BoostAttribute}
+ * together with {@link FilteredTermsEnum}. For example
+ * see {@link FuzzyTermsEnum}
+ */
+ @Deprecated
public abstract float difference();
/** Indicates the end of the enumeration has been reached */
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyQuery.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyQuery.java Tue Apr 6 19:19:27 2010
@@ -19,6 +19,7 @@ package org.apache.lucene.search;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
@@ -68,6 +69,7 @@ public class FuzzyQuery extends MultiTer
*/
public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength,
int maxExpansions) {
+ super(term.field());
this.term = term;
if (minimumSimilarity >= 1.0f)
@@ -127,7 +129,7 @@ public class FuzzyQuery extends MultiTer
return prefixLength;
}
- @Override
+ @Override @Deprecated
protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
if (!termLongEnough) { // can only match if it's exact
return new SingleTermEnum(reader, term);
@@ -135,6 +137,14 @@ public class FuzzyQuery extends MultiTer
return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity, prefixLength);
}
+ @Override
+ protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
+ if (!termLongEnough) { // can only match if it's exact
+ return new SingleTermsEnum(reader, term);
+ }
+ return new FuzzyTermsEnum(reader, getTerm(), minimumSimilarity, prefixLength);
+ }
+
/**
* Returns the pattern term.
*/
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermEnum.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermEnum.java Tue Apr 6 19:19:27 2010
@@ -27,7 +27,10 @@ import org.apache.lucene.index.Term;
*
* <p>Term enumerations are always ordered by Term.compareTo(). Each term in
* the enumeration is greater than all that precede it.
+ *
+ * @deprecated Please use {@link FuzzyTermsEnum} instead.
*/
+@Deprecated
public final class FuzzyTermEnum extends FilteredTermEnum {
/* Allows us save time required to create a new array
@@ -136,7 +139,8 @@ public final class FuzzyTermEnum extends
return false;
}
- /** {@inheritDoc} */
+ /** @deprecated Use {@link MultiTermQuery.BoostAttribute} together with {@link FuzzyTermsEnum} */
+ @Deprecated
@Override
public final float difference() {
return (similarity - minimumSimilarity) * scale_factor;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MatchAllDocsQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MatchAllDocsQuery.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MatchAllDocsQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MatchAllDocsQuery.java Tue Apr 6 19:19:27 2010
@@ -19,8 +19,9 @@ package org.apache.lucene.search;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.ToStringUtils;
+import org.apache.lucene.util.Bits;
import java.util.Set;
import java.io.IOException;
@@ -45,16 +46,18 @@ public class MatchAllDocsQuery extends Q
}
private class MatchAllScorer extends Scorer {
- final TermDocs termDocs;
final float score;
final byte[] norms;
private int doc = -1;
+ private final int maxDoc;
+ private final Bits delDocs;
MatchAllScorer(IndexReader reader, Similarity similarity, Weight w,
byte[] norms) throws IOException {
super(similarity);
- this.termDocs = reader.termDocs(null);
+ delDocs = MultiFields.getDeletedDocs(reader);
score = w.getValue();
+ maxDoc = reader.maxDoc();
this.norms = norms;
}
@@ -65,7 +68,14 @@ public class MatchAllDocsQuery extends Q
@Override
public int nextDoc() throws IOException {
- return doc = termDocs.next() ? termDocs.doc() : NO_MORE_DOCS;
+ doc++;
+ while(delDocs != null && doc < maxDoc && delDocs.get(doc)) {
+ doc++;
+ }
+ if (doc == maxDoc) {
+ doc = NO_MORE_DOCS;
+ }
+ return doc;
}
@Override
@@ -75,7 +85,8 @@ public class MatchAllDocsQuery extends Q
@Override
public int advance(int target) throws IOException {
- return doc = termDocs.skipTo(target) ? termDocs.doc() : NO_MORE_DOCS;
+ doc = target-1;
+ return nextDoc();
}
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java Tue Apr 6 19:19:27 2010
@@ -21,10 +21,14 @@ import java.io.IOException;
import java.util.*;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.MultipleTermPositions;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermPositions;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ToStringUtils;
+import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.Bits;
/**
* MultiPhraseQuery is a generalized version of PhraseQuery, with an added
@@ -167,27 +171,31 @@ public class MultiPhraseQuery extends Qu
if (termArrays.size() == 0) // optimize zero-term case
return null;
- TermPositions[] tps = new TermPositions[termArrays.size()];
- for (int i=0; i<tps.length; i++) {
+ DocsAndPositionsEnum[] postings = new DocsAndPositionsEnum[termArrays.size()];
+ for (int i=0; i<postings.length; i++) {
Term[] terms = termArrays.get(i);
- TermPositions p;
- if (terms.length > 1)
- p = new MultipleTermPositions(reader, terms);
- else
- p = reader.termPositions(terms[0]);
+ final DocsAndPositionsEnum postingsEnum;
+ if (terms.length > 1) {
+ postingsEnum = new UnionDocsAndPositionsEnum(reader, terms);
+ } else {
+ postingsEnum = reader.termPositionsEnum(MultiFields.getDeletedDocs(reader),
+ terms[0].field(),
+ new BytesRef(terms[0].text()));
+ }
- if (p == null)
+ if (postingsEnum == null) {
return null;
+ }
- tps[i] = p;
+ postings[i] = postingsEnum;
}
if (slop == 0)
- return new ExactPhraseScorer(this, tps, getPositions(), similarity,
+ return new ExactPhraseScorer(this, postings, getPositions(), similarity,
reader.norms(field));
else
- return new SloppyPhraseScorer(this, tps, getPositions(), similarity,
+ return new SloppyPhraseScorer(this, postings, getPositions(), similarity,
slop, reader.norms(field));
}
@@ -370,3 +378,169 @@ public class MultiPhraseQuery extends Qu
return true;
}
}
+
+/**
+ * Takes the logical union of multiple DocsEnum iterators.
+ */
+
+// TODO: if ever we allow subclassing of the *PhraseScorer
+class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
+
+ private static final class DocsQueue extends PriorityQueue<DocsAndPositionsEnum> {
+ DocsQueue(List<DocsAndPositionsEnum> docsEnums) throws IOException {
+ initialize(docsEnums.size());
+
+ Iterator<DocsAndPositionsEnum> i = docsEnums.iterator();
+ while (i.hasNext()) {
+ DocsAndPositionsEnum postings = (DocsAndPositionsEnum) i.next();
+ if (postings.nextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) {
+ add(postings);
+ }
+ }
+ }
+
+ final public DocsEnum peek() {
+ return top();
+ }
+
+ @Override
+ public final boolean lessThan(DocsAndPositionsEnum a, DocsAndPositionsEnum b) {
+ return a.docID() < b.docID();
+ }
+ }
+
+ private static final class IntQueue {
+ private int _arraySize = 16;
+ private int _index = 0;
+ private int _lastIndex = 0;
+ private int[] _array = new int[_arraySize];
+
+ final void add(int i) {
+ if (_lastIndex == _arraySize)
+ growArray();
+
+ _array[_lastIndex++] = i;
+ }
+
+ final int next() {
+ return _array[_index++];
+ }
+
+ final void sort() {
+ Arrays.sort(_array, _index, _lastIndex);
+ }
+
+ final void clear() {
+ _index = 0;
+ _lastIndex = 0;
+ }
+
+ final int size() {
+ return (_lastIndex - _index);
+ }
+
+ private void growArray() {
+ int[] newArray = new int[_arraySize * 2];
+ System.arraycopy(_array, 0, newArray, 0, _arraySize);
+ _array = newArray;
+ _arraySize *= 2;
+ }
+ }
+
+ private int _doc;
+ private int _freq;
+ private DocsQueue _queue;
+ private IntQueue _posList;
+
+ public UnionDocsAndPositionsEnum(IndexReader indexReader, Term[] terms) throws IOException {
+ List<DocsAndPositionsEnum> docsEnums = new LinkedList<DocsAndPositionsEnum>();
+ final Bits delDocs = MultiFields.getDeletedDocs(indexReader);
+ for (int i = 0; i < terms.length; i++) {
+ DocsAndPositionsEnum postings = indexReader.termPositionsEnum(delDocs,
+ terms[i].field(),
+ new BytesRef(terms[i].text()));
+ if (postings != null) {
+ docsEnums.add(postings);
+ }
+ }
+
+ _queue = new DocsQueue(docsEnums);
+ _posList = new IntQueue();
+ }
+
+ @Override
+ public final int nextDoc() throws IOException {
+ if (_queue.size() == 0) {
+ return NO_MORE_DOCS;
+ }
+
+ // TODO: move this init into positions(): if the search
+ // doesn't need the positions for this doc then don't
+ // waste CPU merging them:
+ _posList.clear();
+ _doc = _queue.top().docID();
+
+ // merge sort all positions together
+ DocsAndPositionsEnum postings;
+ do {
+ postings = _queue.top();
+
+ final int freq = postings.freq();
+ for (int i = 0; i < freq; i++) {
+ _posList.add(postings.nextPosition());
+ }
+
+ if (postings.nextDoc() != NO_MORE_DOCS) {
+ _queue.updateTop();
+ } else {
+ _queue.pop();
+ }
+ } while (_queue.size() > 0 && _queue.top().docID() == _doc);
+
+ _posList.sort();
+ _freq = _posList.size();
+
+ return _doc;
+ }
+
+ @Override
+ public int nextPosition() {
+ return _posList.next();
+ }
+
+ @Override
+ public int getPayloadLength() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public BytesRef getPayload() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean hasPayload() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public final int advance(int target) throws IOException {
+ while (_queue.top() != null && target > _queue.top().docID()) {
+ DocsAndPositionsEnum postings = _queue.pop();
+ if (postings.advance(target) != NO_MORE_DOCS) {
+ _queue.add(postings);
+ }
+ }
+ return nextDoc();
+ }
+
+ @Override
+ public final int freq() {
+ return _freq;
+ }
+
+ @Override
+ public final int docID() {
+ return _doc;
+ }
+}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java Tue Apr 6 19:19:27 2010
@@ -24,17 +24,24 @@ import java.util.PriorityQueue;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
-
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.Terms;
import org.apache.lucene.queryParser.QueryParser; // for javadoc
+import org.apache.lucene.util.Attribute;
+import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.VirtualMethod;
/**
* An abstract {@link Query} that matches documents
* containing a subset of terms provided by a {@link
- * FilteredTermEnum} enumeration.
+ * FilteredTermsEnum} enumeration.
*
* <p>This query cannot be used directly; you must subclass
- * it and define {@link #getEnum} to provide a {@link
- * FilteredTermEnum} that iterates through the terms to be
+ * it and define {@link #getTermsEnum} to provide a {@link
+ * FilteredTermsEnum} that iterates through the terms to be
* matched.
*
* <p><b>NOTE</b>: if {@link #setRewriteMethod} is either
@@ -61,8 +68,90 @@ import org.apache.lucene.queryParser.Que
* #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} by default.
*/
public abstract class MultiTermQuery extends Query {
+ protected final String field;
protected RewriteMethod rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
transient int numberOfTerms = 0;
+
+ /** @deprecated remove when getEnum is removed */
+ private static final VirtualMethod<MultiTermQuery> getEnumMethod =
+ new VirtualMethod<MultiTermQuery>(MultiTermQuery.class, "getEnum", IndexReader.class);
+ /** @deprecated remove when getEnum is removed */
+ private static final VirtualMethod<MultiTermQuery> getTermsEnumMethod =
+ new VirtualMethod<MultiTermQuery>(MultiTermQuery.class, "getTermsEnum", IndexReader.class);
+ /** @deprecated remove when getEnum is removed */
+ final boolean hasNewAPI =
+ VirtualMethod.compareImplementationDistance(getClass(),
+ getTermsEnumMethod, getEnumMethod) >= 0; // its ok for both to be overridden
+
+ /** Add this {@link Attribute} to a {@link TermsEnum} returned by {@link #getTermsEnum}
+ * and update the boost on each returned term. This makes it possible to control the boost factor
+ * for each matching term in {@link #SCORING_BOOLEAN_QUERY_REWRITE} or
+ * {@link TopTermsBooleanQueryRewrite} mode.
+ * {@link FuzzyQuery} is using this to take the edit distance into account.
+ */
+ public static interface BoostAttribute extends Attribute {
+ /** Sets the boost in this attribute */
+ public void setBoost(float boost);
+ /** Retrieves the boost, default is {@code 1.0f}. */
+ public float getBoost();
+ /** Sets the maximum boost for terms that would never get
+ * into the priority queue of {@link MultiTermQuery.TopTermsBooleanQueryRewrite}.
+ * This value is not changed by {@link AttributeImpl#clear}
+ * and not used in {@code equals()} and {@code hashCode()}.
+ * Do not change the value in the {@link TermsEnum}!
+ */
+ public void setMaxNonCompetitiveBoost(float maxNonCompetitiveBoost);
+ /** Retrieves the maximum boost that is not competitive,
+ * default is negative infinity. You can use this boost value
+ * as a hint when writing the {@link TermsEnum}.
+ */
+ public float getMaxNonCompetitiveBoost();
+ }
+
+ /** Implementation class for {@link BoostAttribute}. */
+ public static final class BoostAttributeImpl extends AttributeImpl implements BoostAttribute {
+ private float boost = 1.0f, maxNonCompetitiveBoost = Float.NEGATIVE_INFINITY;
+
+ public void setBoost(float boost) {
+ this.boost = boost;
+ }
+
+ public float getBoost() {
+ return boost;
+ }
+
+ public void setMaxNonCompetitiveBoost(float maxNonCompetitiveBoost) {
+ this.maxNonCompetitiveBoost = maxNonCompetitiveBoost;
+ }
+
+ public float getMaxNonCompetitiveBoost() {
+ return maxNonCompetitiveBoost;
+ }
+
+ @Override
+ public void clear() {
+ boost = 1.0f;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (this == other)
+ return true;
+ if (other instanceof BoostAttributeImpl)
+ return ((BoostAttributeImpl) other).boost == boost;
+ return false;
+ }
+
+ @Override
+ public int hashCode() {
+ return Float.floatToIntBits(boost);
+ }
+
+ @Override
+ public void copyTo(AttributeImpl target) {
+ ((BoostAttribute) target).setBoost(boost);
+ }
+ }
/** Abstract class that defines how the query is rewritten. */
public static abstract class RewriteMethod implements Serializable {
@@ -100,30 +189,79 @@ public abstract class MultiTermQuery ext
private abstract static class BooleanQueryRewrite extends RewriteMethod {
protected final int collectTerms(IndexReader reader, MultiTermQuery query, TermCollector collector) throws IOException {
- final FilteredTermEnum enumerator = query.getEnum(reader);
- int count = 0;
- try {
- do {
- Term t = enumerator.term();
- if (t != null) {
- if (collector.collect(t, enumerator.difference())) {
- count++;
- } else {
- break;
- }
+
+ if (query.hasNewAPI) {
+
+ if (query.field == null) {
+ throw new NullPointerException("If you implement getTermsEnum(), you must specify a non-null field in the constructor of MultiTermQuery.");
+ }
+
+ final Fields fields = MultiFields.getFields(reader);
+ if (fields == null) {
+ // reader has no fields
+ return 0;
+ }
+
+ final Terms terms = fields.terms(query.field);
+ if (terms == null) {
+ // field does not exist
+ return 0;
+ }
+
+ final TermsEnum termsEnum = query.getTermsEnum(reader);
+ assert termsEnum != null;
+
+ if (termsEnum == TermsEnum.EMPTY)
+ return 0;
+ final BoostAttribute boostAtt =
+ termsEnum.attributes().addAttribute(BoostAttribute.class);
+ collector.boostAtt = boostAtt;
+ int count = 0;
+ BytesRef term;
+ final Term placeholderTerm = new Term(query.field);
+ while ((term = termsEnum.next()) != null) {
+ if (collector.collect(placeholderTerm.createTerm(term.utf8ToString()), boostAtt.getBoost())) {
+ count++;
+ } else {
+ break;
}
- } while (enumerator.next());
- } finally {
- enumerator.close();
+ }
+ collector.boostAtt = null;
+ return count;
+ } else {
+ // deprecated case
+ final FilteredTermEnum enumerator = query.getEnum(reader);
+ int count = 0;
+ try {
+ do {
+ Term t = enumerator.term();
+ if (t != null) {
+ if (collector.collect(t, enumerator.difference())) {
+ count++;
+ } else {
+ break;
+ }
+ }
+ } while (enumerator.next());
+ } finally {
+ enumerator.close();
+ }
+ return count;
}
- return count;
}
- protected interface TermCollector {
+ protected static abstract class TermCollector {
+ /** this field is only set if a boostAttribute is used (e.g. {@link FuzzyTermsEnum}) */
+ private BoostAttribute boostAtt = null;
+
/** return false to stop collecting */
- boolean collect(Term t, float boost) throws IOException;
+ public abstract boolean collect(Term t, float boost) throws IOException;
+
+ /** set the minimum boost as a hint for the term producer */
+ protected final void setMaxNonCompetitiveBoost(float maxNonCompetitiveBoost) {
+ if (boostAtt != null) boostAtt.setMaxNonCompetitiveBoost(maxNonCompetitiveBoost);
+ }
}
-
}
private static class ScoringBooleanQueryRewrite extends BooleanQueryRewrite {
@@ -207,6 +345,7 @@ public abstract class MultiTermQuery ext
stQueue.offer(st);
// possibly drop entries from queue
st = (stQueue.size() > maxSize) ? stQueue.poll() : new ScoreTerm();
+ setMaxNonCompetitiveBoost((stQueue.size() >= maxSize) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY);
return true;
}
@@ -338,6 +477,7 @@ public abstract class MultiTermQuery ext
public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
Query result = super.rewrite(reader, query);
assert result instanceof BooleanQuery;
+ // TODO: if empty boolean query return NullQuery?
if (!((BooleanQuery) result).clauses().isEmpty()) {
// strip the scores off
result = new ConstantScoreQuery(new QueryWrapperFilter(result));
@@ -448,7 +588,7 @@ public abstract class MultiTermQuery ext
}
}
- private static final class CutOffTermCollector implements TermCollector {
+ private static final class CutOffTermCollector extends TermCollector {
CutOffTermCollector(IndexReader reader, int docCountCutoff, int termCountLimit) {
this.reader = reader;
this.docCountCutoff = docCountCutoff;
@@ -465,6 +605,7 @@ public abstract class MultiTermQuery ext
// should not be costly, because 1) the
// query/filter will load the TermInfo when it
// runs, and 2) the terms dict has a cache:
+ // @deprecated: in 4.0 use BytesRef for collectTerms()
docVisitCount += reader.docFreq(t);
return true;
}
@@ -538,12 +679,44 @@ public abstract class MultiTermQuery ext
* Constructs a query matching terms that cannot be represented with a single
* Term.
*/
+ public MultiTermQuery(final String field) {
+ this.field = field;
+ }
+
+ /**
+ * Constructs a query matching terms that cannot be represented with a single
+ * Term.
+ * @deprecated Use {@link #MultiTermQuery(String)}, as the flex branch can
+ * only work on one field per terms enum. If you override
+ * {@link #getTermsEnum(IndexReader)}, you cannot use this ctor.
+ */
+ @Deprecated
public MultiTermQuery() {
+ this(null);
}
- /** Construct the enumeration to be used, expanding the pattern term. */
- protected abstract FilteredTermEnum getEnum(IndexReader reader)
- throws IOException;
+ /** Returns the field name for this query */
+ public final String getField() { return field; }
+
+ /** Construct the enumeration to be used, expanding the
+ * pattern term.
+ * @deprecated Please override {@link #getTermsEnum} instead */
+ @Deprecated
+ protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ /** Construct the enumeration to be used, expanding the
+ * pattern term. This method should only be called if
+ * the field exists (ie, implementations can assume the
+ * field does exist). This method should not return null
+ * (should instead return {@link TermsEnum#EMPTY} if no
+ * terms match). The TermsEnum must already be
+ * positioned to the first matching term. */
+ // TODO 4.0: make this method abstract
+ protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
+ throw new UnsupportedOperationException();
+ }
/**
* Expert: Return the number of unique terms visited during execution of the query.
@@ -602,8 +775,8 @@ public abstract class MultiTermQuery ext
final int prime = 31;
int result = 1;
result = prime * result + Float.floatToIntBits(getBoost());
- result = prime * result;
- result += rewriteMethod.hashCode();
+ result = prime * result + rewriteMethod.hashCode();
+ if (field != null) result = prime * result + field.hashCode();
return result;
}
@@ -621,7 +794,7 @@ public abstract class MultiTermQuery ext
if (!rewriteMethod.equals(other.rewriteMethod)) {
return false;
}
- return true;
+ return (other.field == null ? field == null : other.field.equals(field));
}
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java Tue Apr 6 19:19:27 2010
@@ -21,9 +21,15 @@ import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.util.Bits;
/**
* A wrapper for {@link MultiTermQuery}, that exposes its
@@ -70,6 +76,9 @@ public class MultiTermQueryWrapperFilter
public final int hashCode() {
return query.hashCode();
}
+
+ /** Returns the field name for this query */
+ public final String getField() { return query.getField(); }
/**
* Expert: Return the number of unique terms visited during execution of the filter.
@@ -95,49 +104,101 @@ public class MultiTermQueryWrapperFilter
}
/**
- * Returns a DocIdSet with documents that should be
- * permitted in search results.
+ * Returns a DocIdSet with documents that should be permitted in search
+ * results.
*/
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
- final TermEnum enumerator = query.getEnum(reader);
- try {
- // if current term in enum is null, the enum is empty -> shortcut
- if (enumerator.term() == null)
+ if (query.hasNewAPI) {
+ if (query.field == null) {
+ throw new NullPointerException("If you implement getTermsEnum(), you must specify a non-null field in the constructor of MultiTermQuery.");
+ }
+
+ final Fields fields = MultiFields.getFields(reader);
+ if (fields == null) {
+ // reader has no fields
return DocIdSet.EMPTY_DOCIDSET;
- // else fill into a OpenBitSet
- final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
- final int[] docs = new int[32];
- final int[] freqs = new int[32];
- TermDocs termDocs = reader.termDocs();
- try {
+ }
+
+ final Terms terms = fields.terms(query.field);
+ if (terms == null) {
+ // field does not exist
+ return DocIdSet.EMPTY_DOCIDSET;
+ }
+
+ final TermsEnum termsEnum = query.getTermsEnum(reader);
+ assert termsEnum != null;
+ if (termsEnum.next() != null) {
+ // fill into a OpenBitSet
+ final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
int termCount = 0;
+ final Bits delDocs = MultiFields.getDeletedDocs(reader);
+ DocsEnum docsEnum = null;
do {
- Term term = enumerator.term();
- if (term == null)
- break;
termCount++;
- termDocs.seek(term);
+ // System.out.println(" iter termCount=" + termCount + " term=" +
+ // enumerator.term().toBytesString());
+ docsEnum = termsEnum.docs(delDocs, docsEnum);
+ final DocsEnum.BulkReadResult result = docsEnum.getBulkResult();
while (true) {
- final int count = termDocs.read(docs, freqs);
+ final int count = docsEnum.read();
if (count != 0) {
- for(int i=0;i<count;i++) {
+ final int[] docs = result.docs.ints;
+ for (int i = 0; i < count; i++) {
bitSet.set(docs[i]);
}
} else {
break;
}
}
- } while (enumerator.next());
+ } while (termsEnum.next() != null);
+ // System.out.println(" done termCount=" + termCount);
query.incTotalNumberOfTerms(termCount);
+ return bitSet;
+ } else {
+ return DocIdSet.EMPTY_DOCIDSET;
+ }
+ } else {
+ final TermEnum enumerator = query.getEnum(reader);
+ try {
+ // if current term in enum is null, the enum is empty -> shortcut
+ if (enumerator.term() == null)
+ return DocIdSet.EMPTY_DOCIDSET;
+ // else fill into a OpenBitSet
+ final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
+ final int[] docs = new int[32];
+ final int[] freqs = new int[32];
+ TermDocs termDocs = reader.termDocs();
+ try {
+ int termCount = 0;
+ do {
+ Term term = enumerator.term();
+ if (term == null)
+ break;
+ termCount++;
+ termDocs.seek(term);
+ while (true) {
+ final int count = termDocs.read(docs, freqs);
+ if (count != 0) {
+ for (int i = 0; i < count; i++) {
+ bitSet.set(docs[i]);
+ }
+ } else {
+ break;
+ }
+ }
+ } while (enumerator.next());
+
+ query.incTotalNumberOfTerms(termCount);
+ } finally {
+ termDocs.close();
+ }
+ return bitSet;
} finally {
- termDocs.close();
+ enumerator.close();
}
- return bitSet;
- } finally {
- enumerator.close();
}
}
Propchange: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue Apr 6 19:19:27 2010
@@ -1,2 +1,3 @@
-/lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java:896850,909334
-/lucene/java/trunk/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java:924483-925561
+/lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java:824912-931101
+/lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java:909334
+/lucene/java/trunk/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java:924483-924731,924781,925176-925462
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/NumericRangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/NumericRangeFilter.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/NumericRangeFilter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/NumericRangeFilter.java Tue Apr 6 19:19:27 2010
@@ -168,9 +168,6 @@ public final class NumericRangeFilter<T
NumericRangeQuery.newFloatRange(field, min, max, minInclusive, maxInclusive)
);
}
-
- /** Returns the field name for this filter */
- public String getField() { return query.getField(); }
/** Returns <code>true</code> if the lower endpoint is inclusive */
public boolean includesMin() { return query.includesMin(); }
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/NumericRangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/NumericRangeQuery.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/NumericRangeQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/NumericRangeQuery.java Tue Apr 6 19:19:27 2010
@@ -19,15 +19,15 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.LinkedList;
+import java.util.Comparator;
import org.apache.lucene.analysis.NumericTokenStream; // for javadocs
import org.apache.lucene.document.NumericField; // for javadocs
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.ToStringUtils;
-import org.apache.lucene.util.StringHelper;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.index.TermsEnum;
/**
* <p>A {@link Query} that matches numeric values within a
@@ -157,10 +157,10 @@ public final class NumericRangeQuery<T e
private NumericRangeQuery(final String field, final int precisionStep, final int valSize,
T min, T max, final boolean minInclusive, final boolean maxInclusive
) {
+ super(field);
assert (valSize == 32 || valSize == 64);
if (precisionStep < 1)
throw new IllegalArgumentException("precisionStep must be >=1");
- this.field = StringHelper.intern(field);
this.precisionStep = precisionStep;
this.valSize = valSize;
this.min = min;
@@ -299,14 +299,14 @@ public final class NumericRangeQuery<T e
) {
return new NumericRangeQuery<Float>(field, NumericUtils.PRECISION_STEP_DEFAULT, 32, min, max, minInclusive, maxInclusive);
}
-
- @Override
- protected FilteredTermEnum getEnum(final IndexReader reader) throws IOException {
- return new NumericRangeTermEnum(reader);
- }
- /** Returns the field name for this query */
- public String getField() { return field; }
+ @Override @SuppressWarnings("unchecked")
+ protected TermsEnum getTermsEnum(final IndexReader reader) throws IOException {
+ // very strange: java.lang.Number itself is not Comparable, but all subclasses used here are
+ return (min != null && max != null && ((Comparable<T>) min).compareTo(max) > 0) ?
+ TermsEnum.EMPTY :
+ new NumericRangeTermsEnum(reader);
+ }
/** Returns <code>true</code> if the lower endpoint is inclusive */
public boolean includesMin() { return minInclusive; }
@@ -323,7 +323,7 @@ public final class NumericRangeQuery<T e
@Override
public String toString(final String field) {
final StringBuilder sb = new StringBuilder();
- if (!this.field.equals(field)) sb.append(this.field).append(':');
+ if (!getField().equals(field)) sb.append(getField()).append(':');
return sb.append(minInclusive ? '[' : '{')
.append((min == null) ? "*" : min.toString())
.append(" TO ")
@@ -341,7 +341,6 @@ public final class NumericRangeQuery<T e
if (o instanceof NumericRangeQuery) {
final NumericRangeQuery q=(NumericRangeQuery)o;
return (
- field==q.field &&
(q.min == null ? min == null : q.min.equals(min)) &&
(q.max == null ? max == null : q.max.equals(max)) &&
minInclusive == q.minInclusive &&
@@ -355,29 +354,22 @@ public final class NumericRangeQuery<T e
@Override
public final int hashCode() {
int hash = super.hashCode();
- hash += field.hashCode()^0x4565fd66 + precisionStep^0x64365465;
+ hash += precisionStep^0x64365465;
if (min != null) hash += min.hashCode()^0x14fa55fb;
if (max != null) hash += max.hashCode()^0x733fa5fe;
return hash +
(Boolean.valueOf(minInclusive).hashCode()^0x14fa55fb)+
(Boolean.valueOf(maxInclusive).hashCode()^0x733fa5fe);
}
-
- // field must be interned after reading from stream
- private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException {
- in.defaultReadObject();
- field = StringHelper.intern(field);
- }
// members (package private, to be also fast accessible by NumericRangeTermEnum)
- String field;
final int precisionStep, valSize;
final T min, max;
final boolean minInclusive,maxInclusive;
/**
- * Subclass of FilteredTermEnum for enumerating all terms that match the
- * sub-ranges for trie range queries.
+ * Subclass of FilteredTermsEnum for enumerating all terms that match the
+ * sub-ranges for trie range queries, using flex API.
* <p>
* WARNING: This term enumeration is not guaranteed to be always ordered by
* {@link Term#compareTo}.
@@ -385,16 +377,15 @@ public final class NumericRangeQuery<T e
* {@link NumericUtils#splitIntRange} generates the sub-ranges. For
* {@link MultiTermQuery} ordering is not relevant.
*/
- private final class NumericRangeTermEnum extends FilteredTermEnum {
+ private final class NumericRangeTermsEnum extends FilteredTermsEnum {
- private final IndexReader reader;
- private final LinkedList<String> rangeBounds = new LinkedList<String>();
- private final Term termTemplate = new Term(field);
- private String currentUpperBound = null;
+ private BytesRef currentLowerBound, currentUpperBound;
- NumericRangeTermEnum(final IndexReader reader) throws IOException {
- this.reader = reader;
-
+ private final LinkedList<BytesRef> rangeBounds = new LinkedList<BytesRef>();
+ private final Comparator<BytesRef> termComp;
+
+ NumericRangeTermsEnum(final IndexReader reader) throws IOException {
+ super(reader, getField());
switch (valSize) {
case 64: {
// lower
@@ -423,7 +414,7 @@ public final class NumericRangeQuery<T e
NumericUtils.splitLongRange(new NumericUtils.LongRangeBuilder() {
@Override
- public final void addRange(String minPrefixCoded, String maxPrefixCoded) {
+ public final void addRange(BytesRef minPrefixCoded, BytesRef maxPrefixCoded) {
rangeBounds.add(minPrefixCoded);
rangeBounds.add(maxPrefixCoded);
}
@@ -458,7 +449,7 @@ public final class NumericRangeQuery<T e
NumericUtils.splitIntRange(new NumericUtils.IntRangeBuilder() {
@Override
- public final void addRange(String minPrefixCoded, String maxPrefixCoded) {
+ public final void addRange(BytesRef minPrefixCoded, BytesRef maxPrefixCoded) {
rangeBounds.add(minPrefixCoded);
rangeBounds.add(maxPrefixCoded);
}
@@ -470,85 +461,32 @@ public final class NumericRangeQuery<T e
// should never happen
throw new IllegalArgumentException("valSize must be 32 or 64");
}
-
- // seek to first term
- next();
- }
-
- @Override
- public float difference() {
- return 1.0f;
- }
-
- /** this is a dummy, it is not used by this class. */
- @Override
- protected boolean endEnum() {
- throw new UnsupportedOperationException("not implemented");
- }
- /** this is a dummy, it is not used by this class. */
- @Override
- protected void setEnum(TermEnum tenum) {
- throw new UnsupportedOperationException("not implemented");
+ termComp = getComparator();
}
- /**
- * Compares if current upper bound is reached.
- * In contrast to {@link FilteredTermEnum}, a return value
- * of <code>false</code> ends iterating the current enum
- * and forwards to the next sub-range.
- */
@Override
- protected boolean termCompare(Term term) {
- return (term.field() == field && term.text().compareTo(currentUpperBound) <= 0);
- }
-
- /** Increments the enumeration to the next element. True if one exists. */
- @Override
- public boolean next() throws IOException {
- // if a current term exists, the actual enum is initialized:
- // try change to next term, if no such term exists, fall-through
- if (currentTerm != null) {
- assert actualEnum != null;
- if (actualEnum.next()) {
- currentTerm = actualEnum.term();
- if (termCompare(currentTerm))
- return true;
- }
- }
-
- // if all above fails, we go forward to the next enum,
- // if one is available
- currentTerm = null;
- while (rangeBounds.size() >= 2) {
+ protected final BytesRef nextSeekTerm(BytesRef term) throws IOException {
+ if (rangeBounds.size() >= 2) {
assert rangeBounds.size() % 2 == 0;
- // close the current enum and read next bounds
- if (actualEnum != null) {
- actualEnum.close();
- actualEnum = null;
- }
- final String lowerBound = rangeBounds.removeFirst();
+
+ this.currentLowerBound = rangeBounds.removeFirst();
+ assert currentUpperBound == null || termComp.compare(currentUpperBound, currentLowerBound) <= 0 :
+ "The current upper bound must be <= the new lower bound";
+
this.currentUpperBound = rangeBounds.removeFirst();
- // create a new enum
- actualEnum = reader.terms(termTemplate.createTerm(lowerBound));
- currentTerm = actualEnum.term();
- if (currentTerm != null && termCompare(currentTerm))
- return true;
- // clear the current term for next iteration
- currentTerm = null;
+ return currentLowerBound;
}
// no more sub-range enums available
- assert rangeBounds.size() == 0 && currentTerm == null;
- return false;
+ assert rangeBounds.size() == 0;
+ return null;
}
-
- /** Closes the enumeration to further activity, freeing resources. */
+
@Override
- public void close() throws IOException {
- rangeBounds.clear();
- currentUpperBound = null;
- super.close();
+ protected AcceptStatus accept(BytesRef term) {
+ return (currentUpperBound != null && termComp.compare(term, currentUpperBound) <= 0) ?
+ AcceptStatus.YES : AcceptStatus.NO_AND_SEEK;
}
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhrasePositions.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhrasePositions.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhrasePositions.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhrasePositions.java Tue Apr 6 19:19:27 2010
@@ -28,40 +28,33 @@ final class PhrasePositions {
int position; // position in doc
int count; // remaining pos in this doc
int offset; // position in phrase
- TermPositions tp; // stream of positions
- PhrasePositions next; // used to make lists
+ final DocsAndPositionsEnum postings; // stream of docs & positions
+ PhrasePositions next; // used to make lists
boolean repeats; // there's other pp for same term (e.g. query="1st word 2nd word"~1)
- PhrasePositions(TermPositions t, int o) {
- tp = t;
+ PhrasePositions(DocsAndPositionsEnum postings, int o) {
+ this.postings = postings;
offset = o;
}
final boolean next() throws IOException { // increments to next doc
- if (!tp.next()) {
- tp.close(); // close stream
- doc = Integer.MAX_VALUE; // sentinel value
+ doc = postings.nextDoc();
+ if (doc == postings.NO_MORE_DOCS) {
return false;
}
- doc = tp.doc();
- position = 0;
return true;
}
final boolean skipTo(int target) throws IOException {
- if (!tp.skipTo(target)) {
- tp.close(); // close stream
- doc = Integer.MAX_VALUE; // sentinel value
+ doc = postings.advance(target);
+ if (doc == postings.NO_MORE_DOCS) {
return false;
}
- doc = tp.doc();
- position = 0;
return true;
}
-
final void firstPosition() throws IOException {
- count = tp.freq(); // read first pos
+ count = postings.freq(); // read first pos
nextPosition();
}
@@ -73,7 +66,7 @@ final class PhrasePositions {
*/
final boolean nextPosition() throws IOException {
if (count-- > 0) { // read subsequent pos's
- position = tp.nextPosition() - offset;
+ position = postings.nextPosition() - offset;
return true;
} else
return false;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java Tue Apr 6 19:19:27 2010
@@ -22,10 +22,13 @@ import java.util.Set;
import java.util.ArrayList;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermPositions;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.search.Explanation.IDFExplanation;
import org.apache.lucene.util.ToStringUtils;
+import org.apache.lucene.util.Bits;
/** A Query that matches documents containing a particular sequence of terms.
* A PhraseQuery is built by QueryParser for input like <code>"new york"</code>.
@@ -150,20 +153,35 @@ public class PhraseQuery extends Query {
if (terms.size() == 0) // optimize zero-term case
return null;
- TermPositions[] tps = new TermPositions[terms.size()];
+ DocsAndPositionsEnum[] postings = new DocsAndPositionsEnum[terms.size()];
+ final Bits delDocs = MultiFields.getDeletedDocs(reader);
for (int i = 0; i < terms.size(); i++) {
- TermPositions p = reader.termPositions(terms.get(i));
- if (p == null)
- return null;
- tps[i] = p;
+ final Term t = terms.get(i);
+ final BytesRef text = new BytesRef(t.text());
+ DocsAndPositionsEnum postingsEnum = MultiFields.getTermPositionsEnum(reader,
+ delDocs,
+ t.field(),
+ text);
+ // PhraseQuery on a field that did not index
+ // positions.
+ if (postingsEnum == null) {
+ if (MultiFields.getTermDocsEnum(reader, delDocs, t.field(), text) != null) {
+ // term does exist, but has no positions
+ throw new IllegalStateException("field \"" + t.field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run PhraseQuery (term=" + t.text() + ")");
+ } else {
+ // term does not exist
+ return null;
+ }
+ }
+ postings[i] = postingsEnum;
}
if (slop == 0) // optimize exact case
- return new ExactPhraseScorer(this, tps, getPositions(), similarity,
+ return new ExactPhraseScorer(this, postings, getPositions(), similarity,
reader.norms(field));
else
return
- new SloppyPhraseScorer(this, tps, getPositions(), similarity, slop,
+ new SloppyPhraseScorer(this, postings, getPositions(), similarity, slop,
reader.norms(field));
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseScorer.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseScorer.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseScorer.java Tue Apr 6 19:19:27 2010
@@ -19,7 +19,7 @@ package org.apache.lucene.search;
import java.io.IOException;
-import org.apache.lucene.index.TermPositions;
+import org.apache.lucene.index.DocsAndPositionsEnum;
/** Expert: Scoring functionality for phrase queries.
* <br>A document is considered matching if it contains the phrase-query terms
@@ -43,7 +43,7 @@ abstract class PhraseScorer extends Scor
private float freq; //phrase frequency in current doc as computed by phraseFreq().
- PhraseScorer(Weight weight, TermPositions[] tps, int[] offsets,
+ PhraseScorer(Weight weight, DocsAndPositionsEnum[] postings, int[] offsets,
Similarity similarity, byte[] norms) {
super(similarity);
this.norms = norms;
@@ -55,8 +55,8 @@ abstract class PhraseScorer extends Scor
// reflects the phrase offset: pp.pos = tp.pos - offset.
// this allows to easily identify a matching (exact) phrase
// when all PhrasePositions have exactly the same position.
- for (int i = 0; i < tps.length; i++) {
- PhrasePositions pp = new PhrasePositions(tps[i], offsets[i]);
+ for (int i = 0; i < postings.length; i++) {
+ PhrasePositions pp = new PhrasePositions(postings[i], offsets[i]);
if (last != null) { // add next to end of list
last.next = pp;
} else {
@@ -65,7 +65,7 @@ abstract class PhraseScorer extends Scor
last = pp;
}
- pq = new PhraseQueue(tps.length); // construct empty pq
+ pq = new PhraseQueue(postings.length); // construct empty pq
first.doc = -1;
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PrefixQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PrefixQuery.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PrefixQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PrefixQuery.java Tue Apr 6 19:19:27 2010
@@ -20,7 +20,10 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.ToStringUtils;
/** A Query that matches documents containing terms with a specified prefix. A PrefixQuery
@@ -34,23 +37,34 @@ public class PrefixQuery extends MultiTe
/** Constructs a query for terms starting with <code>prefix</code>. */
public PrefixQuery(Term prefix) {
+ super(prefix.field());
this.prefix = prefix;
}
/** Returns the prefix of this query. */
public Term getPrefix() { return prefix; }
- @Override
+ @Override @Deprecated
protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
return new PrefixTermEnum(reader, prefix);
}
+
+ @Override
+ protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
+ if (prefix.text().length() == 0) {
+ // no prefix -- match all terms for this field:
+ final Terms terms = MultiFields.getTerms(reader, getField());
+ return (terms != null) ? terms.iterator() : TermsEnum.EMPTY;
+ }
+ return new PrefixTermsEnum(reader, prefix);
+ }
/** Prints a user-readable version of this query. */
@Override
public String toString(String field) {
StringBuilder buffer = new StringBuilder();
- if (!prefix.field().equals(field)) {
- buffer.append(prefix.field());
+ if (!getField().equals(field)) {
+ buffer.append(getField());
buffer.append(":");
}
buffer.append(prefix.text());
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PrefixTermEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PrefixTermEnum.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PrefixTermEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PrefixTermEnum.java Tue Apr 6 19:19:27 2010
@@ -29,7 +29,9 @@ import org.apache.lucene.index.Term;
* Term enumerations are always ordered by Term.compareTo(). Each term in
* the enumeration is greater than all that precede it.
*
+ * @deprecated Use {@link PrefixTermsEnum} instead.
*/
+@Deprecated
public class PrefixTermEnum extends FilteredTermEnum {
private final Term prefix;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/Similarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/Similarity.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/Similarity.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/Similarity.java Tue Apr 6 19:19:27 2010
@@ -857,6 +857,7 @@ public abstract class Similarity impleme
* @return An implementation dependent float to be used as a scoring factor
*
*/
+ // TODO: maybe switch this API to BytesRef?
public float scorePayload(int docId, String fieldName, int start, int end, byte [] payload, int offset, int length)
{
return 1;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/SingleTermEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/SingleTermEnum.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/SingleTermEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/SingleTermEnum.java Tue Apr 6 19:19:27 2010
@@ -29,6 +29,7 @@ import org.apache.lucene.index.Term;
* but want to preserve MultiTermQuery semantics such as
* {@link MultiTermQuery#rewriteMethod}.
*/
+@Deprecated
public class SingleTermEnum extends FilteredTermEnum {
private Term singleTerm;
private boolean endEnum = false;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java Tue Apr 6 19:19:27 2010
@@ -17,7 +17,7 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import org.apache.lucene.index.TermPositions;
+import org.apache.lucene.index.DocsAndPositionsEnum;
import java.io.IOException;
import java.util.HashMap;
@@ -28,9 +28,9 @@ final class SloppyPhraseScorer extends P
private PhrasePositions tmpPos[]; // for flipping repeating pps.
private boolean checkedRepeats;
- SloppyPhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity,
+ SloppyPhraseScorer(Weight weight, DocsAndPositionsEnum[] postings, int[] offsets, Similarity similarity,
int slop, byte[] norms) {
- super(weight, tps, offsets, similarity, norms);
+ super(weight, postings, offsets, similarity, norms);
this.slop = slop;
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java Tue Apr 6 19:19:27 2010
@@ -20,8 +20,10 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Set;
+import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Explanation.IDFExplanation;
import org.apache.lucene.util.ToStringUtils;
@@ -71,12 +73,14 @@ public class TermQuery extends Query {
@Override
public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException {
- TermDocs termDocs = reader.termDocs(term);
-
- if (termDocs == null)
+ // NOTE: debateably, the caller should never pass in a
+ // multi reader...
+ DocsEnum docs = MultiFields.getTermDocsEnum(reader, MultiFields.getDeletedDocs(reader), term.field(), new BytesRef(term.text()));
+ if (docs == null) {
return null;
+ }
- return new TermScorer(this, termDocs, similarity, reader.norms(term.field()));
+ return new TermScorer(this, docs, similarity, reader.norms(term.field()));
}
@Override
@@ -114,15 +118,12 @@ public class TermQuery extends Query {
Explanation tfExplanation = new Explanation();
int tf = 0;
- TermDocs termDocs = reader.termDocs(term);
- if (termDocs != null) {
- try {
- if (termDocs.skipTo(doc) && termDocs.doc() == doc) {
- tf = termDocs.freq();
+ DocsEnum docs = reader.termDocsEnum(MultiFields.getDeletedDocs(reader), term.field(), new BytesRef(term.text()));
+ if (docs != null) {
+ int newDoc = docs.advance(doc);
+ if (newDoc == doc) {
+ tf = docs.freq();
}
- } finally {
- termDocs.close();
- }
tfExplanation.setValue(similarity.tf(tf));
tfExplanation.setDescription("tf(termFreq("+term+")="+tf+")");
} else {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermRangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermRangeFilter.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermRangeFilter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermRangeFilter.java Tue Apr 6 19:19:27 2010
@@ -87,9 +87,6 @@ public class TermRangeFilter extends Mul
public static TermRangeFilter More(String fieldName, String lowerTerm) {
return new TermRangeFilter(fieldName, lowerTerm, null, true, false);
}
-
- /** Returns the field name for this filter */
- public String getField() { return query.getField(); }
/** Returns the lower value of this range filter */
public String getLowerTerm() { return query.getLowerTerm(); }