You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2011/10/13 19:00:16 UTC
svn commit: r1182982 - in /lucene/dev/trunk/lucene/src:
java/org/apache/lucene/search/BooleanQuery.java
java/org/apache/lucene/search/BooleanScorer.java
test/org/apache/lucene/search/TestBooleanScorer.java
Author: mikemccand
Date: Thu Oct 13 17:00:16 2011
New Revision: 1182982
URL: http://svn.apache.org/viewvc?rev=1182982&view=rev
Log:
LUCENE-3510: don't limit number of prohibited clauses to 32 in BooleanScorer
Modified:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/BooleanQuery.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/BooleanScorer.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestBooleanScorer.java
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/BooleanQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/BooleanQuery.java?rev=1182982&r1=1182981&r2=1182982&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/BooleanQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/BooleanQuery.java Thu Oct 13 17:00:16 2011
@@ -329,7 +329,7 @@ public class BooleanQuery extends Query
}
// Check if we can return a BooleanScorer
- if (!scoreDocsInOrder && topScorer && required.size() == 0 && prohibited.size() < 32) {
+ if (!scoreDocsInOrder && topScorer && required.size() == 0) {
return new BooleanScorer(this, disableCoord, minNrShouldMatch, optional, prohibited, maxCoord);
}
@@ -375,10 +375,6 @@ public class BooleanQuery extends Query
}
}
- if (numProhibited > 32) { // cannot use BS
- return false;
- }
-
// scorer() will return an out-of-order scorer if requested.
return true;
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/BooleanScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/BooleanScorer.java?rev=1182982&r1=1182981&r2=1182982&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/BooleanScorer.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/BooleanScorer.java Thu Oct 13 17:00:16 2011
@@ -29,10 +29,10 @@ import org.apache.lucene.search.BooleanQ
/* Description from Doug Cutting (excerpted from
* LUCENE-1483):
*
- * BooleanScorer uses a ~16k array to score windows of
- * docs. So it scores docs 0-16k first, then docs 16-32k,
+ * BooleanScorer uses an array to score windows of
+ * 2K docs. So it scores docs 0-2K first, then docs 2K-4K,
* etc. For each window it iterates through all query terms
- * and accumulates a score in table[doc%16k]. It also stores
+ * and accumulates a score in table[doc%2K]. It also stores
* in the table a bitmask representing which terms
* contributed to the score. Non-zero scores are chained in
* a linked list. At the end of scoring each window it then
@@ -75,9 +75,7 @@ final class BooleanScorer extends Scorer
public void collect(final int doc) throws IOException {
final BucketTable table = bucketTable;
final int i = doc & BucketTable.MASK;
- Bucket bucket = table.buckets[i];
- if (bucket == null)
- table.buckets[i] = bucket = new Bucket();
+ final Bucket bucket = table.buckets[i];
if (bucket.doc != doc) { // invalid bucket
bucket.doc = doc; // set doc
@@ -143,6 +141,9 @@ final class BooleanScorer extends Scorer
static final class Bucket {
int doc = -1; // tells if bucket is valid
float score; // incremental score
+ // TODO: break out bool anyProhibited, int
+ // numRequiredMatched; then we can remove 32 limit on
+ // required clauses
int bits; // used for bool constraints
int coord; // count of terms in score
Bucket next; // next valid bucket
@@ -156,7 +157,13 @@ final class BooleanScorer extends Scorer
final Bucket[] buckets = new Bucket[SIZE];
Bucket first = null; // head of valid list
- public BucketTable() {}
+ public BucketTable() {
+ // Pre-fill to save the lazy init when collecting
+ // each sub:
+ for(int idx=0;idx<SIZE;idx++) {
+ buckets[idx] = new Bucket();
+ }
+ }
public Collector newCollector(int mask) {
return new BooleanScorerCollector(mask, this);
@@ -169,7 +176,7 @@ final class BooleanScorer extends Scorer
public Scorer scorer;
// TODO: re-enable this if BQ ever sends us required clauses
//public boolean required = false;
- public boolean prohibited = false;
+ public boolean prohibited;
public Collector collector;
public SubScorer next;
@@ -193,12 +200,13 @@ final class BooleanScorer extends Scorer
private final float[] coordFactors;
// TODO: re-enable this if BQ ever sends us required clauses
//private int requiredMask = 0;
- private int prohibitedMask = 0;
- private int nextMask = 1;
private final int minNrShouldMatch;
private int end;
private Bucket current;
private int doc = -1;
+
+ // Any time a prohibited clause matches we set bit 0:
+ private static final int PROHIBITED_MASK = 1;
BooleanScorer(BooleanWeight weight, boolean disableCoord, int minNrShouldMatch,
List<Scorer> optionalScorers, List<Scorer> prohibitedScorers, int maxCoord) throws IOException {
@@ -215,11 +223,8 @@ final class BooleanScorer extends Scorer
if (prohibitedScorers != null && prohibitedScorers.size() > 0) {
for (Scorer scorer : prohibitedScorers) {
- int mask = nextMask;
- nextMask = nextMask << 1;
- prohibitedMask |= mask; // update prohibited mask
if (scorer.nextDoc() != NO_MORE_DOCS) {
- scorers = new SubScorer(scorer, false, true, bucketTable.newCollector(mask), scorers);
+ scorers = new SubScorer(scorer, false, true, bucketTable.newCollector(PROHIBITED_MASK), scorers);
}
}
}
@@ -233,9 +238,12 @@ final class BooleanScorer extends Scorer
// firstDocID is ignored since nextDoc() initializes 'current'
@Override
public boolean score(Collector collector, int max, int firstDocID) throws IOException {
+ // Make sure it's only BooleanScorer that calls us:
+ assert firstDocID == -1;
boolean more;
Bucket tmp;
BucketScorer bs = new BucketScorer(weight);
+
// The internal loop will set the score and doc before calling collect.
collector.setScorer(bs);
do {
@@ -244,12 +252,13 @@ final class BooleanScorer extends Scorer
while (current != null) { // more queued
// check prohibited & required
- if ((current.bits & prohibitedMask) == 0) {
+ if ((current.bits & PROHIBITED_MASK) == 0) {
- // TODO: re-enable this if BQ ever sends us required
- // clauses
- //&& (current.bits & requiredMask) == requiredMask) {
+ // TODO: re-enable this if BQ ever sends us required
+ // clauses
+ //&& (current.bits & requiredMask) == requiredMask) {
+ // TODO: can we remove this?
if (current.doc >= max){
tmp = current;
current = current.next;
@@ -298,48 +307,22 @@ final class BooleanScorer extends Scorer
@Override
public int docID() {
- return doc;
+ throw new UnsupportedOperationException();
}
@Override
public int nextDoc() throws IOException {
- boolean more;
- do {
- while (bucketTable.first != null) { // more queued
- current = bucketTable.first;
- bucketTable.first = current.next; // pop the queue
-
- // check prohibited & required, and minNrShouldMatch
- if ((current.bits & prohibitedMask) == 0 &&
- current.coord >= minNrShouldMatch) {
- // TODO: re-enable this if BQ ever sends us required clauses
- // (current.bits & requiredMask) == requiredMask &&
- return doc = current.doc;
- }
- }
-
- // refill the queue
- more = false;
- end += BucketTable.SIZE;
- for (SubScorer sub = scorers; sub != null; sub = sub.next) {
- int subScorerDocID = sub.scorer.docID();
- if (subScorerDocID != NO_MORE_DOCS) {
- more |= sub.scorer.score(sub.collector, end, subScorerDocID);
- }
- }
- } while (bucketTable.first != null || more);
-
- return doc = NO_MORE_DOCS;
+ throw new UnsupportedOperationException();
}
@Override
public float score() {
- return current.score * coordFactors[current.coord];
+ throw new UnsupportedOperationException();
}
@Override
public void score(Collector collector) throws IOException {
- score(collector, Integer.MAX_VALUE, nextDoc());
+ score(collector, Integer.MAX_VALUE, -1);
}
@Override
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestBooleanScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestBooleanScorer.java?rev=1182982&r1=1182981&r2=1182982&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestBooleanScorer.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestBooleanScorer.java Thu Oct 13 17:00:16 2011
@@ -18,16 +18,19 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Arrays;
+import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery.BooleanWeight;
import org.apache.lucene.store.Directory;
-
import org.apache.lucene.util.LuceneTestCase;
public class TestBooleanScorer extends LuceneTestCase
@@ -93,13 +96,88 @@ public class TestBooleanScorer extends L
}};
BooleanScorer bs = new BooleanScorer(weight, false, 1, Arrays.asList(scorers), null, scorers.length);
-
- assertEquals("should have received 3000", 3000, bs.nextDoc());
- assertEquals("should have received NO_MORE_DOCS", DocIdSetIterator.NO_MORE_DOCS, bs.nextDoc());
+
+ final List<Integer> hits = new ArrayList<Integer>();
+ bs.score(new Collector() {
+ int docBase;
+ @Override
+ public void setScorer(Scorer scorer) {
+ }
+
+ @Override
+ public void collect(int doc) throws IOException {
+ hits.add(docBase+doc);
+ }
+
+ @Override
+ public void setNextReader(AtomicReaderContext context) {
+ docBase = context.docBase;
+ }
+
+ @Override
+ public boolean acceptsDocsOutOfOrder() {
+ return true;
+ }
+ });
+
+ assertEquals("should have only 1 hit", 1, hits.size());
+ assertEquals("hit should have been docID=3000", 3000, hits.get(0).intValue());
searcher.close();
ir.close();
directory.close();
-
}
+ public void testMoreThan32ProhibitedClauses() throws Exception {
+ final Directory d = newDirectory();
+ final RandomIndexWriter w = new RandomIndexWriter(random, d);
+ Document doc = new Document();
+ doc.add(new TextField("field", "0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33"));
+ w.addDocument(doc);
+ doc = new Document();
+ doc.add(new TextField("field", "33"));
+ w.addDocument(doc);
+ final IndexReader r = w.getReader();
+ w.close();
+ final IndexSearcher s = newSearcher(r);
+
+ final BooleanQuery q = new BooleanQuery();
+ for(int term=0;term<33;term++) {
+ q.add(new BooleanClause(new TermQuery(new Term("field", ""+term)),
+ BooleanClause.Occur.MUST_NOT));
+ }
+ q.add(new BooleanClause(new TermQuery(new Term("field", "33")),
+ BooleanClause.Occur.SHOULD));
+
+ final int[] count = new int[1];
+ s.search(q, new Collector() {
+ private Scorer scorer;
+
+ @Override
+ public void setScorer(Scorer scorer) {
+ // Make sure we got BooleanScorer:
+ this.scorer = scorer;
+ assertEquals("Scorer is implemented by wrong class", BooleanScorer.class.getName() + "$BucketScorer", scorer.getClass().getName());
+ }
+
+ @Override
+ public void collect(int doc) throws IOException {
+ count[0]++;
+ }
+
+ @Override
+ public void setNextReader(AtomicReaderContext context) {
+ }
+
+ @Override
+ public boolean acceptsDocsOutOfOrder() {
+ return true;
+ }
+ });
+
+ assertEquals(1, count[0]);
+
+ s.close();
+ r.close();
+ d.close();
+ }
}