You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2011/10/13 19:14:16 UTC

svn commit: r1182992 - in /lucene/dev/branches/branch_3x: ./ lucene/ lucene/backwards/src/test/ lucene/backwards/src/test/org/apache/lucene/search/ lucene/src/java/org/apache/lucene/search/ lucene/src/test/org/apache/lucene/search/ solr/

Author: mikemccand
Date: Thu Oct 13 17:14:15 2011
New Revision: 1182992

URL: http://svn.apache.org/viewvc?rev=1182992&view=rev
Log:
LUCENE-3510: don't limit number of prohibited clauses to 32 in BooleanScorer

Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/lucene/   (props changed)
    lucene/dev/branches/branch_3x/lucene/backwards/src/test/   (props changed)
    lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/search/TestBooleanScorer.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/BooleanQuery.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/BooleanScorer.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestBooleanScorer.java
    lucene/dev/branches/branch_3x/solr/   (props changed)

Modified: lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/search/TestBooleanScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/search/TestBooleanScorer.java?rev=1182992&r1=1182991&r2=1182992&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/search/TestBooleanScorer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/search/TestBooleanScorer.java Thu Oct 13 17:14:15 2011
@@ -62,32 +62,4 @@ public class TestBooleanScorer extends L
     ir.close();
     directory.close();
   }
-  
-  public void testEmptyBucketWithMoreDocs() throws Exception {
-    // This test checks the logic of nextDoc() when all sub scorers have docs
-    // beyond the first bucket (for example). Currently, the code relies on the
-    // 'more' variable to work properly, and this test ensures that if the logic
-    // changes, we have a test to back it up.
-    
-    Similarity sim = Similarity.getDefault();
-    Scorer[] scorers = new Scorer[] {new Scorer(sim) {
-      private int doc = -1;
-      @Override public float score() throws IOException { return 0; }
-      @Override public int docID() { return doc; }
-      
-      @Override public int nextDoc() throws IOException {
-        return doc = doc == -1 ? 3000 : NO_MORE_DOCS;
-      }
-
-      @Override public int advance(int target) throws IOException {
-        return doc = target <= 3000 ? 3000 : NO_MORE_DOCS;
-      }
-      
-    }};
-    BooleanScorer bs = new BooleanScorer(null, false, sim, 1, Arrays.asList(scorers), null, scorers.length);
-    
-    assertEquals("should have received 3000", 3000, bs.nextDoc());
-    assertEquals("should have received NO_MORE_DOCS", DocIdSetIterator.NO_MORE_DOCS, bs.nextDoc());
-  }
-
 }

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/BooleanQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/BooleanQuery.java?rev=1182992&r1=1182991&r2=1182992&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/BooleanQuery.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/BooleanQuery.java Thu Oct 13 17:14:15 2011
@@ -310,7 +310,7 @@ public class BooleanQuery extends Query 
       }
       
       // Check if we can return a BooleanScorer
-      if (!scoreDocsInOrder && topScorer && required.size() == 0 && prohibited.size() < 32) {
+      if (!scoreDocsInOrder && topScorer && required.size() == 0) {
         return new BooleanScorer(this, disableCoord, similarity, minNrShouldMatch, optional, prohibited, maxCoord);
       }
       
@@ -339,10 +339,6 @@ public class BooleanQuery extends Query 
         }
       }
       
-      if (numProhibited > 32) { // cannot use BS
-        return false;
-      }
-      
       // scorer() will return an out-of-order scorer if requested.
       return true;
     }

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/BooleanScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/BooleanScorer.java?rev=1182992&r1=1182991&r2=1182992&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/BooleanScorer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/BooleanScorer.java Thu Oct 13 17:14:15 2011
@@ -26,10 +26,10 @@ import org.apache.lucene.search.BooleanC
 /* Description from Doug Cutting (excerpted from
  * LUCENE-1483):
  *
- * BooleanScorer uses a ~16k array to score windows of
- * docs. So it scores docs 0-16k first, then docs 16-32k,
+ * BooleanScorer uses an array to score windows of
+ * 2K docs. So it scores docs 0-2K first, then docs 2K-4K,
  * etc. For each window it iterates through all query terms
- * and accumulates a score in table[doc%16k]. It also stores
+ * and accumulates a score in table[doc%2K]. It also stores
  * in the table a bitmask representing which terms
  * contributed to the score. Non-zero scores are chained in
  * a linked list. At the end of scoring each window it then
@@ -72,9 +72,7 @@ final class BooleanScorer extends Scorer
     public void collect(final int doc) throws IOException {
       final BucketTable table = bucketTable;
       final int i = doc & BucketTable.MASK;
-      Bucket bucket = table.buckets[i];
-      if (bucket == null)
-        table.buckets[i] = bucket = new Bucket();
+      final Bucket bucket = table.buckets[i];
       
       if (bucket.doc != doc) {                    // invalid bucket
         bucket.doc = doc;                         // set doc
@@ -140,6 +138,9 @@ final class BooleanScorer extends Scorer
   static final class Bucket {
     int doc = -1;            // tells if bucket is valid
     float score;             // incremental score
+    // TODO: break out bool anyProhibited, int
+    // numRequiredMatched; then we can remove 32 limit on
+    // required clauses
     int bits;                // used for bool constraints
     int coord;               // count of terms in score
     Bucket next;             // next valid bucket
@@ -153,7 +154,13 @@ final class BooleanScorer extends Scorer
     final Bucket[] buckets = new Bucket[SIZE];
     Bucket first = null;                          // head of valid list
   
-    public BucketTable() {}
+    public BucketTable() {
+      // Pre-fill to save the lazy init when collecting
+      // each sub:
+      for(int idx=0;idx<SIZE;idx++) {
+        buckets[idx] = new Bucket();
+      }
+    }
 
     public Collector newCollector(int mask) {
       return new BooleanScorerCollector(mask, this);
@@ -166,7 +173,7 @@ final class BooleanScorer extends Scorer
     public Scorer scorer;
     // TODO: re-enable this if BQ ever sends us required clauses
     //public boolean required = false;
-    public boolean prohibited = false;
+    public boolean prohibited;
     public Collector collector;
     public SubScorer next;
 
@@ -190,12 +197,13 @@ final class BooleanScorer extends Scorer
   private final float[] coordFactors;
   // TODO: re-enable this if BQ ever sends us required clauses
   //private int requiredMask = 0;
-  private int prohibitedMask = 0;
-  private int nextMask = 1;
   private final int minNrShouldMatch;
   private int end;
   private Bucket current;
   private int doc = -1;
+
+  // Any time a prohibited clause matches we set bit 0:
+  private static final int PROHIBITED_MASK = 1;
   
   BooleanScorer(Weight weight, boolean disableCoord, Similarity similarity, int minNrShouldMatch,
       List<Scorer> optionalScorers, List<Scorer> prohibitedScorers, int maxCoord) throws IOException {
@@ -212,11 +220,8 @@ final class BooleanScorer extends Scorer
     
     if (prohibitedScorers != null && prohibitedScorers.size() > 0) {
       for (Scorer scorer : prohibitedScorers) {
-        int mask = nextMask;
-        nextMask = nextMask << 1;
-        prohibitedMask |= mask;                     // update prohibited mask
         if (scorer.nextDoc() != NO_MORE_DOCS) {
-          scorers = new SubScorer(scorer, false, true, bucketTable.newCollector(mask), scorers);
+          scorers = new SubScorer(scorer, false, true, bucketTable.newCollector(PROHIBITED_MASK), scorers);
         }
       }
     }
@@ -230,9 +235,12 @@ final class BooleanScorer extends Scorer
   // firstDocID is ignored since nextDoc() initializes 'current'
   @Override
   protected boolean score(Collector collector, int max, int firstDocID) throws IOException {
+    // Make sure it's only BooleanScorer that calls us:
+    assert firstDocID == -1;
     boolean more;
     Bucket tmp;
     BucketScorer bs = new BucketScorer(weight);
+
     // The internal loop will set the score and doc before calling collect.
     collector.setScorer(bs);
     do {
@@ -241,12 +249,13 @@ final class BooleanScorer extends Scorer
       while (current != null) {         // more queued 
 
         // check prohibited & required
-        if ((current.bits & prohibitedMask) == 0) {
+        if ((current.bits & PROHIBITED_MASK) == 0) {
 
-            // TODO: re-enable this if BQ ever sends us required
-            // clauses
-            //&& (current.bits & requiredMask) == requiredMask) {
+          // TODO: re-enable this if BQ ever sends us required
+          // clauses
+          //&& (current.bits & requiredMask) == requiredMask) {
           
+          // TODO: can we remove this?  
           if (current.doc >= max){
             tmp = current;
             current = current.next;
@@ -295,48 +304,22 @@ final class BooleanScorer extends Scorer
 
   @Override
   public int docID() {
-    return doc;
+    throw new UnsupportedOperationException();
   }
 
   @Override
   public int nextDoc() throws IOException {
-    boolean more;
-    do {
-      while (bucketTable.first != null) {         // more queued
-        current = bucketTable.first;
-        bucketTable.first = current.next;         // pop the queue
-
-        // check prohibited & required, and minNrShouldMatch
-        if ((current.bits & prohibitedMask) == 0 &&
-            current.coord >= minNrShouldMatch) {
-          // TODO: re-enable this if BQ ever sends us required clauses
-          // (current.bits & requiredMask) == requiredMask &&
-          return doc = current.doc;
-        }
-      }
-
-      // refill the queue
-      more = false;
-      end += BucketTable.SIZE;
-      for (SubScorer sub = scorers; sub != null; sub = sub.next) {
-        int subScorerDocID = sub.scorer.docID();
-        if (subScorerDocID != NO_MORE_DOCS) {
-          more |= sub.scorer.score(sub.collector, end, subScorerDocID);
-        }
-      }
-    } while (bucketTable.first != null || more);
-
-    return doc = NO_MORE_DOCS;
+    throw new UnsupportedOperationException();
   }
 
   @Override
   public float score() {
-    return current.score * coordFactors[current.coord];
+    throw new UnsupportedOperationException();
   }
 
   @Override
   public void score(Collector collector) throws IOException {
-    score(collector, Integer.MAX_VALUE, nextDoc());
+    score(collector, Integer.MAX_VALUE, -1);
   }
   
   @Override

Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestBooleanScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestBooleanScorer.java?rev=1182992&r1=1182991&r2=1182992&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestBooleanScorer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestBooleanScorer.java Thu Oct 13 17:14:15 2011
@@ -18,7 +18,9 @@ package org.apache.lucene.search;
  */
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.List;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -26,7 +28,6 @@ import org.apache.lucene.index.IndexRead
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.Directory;
-
 import org.apache.lucene.util.LuceneTestCase;
 
 public class TestBooleanScorer extends LuceneTestCase
@@ -85,9 +86,85 @@ public class TestBooleanScorer extends L
       
     }};
     BooleanScorer bs = new BooleanScorer(null, false, sim, 1, Arrays.asList(scorers), null, scorers.length);
-    
-    assertEquals("should have received 3000", 3000, bs.nextDoc());
-    assertEquals("should have received NO_MORE_DOCS", DocIdSetIterator.NO_MORE_DOCS, bs.nextDoc());
+
+    final List<Integer> hits = new ArrayList<Integer>();
+    bs.score(new Collector() {
+      int docBase;
+      @Override
+      public void setScorer(Scorer scorer) {
+      }
+      
+      @Override
+      public void collect(int doc) throws IOException {
+        hits.add(docBase+doc);
+      }
+      
+      @Override
+      public void setNextReader(IndexReader reader, int docBase) {
+        this.docBase = docBase;
+      }
+      
+      @Override
+      public boolean acceptsDocsOutOfOrder() {
+        return true;
+      }
+      });
+
+    assertEquals("should have only 1 hit", 1, hits.size());
+    assertEquals("hit should have been docID=3000", 3000, hits.get(0).intValue());
   }
 
+  public void testMoreThan32ProhibitedClauses() throws Exception {
+    final Directory d = newDirectory();
+    final RandomIndexWriter w = new RandomIndexWriter(random, d);
+    Document doc = new Document();
+    doc.add(new Field("field", "0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33", Field.Store.NO, Field.Index.ANALYZED));
+    w.addDocument(doc);
+    doc = new Document();
+    doc.add(new Field("field", "33", Field.Store.NO, Field.Index.ANALYZED));
+    w.addDocument(doc);
+    final IndexReader r = w.getReader();
+    w.close();
+    final IndexSearcher s = newSearcher(r);
+
+    final BooleanQuery q = new BooleanQuery();
+    for(int term=0;term<33;term++) {
+      q.add(new BooleanClause(new TermQuery(new Term("field", ""+term)),
+                              BooleanClause.Occur.MUST_NOT));
+    }
+    q.add(new BooleanClause(new TermQuery(new Term("field", "33")),
+                            BooleanClause.Occur.SHOULD));
+                            
+    final int[] count = new int[1];
+    s.search(q, new Collector() {
+      private Scorer scorer;
+    
+      @Override
+      public void setScorer(Scorer scorer) {
+        // Make sure we got BooleanScorer:
+        this.scorer = scorer;
+        assertEquals("Scorer is implemented by wrong class", BooleanScorer.class.getName() + "$BucketScorer", scorer.getClass().getName());
+      }
+      
+      @Override
+      public void collect(int doc) throws IOException {
+        count[0]++;
+      }
+      
+      @Override
+      public void setNextReader(IndexReader reader, int docBase) {
+      }
+      
+      @Override
+      public boolean acceptsDocsOutOfOrder() {
+        return true;
+      }
+    });
+
+    assertEquals(1, count[0]);
+    
+    s.close();
+    r.close();
+    d.close();
+  }
 }