You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/04/10 18:44:48 UTC

svn commit: r1466558 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/search/

Author: rmuir
Date: Wed Apr 10 16:44:47 2013
New Revision: 1466558

URL: http://svn.apache.org/r1466558
Log:
LUCENE-4923: remove minShouldMatch/speed up DisjunctionSumScorer

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/lucene/core/   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1466558&r1=1466557&r2=1466558&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Wed Apr 10 16:44:47 2013
@@ -173,6 +173,9 @@ Optimizations
 * LUCENE-4889: UnicodeUtil.codePointCount implementation replaced with a
   non-array-lookup version. (Dawid Weiss)
 
+* LUCENE-4923: Speed up BooleanQuerys processing of in-order disjunctions.
+  (Robert Muir)
+
 API Changes
 
 * LUCENE-4844: removed TaxonomyReader.getParent(), you should use

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java?rev=1466558&r1=1466557&r2=1466558&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java Wed Apr 10 16:44:47 2013
@@ -346,11 +346,21 @@ public class BooleanQuery extends Query 
         return null;
       }
       
+      // simple conjunction
       if (optional.size() == 0 && prohibited.size() == 0) {
         float coord = disableCoord ? 1.0f : coord(required.size(), maxCoord);
         return new ConjunctionScorer(this, required.toArray(new Scorer[required.size()]), coord);
       }
       
+      // simple disjunction
+      if (required.size() == 0 && prohibited.size() == 0 && minNrShouldMatch <= 1 && optional.size() > 1) {
+        float coord[] = new float[optional.size()+1];
+        for (int i = 0; i < coord.length; i++) {
+          coord[i] = disableCoord ? 1.0f : coord(i, maxCoord);
+        }
+        return new DisjunctionSumScorer(this, optional.toArray(new Scorer[optional.size()]), coord);
+      }
+      
       // Return a BooleanScorer2
       return new BooleanScorer2(this, disableCoord, minNrShouldMatch, required, prohibited, optional, maxCoord);
     }

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java?rev=1466558&r1=1466557&r2=1466558&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java Wed Apr 10 16:44:47 2013
@@ -159,38 +159,19 @@ class BooleanScorer2 extends Scorer {
     // each scorer from the list counted as a single matcher
     if (minNrShouldMatch > 1) {
       return new MinShouldMatchSumScorer(weight, scorers, minNrShouldMatch) {
-        private int lastScoredDoc = -1;
-        // Save the score of lastScoredDoc, so that we don't compute it more than
-        // once in score().
-        private float lastDocScore = Float.NaN;
-        @Override public float score() throws IOException {
-          int doc = docID();
-          if (doc >= lastScoredDoc) {
-            if (doc > lastScoredDoc) {
-              lastDocScore = super.score();
-              lastScoredDoc = doc;
-            }
-            coordinator.nrMatchers += super.nrMatchers;
-          }
-        return lastDocScore;
+        @Override 
+        public float score() throws IOException {
+          coordinator.nrMatchers += super.nrMatchers;
+          return super.score();
         }
       };
     } else {
-      return new DisjunctionSumScorer(weight, scorers) {
-        private int lastScoredDoc = -1;
-        // Save the score of lastScoredDoc, so that we don't compute it more than
-        // once in score().
-        private float lastDocScore = Float.NaN;
-        @Override public float score() throws IOException {
-          int doc = docID();
-          if (doc >= lastScoredDoc) {
-            if (doc > lastScoredDoc) {
-              lastDocScore = super.score();
-              lastScoredDoc = doc;
-            }
-            coordinator.nrMatchers += super.nrMatchers;
-          }
-        return lastDocScore;
+      // we pass null for coord[] since we coordinate ourselves and override score()
+      return new DisjunctionSumScorer(weight, scorers.toArray(new Scorer[scorers.size()]), null) {
+        @Override 
+        public float score() throws IOException {
+          coordinator.nrMatchers += super.nrMatchers;
+          return (float) super.score;
         }
       };
     }

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java?rev=1466558&r1=1466557&r2=1466558&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java Wed Apr 10 16:44:47 2013
@@ -17,84 +17,58 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
-import java.util.List;
 import java.io.IOException;
 
 /** A Scorer for OR like queries, counterpart of <code>ConjunctionScorer</code>.
  * This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers. 
  */
 class DisjunctionSumScorer extends DisjunctionScorer { 
-  /** The minimum number of scorers that should match. */
-  private final int minimumNrMatchers;
-  
   /** The document number of the current match. */
   private int doc = -1;
 
   /** The number of subscorers that provide the current match. */
   protected int nrMatchers = -1;
 
-  private double score = Float.NaN;
+  protected double score = Float.NaN;
+  private final float[] coord;
   
   /** Construct a <code>DisjunctionScorer</code>.
    * @param weight The weight to be used.
-   * @param subScorers A collection of at least two subscorers.
-   * @param minimumNrMatchers The positive minimum number of subscorers that should
-   * match to match this query.
-   * <br>When <code>minimumNrMatchers</code> is bigger than
-   * the number of <code>subScorers</code>,
-   * no matches will be produced.
-   * <br>When minimumNrMatchers equals the number of subScorers,
-   * it more efficient to use <code>ConjunctionScorer</code>.
+   * @param subScorers Array of at least two subscorers.
+   * @param coord Table of coordination factors
    */
-  public DisjunctionSumScorer(Weight weight, List<Scorer> subScorers, int minimumNrMatchers) throws IOException {
-    super(weight, subScorers.toArray(new Scorer[subScorers.size()]), subScorers.size());
+  DisjunctionSumScorer(Weight weight, Scorer[] subScorers, float[] coord) throws IOException {
+    super(weight, subScorers, subScorers.length);
 
-    if (minimumNrMatchers <= 0) {
-      throw new IllegalArgumentException("Minimum nr of matchers must be positive");
-    }
     if (numScorers <= 1) {
       throw new IllegalArgumentException("There must be at least 2 subScorers");
     }
-
-    this.minimumNrMatchers = minimumNrMatchers;
-  }
-  
-  /** Construct a <code>DisjunctionScorer</code>, using one as the minimum number
-   * of matching subscorers.
-   */
-  public DisjunctionSumScorer(Weight weight, List<Scorer> subScorers) throws IOException {
-    this(weight, subScorers, 1);
+    this.coord = coord;
   }
 
   @Override
   public int nextDoc() throws IOException {
     assert doc != NO_MORE_DOCS;
     while(true) {
-      while (subScorers[0].docID() == doc) {
-        if (subScorers[0].nextDoc() != NO_MORE_DOCS) {
-          heapAdjust(0);
-        } else {
-          heapRemoveRoot();
-          if (numScorers < minimumNrMatchers) {
-            return doc = NO_MORE_DOCS;
-          }
+      if (subScorers[0].nextDoc() != NO_MORE_DOCS) {
+        heapAdjust(0);
+      } else {
+        heapRemoveRoot();
+        if (numScorers == 0) {
+          return doc = NO_MORE_DOCS;
         }
       }
-      afterNext();
-      if (nrMatchers >= minimumNrMatchers) {
-        break;
+      if (subScorers[0].docID() != doc) {
+        afterNext();
+        return doc;
       }
     }
-    
-    return doc;
   }
   
   private void afterNext() throws IOException {
     final Scorer sub = subScorers[0];
     doc = sub.docID();
-    if (doc == NO_MORE_DOCS) {
-      nrMatchers = Integer.MAX_VALUE; // stop looping
-    } else {
+    if (doc != NO_MORE_DOCS) {
       score = sub.score();
       nrMatchers = 1;
       countMatches(1);
@@ -104,9 +78,8 @@ class DisjunctionSumScorer extends Disju
   
   // TODO: this currently scores, but so did the previous impl
   // TODO: remove recursion.
-  // TODO: if we separate scoring, out of here, modify this
-  // and afterNext() to terminate when nrMatchers == minimumNrMatchers
-  // then also change freq() to just always compute it from scratch
+  // TODO: if we separate scoring, out of here, 
+  // then change freq() to just always compute it from scratch
   private void countMatches(int root) throws IOException {
     if (root < numScorers && subScorers[root].docID() == doc) {
       nrMatchers++;
@@ -121,7 +94,7 @@ class DisjunctionSumScorer extends Disju
    */
   @Override
   public float score() throws IOException { 
-    return (float)score; 
+    return (float)score * coord[nrMatchers]; 
   }
    
   @Override
@@ -146,8 +119,8 @@ class DisjunctionSumScorer extends Disju
    */
   @Override
   public int advance(int target) throws IOException {
-    if (numScorers == 0) return doc = NO_MORE_DOCS;
-    while (subScorers[0].docID() < target) {
+    assert doc != NO_MORE_DOCS;
+    while(true) {
       if (subScorers[0].advance(target) != NO_MORE_DOCS) {
         heapAdjust(0);
       } else {
@@ -156,14 +129,10 @@ class DisjunctionSumScorer extends Disju
           return doc = NO_MORE_DOCS;
         }
       }
-    }
-    
-    afterNext();
-
-    if (nrMatchers >= minimumNrMatchers) {
-      return doc;
-    } else {
-      return nextDoc();
+      if (subScorers[0].docID() >= target) {
+        afterNext();
+        return doc;
+      }
     }
   }
 }