You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by da...@apache.org on 2017/12/27 15:04:44 UTC

[51/54] [abbrv] lucene-solr:jira/solr-11702: LUCENE-8097: Implement maxScore() on disjunctions.

LUCENE-8097: Implement maxScore() on disjunctions.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e3f90385
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e3f90385
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e3f90385

Branch: refs/heads/jira/solr-11702
Commit: e3f90385b4928e3639ee09a907df323e452c74de
Parents: 01023a9
Author: Adrien Grand <jp...@gmail.com>
Authored: Tue Dec 26 14:19:26 2017 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Tue Dec 26 14:19:26 2017 +0100

----------------------------------------------------------------------
 .../lucene/search/DisjunctionMaxQuery.java      | 18 +++++---
 .../lucene/search/DisjunctionMaxScorer.java     | 46 ++++++++++++++++----
 .../lucene/search/DisjunctionSumScorer.java     | 20 +++++++--
 .../org/apache/lucene/search/WANDScorer.java    | 14 +++++-
 .../java/org/apache/lucene/util/MathUtil.java   | 15 +++++++
 .../queryparser/xml/DisjunctionMaxQuery.xml     |  4 +-
 .../lucene/queryparser/xml/TestCoreParser.java  |  2 +-
 7 files changed, 98 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e3f90385/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
index 97c02a6..3285baf 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
@@ -62,6 +62,9 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
    */
   public DisjunctionMaxQuery(Collection<Query> disjuncts, float tieBreakerMultiplier) {
     Objects.requireNonNull(disjuncts, "Collection of Querys must not be null");
+    if (tieBreakerMultiplier < 0 || tieBreakerMultiplier > 1) {
+      throw new IllegalArgumentException("tieBreakerMultiplier must be in [0, 1]");
+    }
     this.tieBreakerMultiplier = tieBreakerMultiplier;
     this.disjuncts = disjuncts.toArray(new Query[disjuncts.size()]);
   }
@@ -156,20 +159,25 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
     @Override
     public Explanation explain(LeafReaderContext context, int doc) throws IOException {
       boolean match = false;
-      float max = Float.NEGATIVE_INFINITY;
-      double sum = 0;
+      float max = 0;
+      double otherSum = 0;
       List<Explanation> subs = new ArrayList<>();
       for (Weight wt : weights) {
         Explanation e = wt.explain(context, doc);
         if (e.isMatch()) {
           match = true;
           subs.add(e);
-          sum += e.getValue();
-          max = Math.max(max, e.getValue());
+          float score = e.getValue();
+          if (score >= max) {
+            otherSum += max;
+            max = score;
+          } else {
+            otherSum += score;
+          }
         }
       }
       if (match) {
-        final float score = (float) (max + (sum - max) * tieBreakerMultiplier);
+        final float score = (float) (max + otherSum * tieBreakerMultiplier);
         final String desc = tieBreakerMultiplier == 0.0f ? "max of:" : "max plus " + tieBreakerMultiplier + " times others of:";
         return Explanation.match(score, desc, subs);
       } else {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e3f90385/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
index 084de66..c5c3640 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
@@ -19,6 +19,8 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.List;
 
+import org.apache.lucene.util.MathUtil;
+
 /**
  * The Scorer for DisjunctionMaxQuery.  The union of all documents generated by the the subquery scorers
  * is generated in document number order.  The score for each document is the maximum of the scores computed
@@ -28,6 +30,7 @@ import java.util.List;
 final class DisjunctionMaxScorer extends DisjunctionScorer {
   /* Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. */
   private final float tieBreakerMultiplier;
+  private final float maxScore;
 
   /**
    * Creates a new instance of DisjunctionMaxScorer
@@ -43,25 +46,52 @@ final class DisjunctionMaxScorer extends DisjunctionScorer {
   DisjunctionMaxScorer(Weight weight, float tieBreakerMultiplier, List<Scorer> subScorers, boolean needsScores) {
     super(weight, subScorers, needsScores);
     this.tieBreakerMultiplier = tieBreakerMultiplier;
+    if (tieBreakerMultiplier < 0 || tieBreakerMultiplier > 1) {
+      throw new IllegalArgumentException("tieBreakerMultiplier must be in [0, 1]");
+    }
+
+    float scoreMax = 0;
+    double otherScoreSum = 0;
+    for (Scorer scorer : subScorers) {
+      float subScore = scorer.maxScore();
+      if (subScore >= scoreMax) {
+        otherScoreSum += scoreMax;
+        scoreMax = subScore;
+      } else {
+        otherScoreSum += subScore;
+      }
+    }
+
+    if (tieBreakerMultiplier == 0) {
+      this.maxScore = scoreMax;
+    } else {
+      // The error of sums depends on the order in which values are summed up. In
+      // order to avoid this issue, we compute an upper bound of the value that
+      // the sum may take. If the max relative error is b, then it means that two
+      // sums are always within 2*b of each other.
+      otherScoreSum *= (1 + 2 * MathUtil.sumRelativeErrorBound(subScorers.size() - 1));
+      this.maxScore = (float) (scoreMax + otherScoreSum * tieBreakerMultiplier);
+    }
   }
 
   @Override
   protected float score(DisiWrapper topList) throws IOException {
-    double scoreSum = 0;
-    float scoreMax = Float.NEGATIVE_INFINITY;
+    float scoreMax = 0;
+    double otherScoreSum = 0;
     for (DisiWrapper w = topList; w != null; w = w.next) {
-      final float subScore = w.scorer.score();
-      scoreSum += subScore;
-      if (subScore > scoreMax) {
+      float subScore = w.scorer.score();
+      if (subScore >= scoreMax) {
+        otherScoreSum += scoreMax;
         scoreMax = subScore;
+      } else {
+        otherScoreSum += subScore;
       }
     }
-    return (float) (scoreMax + (scoreSum - scoreMax) * tieBreakerMultiplier); 
+    return (float) (scoreMax + otherScoreSum * tieBreakerMultiplier);
   }
 
   @Override
   public float maxScore() {
-    // TODO: implement but be careful about floating-point errors.
-    return Float.POSITIVE_INFINITY;
+    return maxScore;
   }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e3f90385/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
index 729a298..7e22991 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
@@ -20,21 +20,36 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.List;
 
+import org.apache.lucene.util.MathUtil;
+
 /** A Scorer for OR like queries, counterpart of <code>ConjunctionScorer</code>.
  */
 final class DisjunctionSumScorer extends DisjunctionScorer {
-  
+
+  private final float maxScore;
+
   /** Construct a <code>DisjunctionScorer</code>.
    * @param weight The weight to be used.
    * @param subScorers Array of at least two subscorers.
    */
   DisjunctionSumScorer(Weight weight, List<Scorer> subScorers, boolean needsScores) {
     super(weight, subScorers, needsScores);
+    double maxScore = 0;
+    for (Scorer scorer : subScorers) {
+      maxScore += scorer.maxScore();
+    }
+    // The error of sums depends on the order in which values are summed up. In
+    // order to avoid this issue, we compute an upper bound of the value that
+    // the sum may take. If the max relative error is b, then it means that two
+    // sums are always within 2*b of each other.
+    double maxScoreRelativeErrorBound = MathUtil.sumRelativeErrorBound(subScorers.size());
+    this.maxScore = (float) ((1.0 + 2 * maxScoreRelativeErrorBound) * maxScore);
   }
 
   @Override
   protected float score(DisiWrapper topList) throws IOException {
     double score = 0;
+
     for (DisiWrapper w = topList; w != null; w = w.next) {
       score += w.scorer.score();
     }
@@ -43,8 +58,7 @@ final class DisjunctionSumScorer extends DisjunctionScorer {
 
   @Override
   public float maxScore() {
-    // TODO: implement it but be careful with floating-point errors
-    return Float.POSITIVE_INFINITY;
+    return maxScore;
   }
 
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e3f90385/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java
index 2f3b600..f5f647e 100644
--- a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java
@@ -26,6 +26,8 @@ import java.util.Collection;
 import java.util.List;
 import java.util.OptionalInt;
 
+import org.apache.lucene.util.MathUtil;
+
 /**
  * This implements the WAND (Weak AND) algorithm for dynamic pruning
  * described in "Efficient Query Evaluation using a Two-Level Retrieval
@@ -120,6 +122,7 @@ final class WANDScorer extends Scorer {
   int tailSize;
 
   final long cost;
+  final float maxScore;
 
   WANDScorer(Weight weight, Collection<Scorer> scorers) {
     super(weight);
@@ -142,10 +145,12 @@ final class WANDScorer extends Scorer {
     // Use a scaling factor of 0 if all max scores are either 0 or +Infty
     this.scalingFactor = scalingFactor.orElse(0);
     
+    double maxScoreSum = 0;
     for (Scorer scorer : scorers) {
       DisiWrapper w = new DisiWrapper(scorer);
       float maxScore = scorer.maxScore();
       w.maxScore = scaleMaxScore(maxScore, this.scalingFactor);
+      maxScoreSum += maxScore;
       addLead(w);
     }
 
@@ -154,6 +159,12 @@ final class WANDScorer extends Scorer {
       cost += w.cost;
     }
     this.cost = cost;
+    // The error of sums depends on the order in which values are summed up. In
+    // order to avoid this issue, we compute an upper bound of the value that
+    // the sum may take. If the max relative error is b, then it means that two
+    // sums are always within 2*b of each other.
+    double maxScoreRelativeErrorBound = MathUtil.sumRelativeErrorBound(scorers.size());
+    this.maxScore = (float) ((1.0 + 2 * maxScoreRelativeErrorBound) * maxScoreSum);
   }
 
   // returns a boolean so that it can be called from assert
@@ -375,8 +386,7 @@ final class WANDScorer extends Scorer {
 
   @Override
   public float maxScore() {
-    // TODO: implement but be careful about floating-point errors.
-    return Float.POSITIVE_INFINITY;
+    return maxScore;
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e3f90385/lucene/core/src/java/org/apache/lucene/util/MathUtil.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/MathUtil.java b/lucene/core/src/java/org/apache/lucene/util/MathUtil.java
index 09437fe..7430c5d 100644
--- a/lucene/core/src/java/org/apache/lucene/util/MathUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/util/MathUtil.java
@@ -149,5 +149,20 @@ public final class MathUtil {
     return mult * Math.log((1.0d + a) / (1.0d - a));
   }
 
+  /**
+   * Return a relative error bound for a sum of {@code numValues} positive doubles,
+   * computed using recursive summation, ie. sum = x1 + ... + xn.
+   * NOTE: This only works if all values are POSITIVE so that Σ |xi| == |Σ xi|.
+   * This uses formula 3.5 from Higham, Nicholas J. (1993),
+   * "The accuracy of floating point summation", SIAM Journal on Scientific Computing.
+   */
+  public static double sumRelativeErrorBound(int numValues) {
+    if (numValues <= 1) {
+      return 0;
+    }
+    // u = unit roundoff in the paper, also called machine precision or machine epsilon
+    double u = Math.scalb(1.0, -52);
+    return (numValues - 1) * u;
+  }
 
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e3f90385/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/DisjunctionMaxQuery.xml
----------------------------------------------------------------------
diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/DisjunctionMaxQuery.xml b/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/DisjunctionMaxQuery.xml
index ebf1400..0c94b00 100644
--- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/DisjunctionMaxQuery.xml
+++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/DisjunctionMaxQuery.xml
@@ -18,7 +18,7 @@
 
 <DisjunctionMaxQuery>
   <TermQuery fieldName="a">merger</TermQuery>
-  <DisjunctionMaxQuery tieBreaker="1.2">
+  <DisjunctionMaxQuery tieBreaker="0.3">
     <TermQuery fieldName="b">verger</TermQuery>
   </DisjunctionMaxQuery>
-</DisjunctionMaxQuery>
\ No newline at end of file
+</DisjunctionMaxQuery>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e3f90385/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestCoreParser.java
----------------------------------------------------------------------
diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestCoreParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestCoreParser.java
index d97e2f6..b9e44c1 100644
--- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestCoreParser.java
+++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestCoreParser.java
@@ -102,7 +102,7 @@ public class TestCoreParser extends LuceneTestCase {
     assertEquals(0.0f, d.getTieBreakerMultiplier(), 0.0001f);
     assertEquals(2, d.getDisjuncts().size());
     DisjunctionMaxQuery ndq = (DisjunctionMaxQuery) d.getDisjuncts().get(1);
-    assertEquals(1.2f, ndq.getTieBreakerMultiplier(), 0.0001f);
+    assertEquals(0.3f, ndq.getTieBreakerMultiplier(), 0.0001f);
     assertEquals(1, ndq.getDisjuncts().size());
   }