You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2022/05/05 09:43:46 UTC

[lucene] branch branch_9x updated: LUCENE-10553: Fix WANDScorer's handling of 0 and +Infty. (#860)

This is an automated email from the ASF dual-hosted git repository.

romseygeek pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new efa5d6f4d43 LUCENE-10553: Fix WANDScorer's handling of 0 and +Infty. (#860)
efa5d6f4d43 is described below

commit efa5d6f4d4354ae87a1e6144dc70aeb52b98bfd2
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Thu May 5 10:24:28 2022 +0100

    LUCENE-10553: Fix WANDScorer's handling of 0 and +Infty. (#860)
    
    The computation of the scaling factor has special cases for these two values,
    but the current logic is backwards.
---
 .../java/org/apache/lucene/search/WANDScorer.java  |  14 +--
 .../org/apache/lucene/search/TestWANDScorer.java   | 106 ++++++++++++++++++---
 2 files changed, 99 insertions(+), 21 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java
index a6ba7b0d29f..90694e6dc99 100644
--- a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java
@@ -60,17 +60,17 @@ final class WANDScorer extends Scorer {
    * {@code [2^23, 2^24[}. Special cases:
    *
    * <pre>
-   *    scalingFactor(0) = scalingFactor(MIN_VALUE) - 1
-   *    scalingFactor(+Infty) = scalingFactor(MAX_VALUE) + 1
-   *  </pre>
+   *    scalingFactor(0) = scalingFactor(MIN_VALUE) + 1
+   *    scalingFactor(+Infty) = scalingFactor(MAX_VALUE) - 1
+   * </pre>
    */
   static int scalingFactor(float f) {
     if (f < 0) {
       throw new IllegalArgumentException("Scores must be positive or null");
     } else if (f == 0) {
-      return scalingFactor(Float.MIN_VALUE) - 1;
+      return scalingFactor(Float.MIN_VALUE) + 1;
     } else if (Float.isInfinite(f)) {
-      return scalingFactor(Float.MAX_VALUE) + 1;
+      return scalingFactor(Float.MAX_VALUE) - 1;
     } else {
       double d = f;
       // Since doubles have more amplitude than floats for the
@@ -86,7 +86,6 @@ final class WANDScorer extends Scorer {
    * sure we do not miss any matches.
    */
   static long scaleMaxScore(float maxScore, int scalingFactor) {
-    assert scalingFactor(maxScore) >= scalingFactor;
     assert Float.isNaN(maxScore) == false;
     assert maxScore >= 0;
 
@@ -95,7 +94,8 @@ final class WANDScorer extends Scorer {
     final double scaled = Math.scalb((double) maxScore, scalingFactor);
 
     if (scaled > MAX_SCALED_SCORE) {
-      // This happens if one scorer returns +Infty as a max score
+      // This happens if one scorer returns +Infty as a max score, or if the scorer returns greater
+      // max scores locally than globally - which shouldn't happen with well-behaved scorers
       return MAX_SCALED_SCORE;
     }
 
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java
index 5ab0291dec5..5f5d214070c 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java
@@ -45,10 +45,17 @@ public class TestWANDScorer extends LuceneTestCase {
     doTestScalingFactor(Math.nextUp(Float.MIN_VALUE));
     doTestScalingFactor(Float.MAX_VALUE);
     doTestScalingFactor(Math.nextDown(Float.MAX_VALUE));
-    assertEquals(WANDScorer.scalingFactor(Float.MIN_VALUE) - 1, WANDScorer.scalingFactor(0));
+    assertEquals(WANDScorer.scalingFactor(Float.MIN_VALUE) + 1, WANDScorer.scalingFactor(0));
     assertEquals(
-        WANDScorer.scalingFactor(Float.MAX_VALUE) + 1,
+        WANDScorer.scalingFactor(Float.MAX_VALUE) - 1,
         WANDScorer.scalingFactor(Float.POSITIVE_INFINITY));
+
+    // Greater scores produce lower scaling factors
+    assertTrue(WANDScorer.scalingFactor(1f) > WANDScorer.scalingFactor(10f));
+    assertTrue(
+        WANDScorer.scalingFactor(Float.MAX_VALUE)
+            > WANDScorer.scalingFactor(Float.POSITIVE_INFINITY));
+    assertTrue(WANDScorer.scalingFactor(0f) > WANDScorer.scalingFactor(Float.MIN_VALUE));
   }
 
   private void doTestScalingFactor(float f) {
@@ -720,7 +727,65 @@ public class TestWANDScorer extends LuceneTestCase {
     dir.close();
   }
 
+  /** Degenerate case: all clauses produce a score of 0. */
+  public void testRandomWithZeroScores() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+    int numDocs = atLeast(1000);
+    for (int i = 0; i < numDocs; ++i) {
+      Document doc = new Document();
+      int numValues = random().nextInt(1 << random().nextInt(5));
+      int start = random().nextInt(10);
+      for (int j = 0; j < numValues; ++j) {
+        doc.add(new StringField("foo", Integer.toString(start + j), Store.NO));
+      }
+      w.addDocument(doc);
+    }
+    IndexReader reader = DirectoryReader.open(w);
+    w.close();
+    IndexSearcher searcher = newSearcher(reader);
+
+    for (int iter = 0; iter < 100; ++iter) {
+      int start = random().nextInt(10);
+      int numClauses = random().nextInt(1 << random().nextInt(5));
+      BooleanQuery.Builder builder = new BooleanQuery.Builder();
+      for (int i = 0; i < numClauses; ++i) {
+        builder.add(
+            maybeWrap(
+                new BoostQuery(
+                    new ConstantScoreQuery(
+                        new TermQuery(new Term("foo", Integer.toString(start + i)))),
+                    0f)),
+            Occur.SHOULD);
+      }
+      Query query = builder.build();
+
+      CheckHits.checkTopScores(random(), query, searcher);
+
+      int filterTerm = random().nextInt(30);
+      Query filteredQuery =
+          new BooleanQuery.Builder()
+              .add(query, Occur.MUST)
+              .add(new TermQuery(new Term("foo", Integer.toString(filterTerm))), Occur.FILTER)
+              .build();
+
+      CheckHits.checkTopScores(random(), filteredQuery, searcher);
+    }
+    reader.close();
+    dir.close();
+  }
+
+  /** Test the case when some clauses produce infinite max scores. */
   public void testRandomWithInfiniteMaxScore() throws IOException {
+    doTestRandomSpecialMaxScore(Float.POSITIVE_INFINITY);
+  }
+
+  /** Test the case when some clauses produce finite max scores, but their sum overflows. */
+  public void testRandomWithMaxScoreOverflow() throws IOException {
+    doTestRandomSpecialMaxScore(Float.MAX_VALUE);
+  }
+
+  private void doTestRandomSpecialMaxScore(float maxScore) throws IOException {
     Directory dir = newDirectory();
     IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
     int numDocs = atLeast(1000);
@@ -745,7 +810,8 @@ public class TestWANDScorer extends LuceneTestCase {
         Query query = new TermQuery(new Term("foo", Integer.toString(start + i)));
         if (random().nextBoolean()) {
           query =
-              new InfiniteMaxScoreWrapperQuery(query, numDocs / TestUtil.nextInt(random(), 1, 100));
+              new MaxScoreWrapperQuery(
+                  query, numDocs / TestUtil.nextInt(random(), 1, 100), maxScore);
         }
         builder.add(query, Occur.SHOULD);
       }
@@ -766,14 +832,16 @@ public class TestWANDScorer extends LuceneTestCase {
     dir.close();
   }
 
-  private static class InfiniteMaxScoreWrapperScorer extends FilterScorer {
+  private static class MaxScoreWrapperScorer extends FilterScorer {
 
     private final int maxRange;
+    private final float maxScore;
     private int lastShallowTarget = -1;
 
-    InfiniteMaxScoreWrapperScorer(Scorer scorer, int maxRange) {
+    MaxScoreWrapperScorer(Scorer scorer, int maxRange, float maxScore) {
       super(scorer);
       this.maxRange = maxRange;
+      this.maxScore = maxScore;
     }
 
     @Override
@@ -785,24 +853,26 @@ public class TestWANDScorer extends LuceneTestCase {
     @Override
     public float getMaxScore(int upTo) throws IOException {
       if (upTo - Math.max(docID(), lastShallowTarget) >= maxRange) {
-        return Float.POSITIVE_INFINITY;
+        return maxScore;
       }
       return in.getMaxScore(upTo);
     }
   }
 
-  private static class InfiniteMaxScoreWrapperQuery extends Query {
+  private static class MaxScoreWrapperQuery extends Query {
 
     private final Query query;
     private final int maxRange;
+    private final float maxScore;
 
     /**
      * If asked for the maximum score over a range of doc IDs that is greater than or equal to
-     * maxRange, this query will return a maximum score of +Infty
+     * maxRange, this query will return the provided maxScore.
      */
-    InfiniteMaxScoreWrapperQuery(Query query, int maxRange) {
+    MaxScoreWrapperQuery(Query query, int maxRange, float maxScore) {
       this.query = query;
       this.maxRange = maxRange;
+      this.maxScore = maxScore;
     }
 
     @Override
@@ -812,19 +882,27 @@ public class TestWANDScorer extends LuceneTestCase {
 
     @Override
     public boolean equals(Object obj) {
-      return sameClassAs(obj) && query.equals(((InfiniteMaxScoreWrapperQuery) obj).query);
+      if (sameClassAs(obj) == false) {
+        return false;
+      }
+      MaxScoreWrapperQuery that = (MaxScoreWrapperQuery) obj;
+      return query.equals(that.query) && maxRange == that.maxRange && maxScore == that.maxScore;
     }
 
     @Override
     public int hashCode() {
-      return 31 * classHash() + query.hashCode();
+      int hash = classHash();
+      hash = 31 * hash + query.hashCode();
+      hash = 31 * hash + Integer.hashCode(maxRange);
+      hash = 31 * hash + Float.hashCode(maxScore);
+      return hash;
     }
 
     @Override
     public Query rewrite(IndexReader reader) throws IOException {
       Query rewritten = query.rewrite(reader);
       if (rewritten != query) {
-        return new InfiniteMaxScoreWrapperQuery(rewritten, maxRange);
+        return new MaxScoreWrapperQuery(rewritten, maxRange, maxScore);
       }
       return super.rewrite(reader);
     }
@@ -842,7 +920,7 @@ public class TestWANDScorer extends LuceneTestCase {
           if (scorer == null) {
             return null;
           } else {
-            return new InfiniteMaxScoreWrapperScorer(scorer, maxRange);
+            return new MaxScoreWrapperScorer(scorer, maxRange, maxScore);
           }
         }
 
@@ -856,7 +934,7 @@ public class TestWANDScorer extends LuceneTestCase {
 
               @Override
               public Scorer get(long leadCost) throws IOException {
-                return new InfiniteMaxScoreWrapperScorer(supplier.get(leadCost), maxRange);
+                return new MaxScoreWrapperScorer(supplier.get(leadCost), maxRange, maxScore);
               }
 
               @Override