You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2019/12/19 16:15:21 UTC

[lucene-solr] branch master updated: LUCENE-9103: WANDScorer can miss some hits in some rare conditions.

This is an automated email from the ASF dual-hosted git repository.

jpountz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new 907d114  LUCENE-9103: WANDScorer can miss some hits in some rare conditions.
907d114 is described below

commit 907d1142fa435451b40c072f1d445ee868044b15
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Thu Dec 19 16:51:02 2019 +0100

    LUCENE-9103: WANDScorer can miss some hits in some rare conditions.
---
 lucene/CHANGES.txt                                 |  2 +
 .../java/org/apache/lucene/search/WANDScorer.java  | 44 +++++++++++++++-------
 2 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index ce5cc59..eb7ab8a 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -174,6 +174,8 @@ Bug Fixes
 * LUCENE-9055: Fix the detection of lines crossing triangles through edge points.
   (Ignacio Vera)
 
+* LUCENE-9103: Disjunctions can miss some hits in some rare conditions. (Adrien Grand)
+
 Other
 
 * LUCENE-8979: Code Cleanup: Use entryset for map iteration wherever possible. - Part 2 (Koen De Groote)
diff --git a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java
index a633f08..6152959 100644
--- a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java
@@ -185,6 +185,7 @@ final class WANDScorer extends Scorer {
     }
 
     assert minCompetitiveScore == 0 || tailMaxScore < minCompetitiveScore;
+    assert doc <= upTo;
 
     return true;
   }
@@ -374,17 +375,34 @@ final class WANDScorer extends Scorer {
     }
   }
 
+  /**
+   * Update {@code upTo} and maximum scores of sub scorers so that {@code upTo}
+   * is greater than or equal to the next candidate after {@code target}, i.e.
+   * the top of `head`.
+   */
   private void updateMaxScoresIfNecessary(int target) throws IOException {
     assert lead == null;
 
-    if (head.size() == 0) { // no matches in the current block
-      if (upTo != DocIdSetIterator.NO_MORE_DOCS) {
-        updateMaxScores(Math.max(target, upTo + 1));
+    while (upTo < DocIdSetIterator.NO_MORE_DOCS) {
+      if (head.size() == 0) {
+        // All clauses could fit in the tail, which means that the sum of the
+        // maximum scores of sub clauses is less than the minimum competitive score.
+        // Move to the next block until this condition becomes false.
+        target = Math.max(target, upTo + 1);
+        updateMaxScores(target);
+      } else if (head.top().doc > upTo) {
+        // We have a next candidate but it's not in the current block. We need to
+        // move to the next block in order to not miss any potential hits between
+        // `target` and `head.top().doc`.
+        assert head.top().doc >= target;
+        updateMaxScores(target);
+        break;
+      } else {
+        break;
       }
-    } else if (head.top().doc > upTo) { // the next candidate is in a different block
-      assert head.top().doc >= target;
-      updateMaxScores(target);
     }
+
+    assert upTo == DocIdSetIterator.NO_MORE_DOCS || (head.size() > 0 && head.top().doc <= upTo);
   }
 
   /** Set 'doc' to the next potential match, and move all disis of 'head' that
@@ -394,14 +412,12 @@ final class WANDScorer extends Scorer {
     updateMaxScoresIfNecessary(target);
     assert upTo >= target;
 
-    // If the head is empty, it means that the sum of all max scores is not
-    // enough to produce a competitive score. So we jump to the next block.
-    while (head.size() == 0) {
-      if (upTo == DocIdSetIterator.NO_MORE_DOCS) {
-        doc = DocIdSetIterator.NO_MORE_DOCS;
-        return;
-      }
-      updateMaxScores(upTo + 1);
+    // updateMaxScores tries to move forward until a block with matches is found
+    // so if the head is empty it means there are no matches at all anymore
+    if (head.size() == 0) {
+      assert upTo == DocIdSetIterator.NO_MORE_DOCS;
+      doc = DocIdSetIterator.NO_MORE_DOCS;
+      return;
     }
 
     // The top of `head` defines the next potential match