You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2019/12/19 16:15:21 UTC
[lucene-solr] branch master updated: LUCENE-9103: WANDScorer can
miss some hits in some rare conditions.
This is an automated email from the ASF dual-hosted git repository.
jpountz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new 907d114 LUCENE-9103: WANDScorer can miss some hits in some rare conditions.
907d114 is described below
commit 907d1142fa435451b40c072f1d445ee868044b15
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Thu Dec 19 16:51:02 2019 +0100
LUCENE-9103: WANDScorer can miss some hits in some rare conditions.
---
lucene/CHANGES.txt | 2 +
.../java/org/apache/lucene/search/WANDScorer.java | 44 +++++++++++++++-------
2 files changed, 32 insertions(+), 14 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index ce5cc59..eb7ab8a 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -174,6 +174,8 @@ Bug Fixes
* LUCENE-9055: Fix the detection of lines crossing triangles through edge points.
(Ignacio Vera)
+* LUCENE-9103: Disjunctions can miss some hits in some rare conditions. (Adrien Grand)
+
Other
* LUCENE-8979: Code Cleanup: Use entryset for map iteration wherever possible. - Part 2 (Koen De Groote)
diff --git a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java
index a633f08..6152959 100644
--- a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java
@@ -185,6 +185,7 @@ final class WANDScorer extends Scorer {
}
assert minCompetitiveScore == 0 || tailMaxScore < minCompetitiveScore;
+ assert doc <= upTo;
return true;
}
@@ -374,17 +375,34 @@ final class WANDScorer extends Scorer {
}
}
+ /**
+ * Update {@code upTo} and maximum scores of sub scorers so that {@code upTo}
+ * is greater than or equal to the next candidate after {@code target}, i.e.
+ * the top of `head`.
+ */
private void updateMaxScoresIfNecessary(int target) throws IOException {
assert lead == null;
- if (head.size() == 0) { // no matches in the current block
- if (upTo != DocIdSetIterator.NO_MORE_DOCS) {
- updateMaxScores(Math.max(target, upTo + 1));
+ while (upTo < DocIdSetIterator.NO_MORE_DOCS) {
+ if (head.size() == 0) {
+ // All clauses could fit in the tail, which means that the sum of the
+ // maximum scores of sub clauses is less than the minimum competitive score.
+ // Move to the next block until this condition becomes false.
+ target = Math.max(target, upTo + 1);
+ updateMaxScores(target);
+ } else if (head.top().doc > upTo) {
+ // We have a next candidate but it's not in the current block. We need to
+ // move to the next block in order to not miss any potential hits between
+ // `target` and `head.top().doc`.
+ assert head.top().doc >= target;
+ updateMaxScores(target);
+ break;
+ } else {
+ break;
}
- } else if (head.top().doc > upTo) { // the next candidate is in a different block
- assert head.top().doc >= target;
- updateMaxScores(target);
}
+
+ assert upTo == DocIdSetIterator.NO_MORE_DOCS || (head.size() > 0 && head.top().doc <= upTo);
}
/** Set 'doc' to the next potential match, and move all disis of 'head' that
@@ -394,14 +412,12 @@ final class WANDScorer extends Scorer {
updateMaxScoresIfNecessary(target);
assert upTo >= target;
- // If the head is empty, it means that the sum of all max scores is not
- // enough to produce a competitive score. So we jump to the next block.
- while (head.size() == 0) {
- if (upTo == DocIdSetIterator.NO_MORE_DOCS) {
- doc = DocIdSetIterator.NO_MORE_DOCS;
- return;
- }
- updateMaxScores(upTo + 1);
+ // updateMaxScores tries to move forward until a block with matches is found
+ // so if the head is empty it means there are no matches at all anymore
+ if (head.size() == 0) {
+ assert upTo == DocIdSetIterator.NO_MORE_DOCS;
+ doc = DocIdSetIterator.NO_MORE_DOCS;
+ return;
}
// The top of `head` defines the next potential match