You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2022/05/05 09:43:46 UTC
[lucene] branch branch_9x updated: LUCENE-10553: Fix WANDScorer's handling of 0 and +Infty. (#860)
This is an automated email from the ASF dual-hosted git repository.
romseygeek pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new efa5d6f4d43 LUCENE-10553: Fix WANDScorer's handling of 0 and +Infty. (#860)
efa5d6f4d43 is described below
commit efa5d6f4d4354ae87a1e6144dc70aeb52b98bfd2
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Thu May 5 10:24:28 2022 +0100
LUCENE-10553: Fix WANDScorer's handling of 0 and +Infty. (#860)
The computation of the scaling factor has special cases for these two values,
but the current logic is backwards.
---
.../java/org/apache/lucene/search/WANDScorer.java | 14 +--
.../org/apache/lucene/search/TestWANDScorer.java | 106 ++++++++++++++++++---
2 files changed, 99 insertions(+), 21 deletions(-)
diff --git a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java
index a6ba7b0d29f..90694e6dc99 100644
--- a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java
@@ -60,17 +60,17 @@ final class WANDScorer extends Scorer {
* {@code [2^23, 2^24[}. Special cases:
*
* <pre>
- * scalingFactor(0) = scalingFactor(MIN_VALUE) - 1
- * scalingFactor(+Infty) = scalingFactor(MAX_VALUE) + 1
- * </pre>
+ * scalingFactor(0) = scalingFactor(MIN_VALUE) + 1
+ * scalingFactor(+Infty) = scalingFactor(MAX_VALUE) - 1
+ * </pre>
*/
static int scalingFactor(float f) {
if (f < 0) {
throw new IllegalArgumentException("Scores must be positive or null");
} else if (f == 0) {
- return scalingFactor(Float.MIN_VALUE) - 1;
+ return scalingFactor(Float.MIN_VALUE) + 1;
} else if (Float.isInfinite(f)) {
- return scalingFactor(Float.MAX_VALUE) + 1;
+ return scalingFactor(Float.MAX_VALUE) - 1;
} else {
double d = f;
// Since doubles have more amplitude than floats for the
@@ -86,7 +86,6 @@ final class WANDScorer extends Scorer {
* sure we do not miss any matches.
*/
static long scaleMaxScore(float maxScore, int scalingFactor) {
- assert scalingFactor(maxScore) >= scalingFactor;
assert Float.isNaN(maxScore) == false;
assert maxScore >= 0;
@@ -95,7 +94,8 @@ final class WANDScorer extends Scorer {
final double scaled = Math.scalb((double) maxScore, scalingFactor);
if (scaled > MAX_SCALED_SCORE) {
- // This happens if one scorer returns +Infty as a max score
+ // This happens if one scorer returns +Infty as a max score, or if the scorer returns greater
+ // max scores locally than globally - which shouldn't happen with well-behaved scorers
return MAX_SCALED_SCORE;
}
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java
index 5ab0291dec5..5f5d214070c 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java
@@ -45,10 +45,17 @@ public class TestWANDScorer extends LuceneTestCase {
doTestScalingFactor(Math.nextUp(Float.MIN_VALUE));
doTestScalingFactor(Float.MAX_VALUE);
doTestScalingFactor(Math.nextDown(Float.MAX_VALUE));
- assertEquals(WANDScorer.scalingFactor(Float.MIN_VALUE) - 1, WANDScorer.scalingFactor(0));
+ assertEquals(WANDScorer.scalingFactor(Float.MIN_VALUE) + 1, WANDScorer.scalingFactor(0));
assertEquals(
- WANDScorer.scalingFactor(Float.MAX_VALUE) + 1,
+ WANDScorer.scalingFactor(Float.MAX_VALUE) - 1,
WANDScorer.scalingFactor(Float.POSITIVE_INFINITY));
+
+ // Greater scores produce lower scaling factors
+ assertTrue(WANDScorer.scalingFactor(1f) > WANDScorer.scalingFactor(10f));
+ assertTrue(
+ WANDScorer.scalingFactor(Float.MAX_VALUE)
+ > WANDScorer.scalingFactor(Float.POSITIVE_INFINITY));
+ assertTrue(WANDScorer.scalingFactor(0f) > WANDScorer.scalingFactor(Float.MIN_VALUE));
}
private void doTestScalingFactor(float f) {
@@ -720,7 +727,65 @@ public class TestWANDScorer extends LuceneTestCase {
dir.close();
}
+ /** Degenerate case: all clauses produce a score of 0. */
+ public void testRandomWithZeroScores() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+ int numDocs = atLeast(1000);
+ for (int i = 0; i < numDocs; ++i) {
+ Document doc = new Document();
+ int numValues = random().nextInt(1 << random().nextInt(5));
+ int start = random().nextInt(10);
+ for (int j = 0; j < numValues; ++j) {
+ doc.add(new StringField("foo", Integer.toString(start + j), Store.NO));
+ }
+ w.addDocument(doc);
+ }
+ IndexReader reader = DirectoryReader.open(w);
+ w.close();
+ IndexSearcher searcher = newSearcher(reader);
+
+ for (int iter = 0; iter < 100; ++iter) {
+ int start = random().nextInt(10);
+ int numClauses = random().nextInt(1 << random().nextInt(5));
+ BooleanQuery.Builder builder = new BooleanQuery.Builder();
+ for (int i = 0; i < numClauses; ++i) {
+ builder.add(
+ maybeWrap(
+ new BoostQuery(
+ new ConstantScoreQuery(
+ new TermQuery(new Term("foo", Integer.toString(start + i)))),
+ 0f)),
+ Occur.SHOULD);
+ }
+ Query query = builder.build();
+
+ CheckHits.checkTopScores(random(), query, searcher);
+
+ int filterTerm = random().nextInt(30);
+ Query filteredQuery =
+ new BooleanQuery.Builder()
+ .add(query, Occur.MUST)
+ .add(new TermQuery(new Term("foo", Integer.toString(filterTerm))), Occur.FILTER)
+ .build();
+
+ CheckHits.checkTopScores(random(), filteredQuery, searcher);
+ }
+ reader.close();
+ dir.close();
+ }
+
+ /** Test the case when some clauses produce infinite max scores. */
public void testRandomWithInfiniteMaxScore() throws IOException {
+ doTestRandomSpecialMaxScore(Float.POSITIVE_INFINITY);
+ }
+
+ /** Test the case when some clauses produce finite max scores, but their sum overflows. */
+ public void testRandomWithMaxScoreOverflow() throws IOException {
+ doTestRandomSpecialMaxScore(Float.MAX_VALUE);
+ }
+
+ private void doTestRandomSpecialMaxScore(float maxScore) throws IOException {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
int numDocs = atLeast(1000);
@@ -745,7 +810,8 @@ public class TestWANDScorer extends LuceneTestCase {
Query query = new TermQuery(new Term("foo", Integer.toString(start + i)));
if (random().nextBoolean()) {
query =
- new InfiniteMaxScoreWrapperQuery(query, numDocs / TestUtil.nextInt(random(), 1, 100));
+ new MaxScoreWrapperQuery(
+ query, numDocs / TestUtil.nextInt(random(), 1, 100), maxScore);
}
builder.add(query, Occur.SHOULD);
}
@@ -766,14 +832,16 @@ public class TestWANDScorer extends LuceneTestCase {
dir.close();
}
- private static class InfiniteMaxScoreWrapperScorer extends FilterScorer {
+ private static class MaxScoreWrapperScorer extends FilterScorer {
private final int maxRange;
+ private final float maxScore;
private int lastShallowTarget = -1;
- InfiniteMaxScoreWrapperScorer(Scorer scorer, int maxRange) {
+ MaxScoreWrapperScorer(Scorer scorer, int maxRange, float maxScore) {
super(scorer);
this.maxRange = maxRange;
+ this.maxScore = maxScore;
}
@Override
@@ -785,24 +853,26 @@ public class TestWANDScorer extends LuceneTestCase {
@Override
public float getMaxScore(int upTo) throws IOException {
if (upTo - Math.max(docID(), lastShallowTarget) >= maxRange) {
- return Float.POSITIVE_INFINITY;
+ return maxScore;
}
return in.getMaxScore(upTo);
}
}
- private static class InfiniteMaxScoreWrapperQuery extends Query {
+ private static class MaxScoreWrapperQuery extends Query {
private final Query query;
private final int maxRange;
+ private final float maxScore;
/**
* If asked for the maximum score over a range of doc IDs that is greater than or equal to
- * maxRange, this query will return a maximum score of +Infty
+ * maxRange, this query will return the provided maxScore.
*/
- InfiniteMaxScoreWrapperQuery(Query query, int maxRange) {
+ MaxScoreWrapperQuery(Query query, int maxRange, float maxScore) {
this.query = query;
this.maxRange = maxRange;
+ this.maxScore = maxScore;
}
@Override
@@ -812,19 +882,27 @@ public class TestWANDScorer extends LuceneTestCase {
@Override
public boolean equals(Object obj) {
- return sameClassAs(obj) && query.equals(((InfiniteMaxScoreWrapperQuery) obj).query);
+ if (sameClassAs(obj) == false) {
+ return false;
+ }
+ MaxScoreWrapperQuery that = (MaxScoreWrapperQuery) obj;
+ return query.equals(that.query) && maxRange == that.maxRange && maxScore == that.maxScore;
}
@Override
public int hashCode() {
- return 31 * classHash() + query.hashCode();
+ int hash = classHash();
+ hash = 31 * hash + query.hashCode();
+ hash = 31 * hash + Integer.hashCode(maxRange);
+ hash = 31 * hash + Float.hashCode(maxScore);
+ return hash;
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
Query rewritten = query.rewrite(reader);
if (rewritten != query) {
- return new InfiniteMaxScoreWrapperQuery(rewritten, maxRange);
+ return new MaxScoreWrapperQuery(rewritten, maxRange, maxScore);
}
return super.rewrite(reader);
}
@@ -842,7 +920,7 @@ public class TestWANDScorer extends LuceneTestCase {
if (scorer == null) {
return null;
} else {
- return new InfiniteMaxScoreWrapperScorer(scorer, maxRange);
+ return new MaxScoreWrapperScorer(scorer, maxRange, maxScore);
}
}
@@ -856,7 +934,7 @@ public class TestWANDScorer extends LuceneTestCase {
@Override
public Scorer get(long leadCost) throws IOException {
- return new InfiniteMaxScoreWrapperScorer(supplier.get(leadCost), maxRange);
+ return new MaxScoreWrapperScorer(supplier.get(leadCost), maxRange, maxScore);
}
@Override