You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by da...@apache.org on 2018/08/21 01:30:36 UTC
[09/50] [abbrv] lucene-solr:jira/http2: LUCENE-8448: Boolean queries
now propagates the mininum score to their sub-scorers.
LUCENE-8448: Boolean queries now propagates the mininum score to their sub-scorers.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/7ecf9b63
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/7ecf9b63
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/7ecf9b63
Branch: refs/heads/jira/http2
Commit: 7ecf9b63b9bf25a1662468beb671cd71bd991280
Parents: 3e58e8a
Author: Jim Ferenczi <ji...@apache.org>
Authored: Tue Aug 14 20:09:27 2018 +0200
Committer: Jim Ferenczi <ji...@apache.org>
Committed: Tue Aug 14 20:09:27 2018 +0200
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 +
.../search/BlockMaxConjunctionScorer.java | 1 +
.../apache/lucene/search/ConjunctionScorer.java | 2 -
.../lucene/search/MaxScoreSumPropagator.java | 102 +++++++++-
.../apache/lucene/search/ReqOptSumScorer.java | 8 +
.../org/apache/lucene/search/WANDScorer.java | 1 +
.../search/TestMaxScoreSumPropagator.java | 200 +++++++++++++++++++
7 files changed, 314 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7ecf9b63/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 5ae0111..d976bf8 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -140,6 +140,9 @@ Optimizations
* LUCENE-8204: Boolean queries with a mix of required and optional clauses are
now faster if the total hit count is not required. (Jim Ferenczi, Adrien Grand)
+* LUCENE-8448: Boolean queries now propagates the mininum score to their sub-scorers.
+ (Jim Ferenczi, Adrien Grand)
+
======================= Lucene 7.5.0 =======================
API Changes:
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7ecf9b63/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java
index cbfcd2e..b49c800 100644
--- a/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java
@@ -246,6 +246,7 @@ final class BlockMaxConjunctionScorer extends Scorer {
@Override
public void setMinCompetitiveScore(float score) {
minScore = score;
+ maxScorePropagator.setMinCompetitiveScore(score);
}
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7ecf9b63/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java
index 7a1b956..5fe5c75 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java
@@ -27,7 +27,6 @@ class ConjunctionScorer extends Scorer {
final DocIdSetIterator disi;
final Scorer[] scorers;
final Collection<Scorer> required;
- final MaxScoreSumPropagator maxScorePropagator;
/** Create a new {@link ConjunctionScorer}, note that {@code scorers} must be a subset of {@code required}. */
ConjunctionScorer(Weight weight, Collection<Scorer> required, Collection<Scorer> scorers) throws IOException {
@@ -36,7 +35,6 @@ class ConjunctionScorer extends Scorer {
this.disi = ConjunctionDISI.intersectScorers(required);
this.scorers = scorers.toArray(new Scorer[scorers.size()]);
this.required = required;
- this.maxScorePropagator = new MaxScoreSumPropagator(scorers);
}
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7ecf9b63/lucene/core/src/java/org/apache/lucene/search/MaxScoreSumPropagator.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/MaxScoreSumPropagator.java b/lucene/core/src/java/org/apache/lucene/search/MaxScoreSumPropagator.java
index 6c204ed..331754c 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MaxScoreSumPropagator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MaxScoreSumPropagator.java
@@ -19,8 +19,11 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Collection;
+import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.MathUtil;
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
/**
* Utility class to propagate scoring information in {@link BooleanQuery}, which
* compute the score as the sum of the scores of its matching clauses.
@@ -28,12 +31,65 @@ import org.apache.lucene.util.MathUtil;
*/
final class MaxScoreSumPropagator {
+ /**
+ * Return an array which, at index i, stores the sum of all entries of
+ * {@code v} except the one at index i.
+ */
+ private static double[] computeSumOfComplement(float[] v) {
+ // We do not use subtraction on purpose because it would defeat the
+ // upperbound formula that we use for sums.
+ // Naive approach would be O(n^2), but we can do O(n) by computing the
+ // sum for i<j and i>j and then sum them.
+ double[] sum1 = new double[v.length];
+ for (int i = 1; i < sum1.length; ++i) {
+ sum1[i] = sum1[i - 1] + v[i - 1];
+ }
+
+ double[] sum2 = new double[v.length];
+ for (int i = sum2.length - 2; i >= 0; --i) {
+ sum2[i] = sum2[i + 1] + v[i + 1];
+ }
+
+ double[] result = new double[v.length];
+ for (int i = 0; i < result.length; ++i) {
+ result[i] = sum1[i] + sum2[i];
+ }
+ return result;
+ }
+
private final int numClauses;
private final Scorer[] scorers;
+ private final double[] sumOfOtherMaxScores;
MaxScoreSumPropagator(Collection<? extends Scorer> scorerList) throws IOException {
numClauses = scorerList.size();
scorers = scorerList.toArray(new Scorer[numClauses]);
+
+ // We'll need max scores multiple times so we cache them
+ float[] maxScores = new float[numClauses];
+ for (int i = 0; i < numClauses; ++i) {
+ scorers[i].advanceShallow(0);
+ maxScores[i] = scorers[i].getMaxScore(NO_MORE_DOCS);
+ }
+ // Sort by decreasing max score
+ new InPlaceMergeSorter() {
+ @Override
+ protected void swap(int i, int j) {
+ Scorer tmp = scorers[i];
+ scorers[i] = scorers[j];
+ scorers[j] = tmp;
+ float tmpF = maxScores[i];
+ maxScores[i] = maxScores[j];
+ maxScores[j] = tmpF;
+ }
+
+ @Override
+ protected int compare(int i, int j) {
+ return Float.compare(maxScores[j], maxScores[i]);
+ }
+ }.sort(0, scorers.length);
+
+ sumOfOtherMaxScores = computeSumOfComplement(maxScores);
}
void advanceShallow(int target) throws IOException {
@@ -54,6 +110,51 @@ final class MaxScoreSumPropagator {
return scoreSumUpperBound(maxScore);
}
+ void setMinCompetitiveScore(float minScore) {
+ if (minScore == 0) {
+ return ;
+ }
+ // A double that is less than 'minScore' might still be converted to 'minScore'
+ // when casted to a float, so we go to the previous float to avoid this issue
+ float minScoreDown = Math.nextDown(minScore);
+ for (int i = 0; i < numClauses; ++i) {
+ double sumOfOtherMaxScores = this.sumOfOtherMaxScores[i];
+ float minCompetitiveScore = getMinCompetitiveScore(minScoreDown, sumOfOtherMaxScores);
+ if (minCompetitiveScore <= 0) {
+ // given that scorers are sorted by decreasing max score, next scorers will
+ // have 0 as a minimum competitive score too
+ break;
+ }
+ scorers[i].setMinCompetitiveScore(minCompetitiveScore);
+ }
+ }
+
+ /**
+ * Return the minimum score that a Scorer must produce in order for a hit to
+ * be competitive.
+ */
+ private float getMinCompetitiveScore(float minScoreSum, double sumOfOtherMaxScores) {
+ assert numClauses > 0;
+ if (minScoreSum <= sumOfOtherMaxScores) {
+ return 0f;
+ }
+
+ // We need to find a value 'minScore' so that 'minScore + sumOfOtherMaxScores <= minScoreSum'
+ // TODO: is there an efficient way to find the greatest value that meets this requirement?
+ float minScore = (float) (minScoreSum - sumOfOtherMaxScores);
+ int iters = 0;
+ while (scoreSumUpperBound(minScore + sumOfOtherMaxScores) > minScoreSum) {
+ // Important: use ulp of minScoreSum and not minScore to make sure that we
+ // converge quickly.
+ minScore -= Math.ulp(minScoreSum);
+ // this should converge in at most two iterations:
+ // - one because of the subtraction rounding error
+ // - one because of the error introduced by sumUpperBound
+ assert ++iters <= 2 : iters;
+ }
+ return Math.max(minScore, 0f);
+ }
+
private float scoreSumUpperBound(double sum) {
if (numClauses <= 2) {
// When there are only two clauses, the sum is always the same regardless
@@ -72,5 +173,4 @@ final class MaxScoreSumPropagator {
double b = MathUtil.sumRelativeErrorBound(numClauses);
return (float) ((1.0 + 2 * b) * sum);
}
-
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7ecf9b63/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java
index dc3ee02..28f64b2 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java
@@ -18,6 +18,7 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collection;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
@@ -34,6 +35,7 @@ class ReqOptSumScorer extends Scorer {
private final DocIdSetIterator approximation;
private final TwoPhaseIterator twoPhase;
+ private final MaxScoreSumPropagator maxScorePropagator;
private float minScore = 0;
private float reqMaxScore;
private boolean optIsRequired;
@@ -49,6 +51,11 @@ class ReqOptSumScorer extends Scorer {
super(reqScorer.weight);
assert reqScorer != null;
assert optScorer != null;
+ if (scoreMode == ScoreMode.TOP_SCORES) {
+ this.maxScorePropagator = new MaxScoreSumPropagator(Arrays.asList(reqScorer, optScorer));
+ } else {
+ this.maxScorePropagator = null;
+ }
this.reqScorer = reqScorer;
this.optScorer = optScorer;
@@ -289,6 +296,7 @@ class ReqOptSumScorer extends Scorer {
if (reqMaxScore < minScore) {
optIsRequired = true;
}
+ maxScorePropagator.setMinCompetitiveScore(minScore);
}
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7ecf9b63/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java
index 82b8044..01b46f7 100644
--- a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java
@@ -193,6 +193,7 @@ final class WANDScorer extends Scorer {
long scaledMinScore = scaleMinScore(minScore, scalingFactor);
assert scaledMinScore >= minCompetitiveScore;
minCompetitiveScore = scaledMinScore;
+ maxScorePropagator.setMinCompetitiveScore(minScore);
}
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7ecf9b63/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreSumPropagator.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreSumPropagator.java b/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreSumPropagator.java
new file mode 100644
index 0000000..d072233
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreSumPropagator.java
@@ -0,0 +1,200 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
+public class TestMaxScoreSumPropagator extends LuceneTestCase {
+
+ private static class FakeScorer extends Scorer {
+
+ final float maxScore;
+ float minCompetitiveScore;
+
+ FakeScorer(float maxScore) {
+ super(null);
+ this.maxScore = maxScore;
+ }
+
+ @Override
+ public int docID() {
+ return 0;
+ }
+
+ @Override
+ public float score() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public DocIdSetIterator iterator() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void setMinCompetitiveScore(float minCompetitiveScore) {
+ this.minCompetitiveScore = minCompetitiveScore;
+ }
+
+ @Override
+ public float getMaxScore(int upTo) throws IOException {
+ assert upTo == NO_MORE_DOCS;
+ return maxScore;
+ }
+ }
+
+ public void test0Clause() throws IOException {
+ MaxScoreSumPropagator p = new MaxScoreSumPropagator(Collections.emptyList());
+ p.setMinCompetitiveScore(0f); // no exception
+ p.setMinCompetitiveScore(0.5f); // no exception
+ }
+
+ public void test1Clause() throws IOException {
+ FakeScorer a = new FakeScorer(1);
+
+ MaxScoreSumPropagator p = new MaxScoreSumPropagator(Collections.singletonList(a));
+ assertEquals(1f, p.getMaxScore(NO_MORE_DOCS), 0f);
+ p.setMinCompetitiveScore(0f);
+ assertEquals(0f, a.minCompetitiveScore, 0f);
+ p.setMinCompetitiveScore(0.5f);
+ assertEquals(Math.nextDown(0.5f), a.minCompetitiveScore, 0f);
+ p.setMinCompetitiveScore(1f);
+ assertEquals(Math.nextDown(1f), a.minCompetitiveScore, 0f);
+ }
+
+ public void test2Clauses() throws IOException {
+ FakeScorer a = new FakeScorer(1);
+ FakeScorer b = new FakeScorer(2);
+
+ MaxScoreSumPropagator p = new MaxScoreSumPropagator(Arrays.asList(a, b));
+ assertEquals(3f, p.getMaxScore(NO_MORE_DOCS), 0f);
+
+ p.setMinCompetitiveScore(1f);
+ assertEquals(0f, a.minCompetitiveScore, 0f);
+ assertEquals(0f, b.minCompetitiveScore, 0f);
+
+ p.setMinCompetitiveScore(2f);
+ assertEquals(0f, a.minCompetitiveScore, 0f);
+ assertEquals(Math.nextDown(2f) - 1f, b.minCompetitiveScore, 0f);
+
+ p.setMinCompetitiveScore(2.5f);
+ assertEquals(Math.nextDown(2.5f) - 2f, a.minCompetitiveScore, 0f);
+ assertEquals(Math.nextDown(2.5f) - 1f, b.minCompetitiveScore, 0f);
+
+ p.setMinCompetitiveScore(3f);
+ assertEquals(Math.nextDown(3f) - 2f, a.minCompetitiveScore, 0f);
+ assertEquals(Math.nextDown(3f) - 1f, b.minCompetitiveScore, 0f);
+ }
+
+ public void test3Clauses() throws IOException {
+ FakeScorer a = new FakeScorer(1);
+ FakeScorer b = new FakeScorer(2);
+ FakeScorer c = new FakeScorer(1.5f);
+
+ MaxScoreSumPropagator p = new MaxScoreSumPropagator(Arrays.asList(a, b, c));
+ assertEquals(4.5f, p.getMaxScore(NO_MORE_DOCS), 0f);
+
+ p.setMinCompetitiveScore(1f);
+ assertEquals(0f, a.minCompetitiveScore, 0f);
+ assertEquals(0f, b.minCompetitiveScore, 0f);
+ assertEquals(0f, c.minCompetitiveScore, 0f);
+
+ p.setMinCompetitiveScore(2f);
+ assertEquals(0f, a.minCompetitiveScore, 0f);
+ assertEquals(0f, b.minCompetitiveScore, 0f);
+ assertEquals(0f, c.minCompetitiveScore, 0f);
+
+ p.setMinCompetitiveScore(3f);
+ assertEquals(0f, a.minCompetitiveScore, 0f);
+ assertEquals(Math.nextDown(3f) - 1f - 1.5f, b.minCompetitiveScore, 0f);
+ assertEquals(0f, c.minCompetitiveScore, 0f);
+
+ p.setMinCompetitiveScore(4f);
+ assertEquals(Math.nextDown(4f) - 2f - 1.5f, a.minCompetitiveScore, 0f);
+ assertEquals(Math.nextDown(4f) - 1f - 1.5f, b.minCompetitiveScore, 0f);
+ assertEquals(Math.nextDown(4f) - 1f - 2f, c.minCompetitiveScore, 0f);
+ }
+
+ public void test2ClausesRandomScore() throws IOException {
+ for (int iter = 0; iter < 10; ++iter) {
+ FakeScorer a = new FakeScorer(random().nextFloat());
+ FakeScorer b = new FakeScorer(Math.nextUp(a.getMaxScore(NO_MORE_DOCS)) + random().nextFloat());
+
+ MaxScoreSumPropagator p = new MaxScoreSumPropagator(Arrays.asList(a, b));
+ assertEquals(a.getMaxScore(NO_MORE_DOCS) + b.getMaxScore(NO_MORE_DOCS), p.getMaxScore(NO_MORE_DOCS), 0f);
+ assertMinCompetitiveScore(Arrays.asList(a, b), p, Math.nextUp(a.getMaxScore(NO_MORE_DOCS)));
+ assertMinCompetitiveScore(Arrays.asList(a, b), p, (a.getMaxScore(NO_MORE_DOCS) + b.getMaxScore(NO_MORE_DOCS)) / 2);
+ assertMinCompetitiveScore(Arrays.asList(a, b), p, Math.nextDown(a.getMaxScore(NO_MORE_DOCS) + b.getMaxScore(NO_MORE_DOCS)));
+ assertMinCompetitiveScore(Arrays.asList(a, b), p, a.getMaxScore(NO_MORE_DOCS) + b.getMaxScore(NO_MORE_DOCS));
+ }
+ }
+
+ public void testNClausesRandomScore() throws IOException {
+ for (int iter = 0; iter < 100; ++iter) {
+ List<FakeScorer> scorers = new ArrayList<>();
+ int numScorers = TestUtil.nextInt(random(), 3, 4 << random().nextInt(8));
+ double sumOfMaxScore = 0;
+ for (int i = 0; i < numScorers; ++i) {
+ float maxScore = random().nextFloat();
+ scorers.add(new FakeScorer(maxScore));
+ sumOfMaxScore += maxScore;
+ }
+
+ MaxScoreSumPropagator p = new MaxScoreSumPropagator(scorers);
+ assertTrue(p.getMaxScore(NO_MORE_DOCS) >= (float) sumOfMaxScore);
+ for (int i = 0; i < 10; ++i) {
+ final float minCompetitiveScore = random().nextFloat() * numScorers;
+ assertMinCompetitiveScore(scorers, p, minCompetitiveScore);
+ // reset
+ for (FakeScorer scorer : scorers) {
+ scorer.minCompetitiveScore = 0;
+ }
+ }
+ }
+ }
+
+ private void assertMinCompetitiveScore(Collection<FakeScorer> scorers, MaxScoreSumPropagator p, float minCompetitiveScore) throws IOException {
+ p.setMinCompetitiveScore(minCompetitiveScore);
+
+ for (FakeScorer scorer : scorers) {
+ if (scorer.minCompetitiveScore == 0f) {
+ // no propagation is performed, still visiting all hits
+ break;
+ }
+ double scoreSum = scorer.minCompetitiveScore;
+ for (FakeScorer scorer2 : scorers) {
+ if (scorer2 != scorer) {
+ scoreSum += scorer2.getMaxScore(NO_MORE_DOCS);
+ }
+ }
+ assertTrue(
+ "scoreSum=" + scoreSum + ", minCompetitiveScore=" + minCompetitiveScore,
+ (float) scoreSum <= minCompetitiveScore);
+ }
+ }
+}
+