You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2022/01/03 13:34:38 UTC
[lucene] branch branch_9x updated: LUCENE-10252: ValueSource.asDoubleValues should not compute the score (#519)
This is an automated email from the ASF dual-hosted git repository.
dsmiley pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new 7216364 LUCENE-10252: ValueSource.asDoubleValues should not compute the score (#519)
7216364 is described below
commit 72163649b16a909ba21baca4cb6d3840f60855e8
Author: David Smiley <ds...@apache.org>
AuthorDate: Mon Jan 3 08:26:50 2022 -0500
LUCENE-10252: ValueSource.asDoubleValues should not compute the score (#519)
ValueSource.asDoubleValues and asLongValues should not compute the score unless asked to -- typically never. This fixes a performance regression since 7.3 LUCENE-8099 when some older boosting queries were replaced with this.
---
lucene/CHANGES.txt | 4 +
.../lucene/queries/function/ValueSource.java | 81 ++++++-----
.../lucene/queries/function/TestValueSources.java | 156 ++++++++++++++++++++-
3 files changed, 202 insertions(+), 39 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 1293cdc..014b13e 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -100,6 +100,10 @@ Optimizations
* LUCENE-10321: Tweak MultiRangeQuery interval tree creation to skip "pulling up" mins. (Greg Miller)
+* LUCENE-10252: ValueSource.asDoubleValues and asLongValues should not compute the score unless
+ asked to -- typically never. This fixes a performance regression since 7.3 LUCENE-8099 when some
+ older boosting queries were replaced with this. (David Smiley)
+
Bug Fixes
---------------------
diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSource.java
index 68539a9..8865838 100644
--- a/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSource.java
+++ b/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSource.java
@@ -17,7 +17,6 @@
package org.apache.lucene.queries.function;
import java.io.IOException;
-import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Map;
import java.util.Objects;
@@ -77,56 +76,75 @@ public abstract class ValueSource {
return context;
}
- private static class ScoreAndDoc extends Scorable {
-
- int current = -1;
+ private static class ScorableView extends Scorable {
+ final DoubleValues scores;
+ int docId = -1;
+ int scoresDocId = -1;
float score = 0;
+ public ScorableView(int docId, float score) {
+ this(null);
+ this.docId = this.scoresDocId = docId;
+ this.score = score;
+ }
+
+ public ScorableView(DoubleValues scores) {
+ this.scores = scores == null ? DoubleValues.EMPTY : scores;
+ }
+
@Override
public int docID() {
- return current;
+ return docId;
}
@Override
- public float score() {
+ public float score() throws IOException {
+ // ensure we calculate the score at most once
+ if (scoresDocId != docId) {
+ scoresDocId = docId;
+ if (scores.advanceExact(docId)) {
+ score = (float) scores.doubleValue();
+ } else {
+ score = 0;
+ }
+ }
return score;
}
}
/** Expose this ValueSource as a LongValuesSource */
public LongValuesSource asLongValuesSource() {
- return new WrappedLongValuesSource(this);
+ return new WrappedLongValuesSource(this, null);
}
private static class WrappedLongValuesSource extends LongValuesSource {
private final ValueSource in;
+ private final IndexSearcher searcher;
- private WrappedLongValuesSource(ValueSource in) {
+ private WrappedLongValuesSource(ValueSource in, IndexSearcher searcher) {
this.in = in;
+ this.searcher = searcher;
}
@Override
public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
- Map<Object, Object> context = new IdentityHashMap<>();
- ScoreAndDoc scorer = new ScoreAndDoc();
+ Map<Object, Object> context = newContext(searcher);
+
+ var scorer = new ScorableView(scores);
context.put("scorer", scorer);
- final FunctionValues fv = in.getValues(context, ctx);
+
+ FunctionValues fv = in.getValues(context, ctx);
return new LongValues() {
@Override
public long longValue() throws IOException {
- return fv.longVal(scorer.current);
+ return fv.longVal(scorer.docId);
}
@Override
public boolean advanceExact(int doc) throws IOException {
- scorer.current = doc;
- if (scores != null && scores.advanceExact(doc)) {
- scorer.score = (float) scores.doubleValue();
- } else {
- scorer.score = 0;
- }
+ scorer.docId = doc;
return fv.exists(doc);
}
};
@@ -162,7 +180,7 @@ public abstract class ValueSource {
@Override
public LongValuesSource rewrite(IndexSearcher searcher) throws IOException {
- return this;
+ return new WrappedLongValuesSource(in, searcher);
}
}
@@ -183,26 +201,22 @@ public abstract class ValueSource {
@Override
public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
- Map<Object, Object> context = new HashMap<>();
- ScoreAndDoc scorer = new ScoreAndDoc();
+ Map<Object, Object> context = newContext(searcher);
+
+ var scorer = new ScorableView(scores);
context.put("scorer", scorer);
- context.put("searcher", searcher);
+
FunctionValues fv = in.getValues(context, ctx);
return new DoubleValues() {
@Override
public double doubleValue() throws IOException {
- return fv.doubleVal(scorer.current);
+ return fv.doubleVal(scorer.docId);
}
@Override
- public boolean advanceExact(int doc) throws IOException {
- scorer.current = doc;
- if (scores != null && scores.advanceExact(doc)) {
- scorer.score = (float) scores.doubleValue();
- } else {
- scorer.score = 0;
- }
+ public boolean advanceExact(int doc) {
+ scorer.docId = doc;
// ValueSource will return values even if exists() is false, generally a default
// of some kind. To preserve this behaviour with the iterator, we need to always
// return 'true' here.
@@ -224,11 +238,8 @@ public abstract class ValueSource {
@Override
public Explanation explain(LeafReaderContext ctx, int docId, Explanation scoreExplanation)
throws IOException {
- Map<Object, Object> context = new HashMap<>();
- ScoreAndDoc scorer = new ScoreAndDoc();
- scorer.score = scoreExplanation.getValue().floatValue();
- context.put("scorer", scorer);
- context.put("searcher", searcher);
+ Map<Object, Object> context = newContext(searcher);
+ context.put("scorer", new ScorableView(docId, scoreExplanation.getValue().floatValue()));
FunctionValues fv = in.getValues(context, ctx);
return fv.explain(docId);
}
diff --git a/lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java b/lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java
index 8575990..75c2103 100644
--- a/lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java
+++ b/lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java
@@ -34,18 +34,60 @@ import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.function.docvalues.FloatDocValues;
-import org.apache.lucene.queries.function.valuesource.*;
+import org.apache.lucene.queries.function.valuesource.BytesRefFieldSource;
+import org.apache.lucene.queries.function.valuesource.ConstValueSource;
+import org.apache.lucene.queries.function.valuesource.DivFloatFunction;
+import org.apache.lucene.queries.function.valuesource.DocFreqValueSource;
+import org.apache.lucene.queries.function.valuesource.DoubleConstValueSource;
+import org.apache.lucene.queries.function.valuesource.DoubleFieldSource;
+import org.apache.lucene.queries.function.valuesource.FloatFieldSource;
+import org.apache.lucene.queries.function.valuesource.IDFValueSource;
+import org.apache.lucene.queries.function.valuesource.IfFunction;
+import org.apache.lucene.queries.function.valuesource.IntFieldSource;
+import org.apache.lucene.queries.function.valuesource.JoinDocFreqValueSource;
+import org.apache.lucene.queries.function.valuesource.LinearFloatFunction;
+import org.apache.lucene.queries.function.valuesource.LiteralValueSource;
+import org.apache.lucene.queries.function.valuesource.LongFieldSource;
+import org.apache.lucene.queries.function.valuesource.MaxDocValueSource;
+import org.apache.lucene.queries.function.valuesource.MaxFloatFunction;
+import org.apache.lucene.queries.function.valuesource.MinFloatFunction;
+import org.apache.lucene.queries.function.valuesource.MultiBoolFunction;
+import org.apache.lucene.queries.function.valuesource.MultiFloatFunction;
+import org.apache.lucene.queries.function.valuesource.MultiFunction;
+import org.apache.lucene.queries.function.valuesource.MultiValuedDoubleFieldSource;
+import org.apache.lucene.queries.function.valuesource.MultiValuedFloatFieldSource;
+import org.apache.lucene.queries.function.valuesource.MultiValuedIntFieldSource;
+import org.apache.lucene.queries.function.valuesource.MultiValuedLongFieldSource;
+import org.apache.lucene.queries.function.valuesource.NormValueSource;
+import org.apache.lucene.queries.function.valuesource.NumDocsValueSource;
+import org.apache.lucene.queries.function.valuesource.PowFloatFunction;
+import org.apache.lucene.queries.function.valuesource.ProductFloatFunction;
+import org.apache.lucene.queries.function.valuesource.QueryValueSource;
+import org.apache.lucene.queries.function.valuesource.RangeMapFloatFunction;
+import org.apache.lucene.queries.function.valuesource.ReciprocalFloatFunction;
+import org.apache.lucene.queries.function.valuesource.ScaleFloatFunction;
+import org.apache.lucene.queries.function.valuesource.SumFloatFunction;
+import org.apache.lucene.queries.function.valuesource.SumTotalTermFreqValueSource;
+import org.apache.lucene.queries.function.valuesource.TFValueSource;
+import org.apache.lucene.queries.function.valuesource.TermFreqValueSource;
+import org.apache.lucene.queries.function.valuesource.TotalTermFreqValueSource;
import org.apache.lucene.search.DoubleValuesSource;
+import org.apache.lucene.search.FilterScorer;
+import org.apache.lucene.search.FilterWeight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryVisitor;
+import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSelector.Type;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
@@ -674,14 +716,75 @@ public class TestValueSources extends LuceneTestCase {
}
}
- public void testWrappingAsDoubleValues() throws IOException {
+ public void testWrappingAsDoubleValues() throws Exception {
+
+ class AssertScoreComputedOnceQuery extends Query {
+
+ private final Query in;
+
+ public AssertScoreComputedOnceQuery(Query query) {
+ in = query;
+ }
+
+ @Override
+ public String toString(String field) {
+ return in.toString(field);
+ }
+
+ @Override
+ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
+ throws IOException {
+ return new FilterWeight(in.createWeight(searcher, scoreMode, boost)) {
+ @Override
+ public Scorer scorer(LeafReaderContext context) throws IOException {
+ return new FilterScorer(super.scorer(context)) {
+ int lastDocId = -1;
+
+ @Override
+ public float score() throws IOException {
+ assertTrue("shouldn't re-compute score", lastDocId != docID());
+ this.lastDocId = docID();
+ return super.score();
+ }
+
+ @Override
+ public float getMaxScore(int upTo) throws IOException {
+ return in.getMaxScore(upTo);
+ }
+ };
+ }
+ };
+ }
+
+ @Override
+ public Query rewrite(IndexReader reader) throws IOException {
+ var rewrite = in.rewrite(reader);
+ return rewrite == in ? this : new AssertScoreComputedOnceQuery(rewrite);
+ }
+
+ @Override
+ public void visit(QueryVisitor visitor) {
+ in.visit(visitor);
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int hashCode() {
+ return in.hashCode();
+ }
+ }
FunctionScoreQuery q =
FunctionScoreQuery.boostByValue(
- new TermQuery(new Term("f", "t")),
+ new AssertScoreComputedOnceQuery(new TermQuery(new Term("text", "test"))),
new DoubleFieldSource("double").asDoubleValuesSource());
- searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE, 1);
+ var topFieldDocs = searcher.search(q, 1);
+ assertTrue(topFieldDocs.scoreDocs.length > 0);
// assert that the query has not cached a reference to the IndexSearcher
FunctionScoreQuery.MultiplicativeBoostValuesSource source1 =
@@ -691,6 +794,51 @@ public class TestValueSources extends LuceneTestCase {
assertNull(source2.searcher);
}
+ /** Tests "scorer" key-value inside the Map context to ValueSource */
+ public void testScorerContext() throws IOException {
+ // a VS that yields the score
+ class ScoreValueSource extends ValueSource {
+ @Override
+ public FunctionValues getValues(Map<Object, Object> context, LeafReaderContext readerContext)
+ throws IOException {
+ var scorer = (Scorable) context.get("scorer");
+ assertNotNull(scorer);
+ return new FloatDocValues(this) {
+ @Override
+ public float floatVal(int doc) throws IOException {
+ assertEquals(doc, scorer.docID());
+ return scorer.score();
+ }
+ };
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ return this == o; // just for a test
+ }
+
+ @Override
+ public int hashCode() {
+ return 0; // just for a test
+ }
+
+ @Override
+ public String description() {
+ return "score";
+ }
+ }
+
+ var plainQ = new TermQuery(new Term("text", "test"));
+ float origScore = searcher.search(plainQ, 1).scoreDocs[0].score;
+
+ // boosts the score by the value source (which is the score), thus score^2
+ var scoreSquaredQ =
+ FunctionScoreQuery.boostByValue(plainQ, new ScoreValueSource().asDoubleValuesSource());
+ var topFieldDocs = searcher.search(scoreSquaredQ, 1);
+ assertTrue(topFieldDocs.scoreDocs.length > 0);
+ assertEquals(origScore * origScore, topFieldDocs.scoreDocs[0].score, 0.00001);
+ }
+
public void testBuildingFromDoubleValues() throws Exception {
DoubleValuesSource dvs =
ValueSource.fromDoubleValuesSource(DoubleValuesSource.fromDoubleField("double"))