You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2023/02/17 14:37:36 UTC
[lucene] branch branch_9x updated: Implement ScorerSupplier for Sorted(Set)DocValuesField#newSlowRangeQuery (#12132)
This is an automated email from the ASF dual-hosted git repository.
rmuir pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new 6adad0fb103 Implement ScorerSupplier for Sorted(Set)DocValuesField#newSlowRangeQuery (#12132)
6adad0fb103 is described below
commit 6adad0fb10396f24cbc8ce648d3934bd28519930
Author: Robert Muir <rm...@apache.org>
AuthorDate: Fri Feb 17 08:25:17 2023 -0500
Implement ScorerSupplier for Sorted(Set)DocValuesField#newSlowRangeQuery (#12132)
Similar to the use of ScorerSupplier in #12129, implement it here too,
because creating a Scorer requires lookupTerm() operations in the doc values (DV)
terms dictionary. That effort, including its random accesses, is wasted if
IndexOrDocValuesQuery decides, based on cost(), not to use this query.
---
.../document/SortedSetDocValuesRangeQuery.java | 163 ++++++++++++---------
1 file changed, 95 insertions(+), 68 deletions(-)
diff --git a/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java b/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java
index 67aa2daf3ef..f7eab990d3d 100644
--- a/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java
@@ -25,12 +25,14 @@ import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
+import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldExistsQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.BytesRef;
@@ -107,84 +109,109 @@ final class SortedSetDocValuesRangeQuery extends Query {
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
throws IOException {
return new ConstantScoreWeight(this, boost) {
+
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
- if (context.reader().getFieldInfos().fieldInfo(field) == null) {
+ ScorerSupplier scorerSupplier = scorerSupplier(context);
+ if (scorerSupplier == null) {
return null;
}
- SortedSetDocValues values = DocValues.getSortedSet(context.reader(), field);
+ return scorerSupplier.get(Long.MAX_VALUE);
+ }
- final long minOrd;
- if (lowerValue == null) {
- minOrd = 0;
- } else {
- final long ord = values.lookupTerm(lowerValue);
- if (ord < 0) {
- minOrd = -1 - ord;
- } else if (lowerInclusive) {
- minOrd = ord;
- } else {
- minOrd = ord + 1;
- }
+ @Override
+ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
+ final Weight weight = this;
+ if (context.reader().getFieldInfos().fieldInfo(field) == null) {
+ return null;
}
+ SortedSetDocValues values = DocValues.getSortedSet(context.reader(), field);
- final long maxOrd;
- if (upperValue == null) {
- maxOrd = values.getValueCount() - 1;
- } else {
- final long ord = values.lookupTerm(upperValue);
- if (ord < 0) {
- maxOrd = -2 - ord;
- } else if (upperInclusive) {
- maxOrd = ord;
- } else {
- maxOrd = ord - 1;
- }
- }
+ // implement ScorerSupplier, since we do some expensive stuff to make a scorer
+ return new ScorerSupplier() {
+ @Override
+ public Scorer get(long leadCost) throws IOException {
+
+ final long minOrd;
+ if (lowerValue == null) {
+ minOrd = 0;
+ } else {
+ final long ord = values.lookupTerm(lowerValue);
+ if (ord < 0) {
+ minOrd = -1 - ord;
+ } else if (lowerInclusive) {
+ minOrd = ord;
+ } else {
+ minOrd = ord + 1;
+ }
+ }
+
+ final long maxOrd;
+ if (upperValue == null) {
+ maxOrd = values.getValueCount() - 1;
+ } else {
+ final long ord = values.lookupTerm(upperValue);
+ if (ord < 0) {
+ maxOrd = -2 - ord;
+ } else if (upperInclusive) {
+ maxOrd = ord;
+ } else {
+ maxOrd = ord - 1;
+ }
+ }
+
+ // no terms matched in this segment
+ if (minOrd > maxOrd) {
+ return new ConstantScoreScorer(weight, score(), scoreMode, DocIdSetIterator.empty());
+ }
+
+ final SortedDocValues singleton = DocValues.unwrapSingleton(values);
+ final TwoPhaseIterator iterator;
+ if (singleton != null) {
+ iterator =
+ new TwoPhaseIterator(singleton) {
+ @Override
+ public boolean matches() throws IOException {
+ final long ord = singleton.ordValue();
+ return ord >= minOrd && ord <= maxOrd;
+ }
- if (minOrd > maxOrd) {
- return null;
- }
+ @Override
+ public float matchCost() {
+ return 2; // 2 comparisons
+ }
+ };
+ } else {
+ iterator =
+ new TwoPhaseIterator(values) {
+ @Override
+ public boolean matches() throws IOException {
+ for (int i = 0; i < values.docValueCount(); i++) {
+ long ord = values.nextOrd();
+ if (ord < minOrd) {
+ continue;
+ }
+ // Values are sorted, so the first ord that is >= minOrd is our best
+ // candidate
+ return ord <= maxOrd;
+ }
+ return false; // all ords were < minOrd
+ }
- final SortedDocValues singleton = DocValues.unwrapSingleton(values);
- final TwoPhaseIterator iterator;
- if (singleton != null) {
- iterator =
- new TwoPhaseIterator(singleton) {
- @Override
- public boolean matches() throws IOException {
- final long ord = singleton.ordValue();
- return ord >= minOrd && ord <= maxOrd;
- }
-
- @Override
- public float matchCost() {
- return 2; // 2 comparisons
- }
- };
- } else {
- iterator =
- new TwoPhaseIterator(values) {
- @Override
- public boolean matches() throws IOException {
- for (int i = 0; i < values.docValueCount(); i++) {
- long ord = values.nextOrd();
- if (ord < minOrd) {
- continue;
+ @Override
+ public float matchCost() {
+ return 2; // 2 comparisons
}
- // Values are sorted, so the first ord that is >= minOrd is our best candidate
- return ord <= maxOrd;
- }
- return false; // all ords were < minOrd
- }
-
- @Override
- public float matchCost() {
- return 2; // 2 comparisons
- }
- };
- }
- return new ConstantScoreScorer(this, score(), scoreMode, iterator);
+ };
+ }
+ return new ConstantScoreScorer(weight, score(), scoreMode, iterator);
+ }
+
+ @Override
+ public long cost() {
+ return values.cost();
+ }
+ };
}
@Override