You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2023/02/17 14:37:36 UTC
[lucene] branch branch_9x updated: Implement ScorerSupplier for Sorted(Set)DocValuesField#newSlowRangeQuery (#12132)

This is an automated email from the ASF dual-hosted git repository.

rmuir pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new 6adad0fb103 Implement ScorerSupplier for Sorted(Set)DocValuesField#newSlowRangeQuery (#12132)
6adad0fb103 is described below

commit 6adad0fb10396f24cbc8ce648d3934bd28519930
Author: Robert Muir <rm...@apache.org>
AuthorDate: Fri Feb 17 08:25:17 2023 -0500

    Implement ScorerSupplier for Sorted(Set)DocValuesField#newSlowRangeQuery (#12132)
    
    Similar to the use of ScorerSupplier in #12129, implement it here too,
    because creating a Scorer requires lookupTerm() operations in the doc values (DV)
    terms dictionary. Those lookups are wasted effort and random accesses if
    IndexOrDocValuesQuery decides, based on cost(), not to use this query.
---
 .../document/SortedSetDocValuesRangeQuery.java     | 163 ++++++++++++---------
 1 file changed, 95 insertions(+), 68 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java b/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java
index 67aa2daf3ef..f7eab990d3d 100644
--- a/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java
@@ -25,12 +25,14 @@ import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.search.ConstantScoreScorer;
 import org.apache.lucene.search.ConstantScoreWeight;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.FieldExistsQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.QueryVisitor;
 import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.ScorerSupplier;
 import org.apache.lucene.search.TwoPhaseIterator;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.util.BytesRef;
@@ -107,84 +109,109 @@ final class SortedSetDocValuesRangeQuery extends Query {
   public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
       throws IOException {
     return new ConstantScoreWeight(this, boost) {
+
       @Override
       public Scorer scorer(LeafReaderContext context) throws IOException {
-        if (context.reader().getFieldInfos().fieldInfo(field) == null) {
+        ScorerSupplier scorerSupplier = scorerSupplier(context);
+        if (scorerSupplier == null) {
           return null;
         }
-        SortedSetDocValues values = DocValues.getSortedSet(context.reader(), field);
+        return scorerSupplier.get(Long.MAX_VALUE);
+      }
 
-        final long minOrd;
-        if (lowerValue == null) {
-          minOrd = 0;
-        } else {
-          final long ord = values.lookupTerm(lowerValue);
-          if (ord < 0) {
-            minOrd = -1 - ord;
-          } else if (lowerInclusive) {
-            minOrd = ord;
-          } else {
-            minOrd = ord + 1;
-          }
+      @Override
+      public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
+        final Weight weight = this;
+        if (context.reader().getFieldInfos().fieldInfo(field) == null) {
+          return null;
         }
+        SortedSetDocValues values = DocValues.getSortedSet(context.reader(), field);
 
-        final long maxOrd;
-        if (upperValue == null) {
-          maxOrd = values.getValueCount() - 1;
-        } else {
-          final long ord = values.lookupTerm(upperValue);
-          if (ord < 0) {
-            maxOrd = -2 - ord;
-          } else if (upperInclusive) {
-            maxOrd = ord;
-          } else {
-            maxOrd = ord - 1;
-          }
-        }
+        // implement ScorerSupplier, since we do some expensive stuff to make a scorer
+        return new ScorerSupplier() {
+          @Override
+          public Scorer get(long leadCost) throws IOException {
+
+            final long minOrd;
+            if (lowerValue == null) {
+              minOrd = 0;
+            } else {
+              final long ord = values.lookupTerm(lowerValue);
+              if (ord < 0) {
+                minOrd = -1 - ord;
+              } else if (lowerInclusive) {
+                minOrd = ord;
+              } else {
+                minOrd = ord + 1;
+              }
+            }
+
+            final long maxOrd;
+            if (upperValue == null) {
+              maxOrd = values.getValueCount() - 1;
+            } else {
+              final long ord = values.lookupTerm(upperValue);
+              if (ord < 0) {
+                maxOrd = -2 - ord;
+              } else if (upperInclusive) {
+                maxOrd = ord;
+              } else {
+                maxOrd = ord - 1;
+              }
+            }
+
+            // no terms matched in this segment
+            if (minOrd > maxOrd) {
+              return new ConstantScoreScorer(weight, score(), scoreMode, DocIdSetIterator.empty());
+            }
+
+            final SortedDocValues singleton = DocValues.unwrapSingleton(values);
+            final TwoPhaseIterator iterator;
+            if (singleton != null) {
+              iterator =
+                  new TwoPhaseIterator(singleton) {
+                    @Override
+                    public boolean matches() throws IOException {
+                      final long ord = singleton.ordValue();
+                      return ord >= minOrd && ord <= maxOrd;
+                    }
 
-        if (minOrd > maxOrd) {
-          return null;
-        }
+                    @Override
+                    public float matchCost() {
+                      return 2; // 2 comparisons
+                    }
+                  };
+            } else {
+              iterator =
+                  new TwoPhaseIterator(values) {
+                    @Override
+                    public boolean matches() throws IOException {
+                      for (int i = 0; i < values.docValueCount(); i++) {
+                        long ord = values.nextOrd();
+                        if (ord < minOrd) {
+                          continue;
+                        }
+                        // Values are sorted, so the first ord that is >= minOrd is our best
+                        // candidate
+                        return ord <= maxOrd;
+                      }
+                      return false; // all ords were < minOrd
+                    }
 
-        final SortedDocValues singleton = DocValues.unwrapSingleton(values);
-        final TwoPhaseIterator iterator;
-        if (singleton != null) {
-          iterator =
-              new TwoPhaseIterator(singleton) {
-                @Override
-                public boolean matches() throws IOException {
-                  final long ord = singleton.ordValue();
-                  return ord >= minOrd && ord <= maxOrd;
-                }
-
-                @Override
-                public float matchCost() {
-                  return 2; // 2 comparisons
-                }
-              };
-        } else {
-          iterator =
-              new TwoPhaseIterator(values) {
-                @Override
-                public boolean matches() throws IOException {
-                  for (int i = 0; i < values.docValueCount(); i++) {
-                    long ord = values.nextOrd();
-                    if (ord < minOrd) {
-                      continue;
+                    @Override
+                    public float matchCost() {
+                      return 2; // 2 comparisons
                     }
-                    // Values are sorted, so the first ord that is >= minOrd is our best candidate
-                    return ord <= maxOrd;
-                  }
-                  return false; // all ords were < minOrd
-                }
-
-                @Override
-                public float matchCost() {
-                  return 2; // 2 comparisons
-                }
-              };
-        }
-        return new ConstantScoreScorer(this, score(), scoreMode, iterator);
+                  };
+            }
+            return new ConstantScoreScorer(weight, score(), scoreMode, iterator);
+          }
+
+          @Override
+          public long cost() {
+            return values.cost();
+          }
+        };
       }
 
       @Override