You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2022/07/28 18:24:43 UTC

[lucene] branch branch_9x updated: Add #scoreSupplier support to DocValuesRewriteMethod along with singleton doc value opto (#1020)

This is an automated email from the ASF dual-hosted git repository.

gsmiller pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new 7d6ff92b797 Add #scoreSupplier support to DocValuesRewriteMethod along with singleton doc value opto (#1020)
7d6ff92b797 is described below

commit 7d6ff92b7972111c311bca8ecc791364ad75bfeb
Author: Greg Miller <gs...@gmail.com>
AuthorDate: Thu Jul 28 11:12:21 2022 -0700

    Add #scoreSupplier support to DocValuesRewriteMethod along with singleton doc value opto (#1020)
---
 lucene/CHANGES.txt                                 |   2 +
 .../lucene/search/DocValuesRewriteMethod.java      | 128 +++++++++++++++------
 2 files changed, 92 insertions(+), 38 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 3c5f21a966c..28ad9fa2e00 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -22,6 +22,8 @@ Optimizations
 
 * LUCENE-10661: Reduce memory copy in BytesStore. (luyuncheng)
 
+* GITHUB#1020: Support #scorerSupplier in DocValuesRewriteMethod and add small optimizations. (Greg Miller)
+
 Bug Fixes
 ---------------------
 (No changes)
diff --git a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
index 51d0dd7078c..e0e9efc7822 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
@@ -20,6 +20,7 @@ import java.io.IOException;
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
@@ -83,21 +84,25 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
 
         @Override
         public Matches matches(LeafReaderContext context, int doc) throws IOException {
-          final SortedSetDocValues fcsi = DocValues.getSortedSet(context.reader(), query.field);
+          final SortedSetDocValues values = DocValues.getSortedSet(context.reader(), query.field);
           return MatchesUtils.forField(
               query.field,
               () ->
                   DisjunctionMatchesIterator.fromTermsEnum(
-                      context, doc, query, query.field, getTermsEnum(fcsi)));
+                      context, doc, query, query.field, getTermsEnum(values)));
         }
 
-        private TermsEnum getTermsEnum(SortedSetDocValues fcsi) throws IOException {
+        /**
+         * Create a TermsEnum that provides the intersection of the query terms with the terms
+         * present in the doc values.
+         */
+        private TermsEnum getTermsEnum(SortedSetDocValues values) throws IOException {
           return query.getTermsEnum(
               new Terms() {
 
                 @Override
                 public TermsEnum iterator() throws IOException {
-                  return fcsi.termsEnum();
+                  return values.termsEnum();
                 }
 
                 @Override
@@ -143,45 +148,92 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
         }
 
         @Override
-        public Scorer scorer(LeafReaderContext context) throws IOException {
-          final SortedSetDocValues fcsi = DocValues.getSortedSet(context.reader(), query.field);
-          TermsEnum termsEnum = getTermsEnum(fcsi);
-          assert termsEnum != null;
-          if (termsEnum.next() == null) {
-            // no matching terms
-            return null;
+        public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
+          final SortedSetDocValues values = DocValues.getSortedSet(context.reader(), query.field);
+          if (values.getValueCount() == 0) {
+            return null; // no values/docs so nothing can match
           }
-          // fill into a bitset
-          // Cannot use FixedBitSet because we require long index (ord):
-          final LongBitSet termSet = new LongBitSet(fcsi.getValueCount());
-          do {
-            long ord = termsEnum.ord();
-            if (ord >= 0) {
-              termSet.set(ord);
-            }
-          } while (termsEnum.next() != null);
 
-          return new ConstantScoreScorer(
-              this,
-              score(),
-              scoreMode,
-              new TwoPhaseIterator(fcsi) {
+          final Weight weight = this;
+          return new ScorerSupplier() {
+            @Override
+            public Scorer get(long leadCost) throws IOException {
+              // Create a TermsEnum that will provide the intersection of the terms specified in the
+              // query with the values present in the doc values:
+              TermsEnum termsEnum = getTermsEnum(values);
+              assert termsEnum != null;
 
-                @Override
-                public boolean matches() throws IOException {
-                  for (int i = 0; i < fcsi.docValueCount(); i++) {
-                    if (termSet.get(fcsi.nextOrd())) {
-                      return true;
-                    }
-                  }
-                  return false;
-                }
+              if (termsEnum.next() == null) {
+                // no matching terms
+                return new ConstantScoreScorer(
+                    weight, score(), scoreMode, DocIdSetIterator.empty());
+              }
 
-                @Override
-                public float matchCost() {
-                  return 3; // lookup in a bitset
+              // Create a bit set for the "term set" ordinals (these are the terms provided by the
+              // query that are actually present in the doc values field). Cannot use FixedBitSet
+              // because we require long index (ord):
+              final LongBitSet termSet = new LongBitSet(values.getValueCount());
+              do {
+                long ord = termsEnum.ord();
+                if (ord >= 0) {
+                  termSet.set(ord);
                 }
-              });
+              } while (termsEnum.next() != null);
+
+              final SortedDocValues singleton = DocValues.unwrapSingleton(values);
+              final TwoPhaseIterator iterator;
+              if (singleton != null) {
+                iterator =
+                    new TwoPhaseIterator(singleton) {
+                      @Override
+                      public boolean matches() throws IOException {
+                        return termSet.get(singleton.ordValue());
+                      }
+
+                      @Override
+                      public float matchCost() {
+                        return 3; // lookup in a bitset
+                      }
+                    };
+              } else {
+                iterator =
+                    new TwoPhaseIterator(values) {
+                      @Override
+                      public boolean matches() throws IOException {
+                        for (int i = 0; i < values.docValueCount(); i++) {
+                          if (termSet.get(values.nextOrd())) {
+                            return true;
+                          }
+                        }
+                        return false;
+                      }
+
+                      @Override
+                      public float matchCost() {
+                        return 3; // lookup in a bitset
+                      }
+                    };
+              }
+
+              return new ConstantScoreScorer(weight, score(), scoreMode, iterator);
+            }
+
+            @Override
+            public long cost() {
+              // We have no prior knowledge of how many docs might match for any given query term,
+              // so we assume that all docs with a value could be a match:
+              return values.cost();
+            }
+          };
+        }
+
+        @Override
+        public Scorer scorer(LeafReaderContext context) throws IOException {
+          final ScorerSupplier scorerSupplier = scorerSupplier(context);
+          if (scorerSupplier == null) {
+            return null;
+          }
+          return scorerSupplier.get(Long.MAX_VALUE);
         }
 
         @Override