You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2023/02/19 16:33:32 UTC
[lucene] branch main updated: Speed up DocValuesRewriteMethod by making use of sortedness (#12155)

This is an automated email from the ASF dual-hosted git repository.

gsmiller pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new 7506f8462f1 Speed up DocValuesRewriteMethod by making use of sortedness (#12155)
7506f8462f1 is described below

commit 7506f8462f151a6801222e65e4a60e2b04458b26
Author: Greg Miller <gs...@gmail.com>
AuthorDate: Sun Feb 19 08:33:26 2023 -0800

    Speed up DocValuesRewriteMethod by making use of sortedness (#12155)
---
 lucene/CHANGES.txt                                        |  2 ++
 .../org/apache/lucene/search/DocValuesRewriteMethod.java  | 15 ++++++++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index a55713e33ef..f232f802185 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -135,6 +135,8 @@ Optimizations
 
 * GITHUB#12050: Reuse HNSW graph for intialization during merge (Jack Mazanec)
 
+* GITHUB#12155: Speed up DocValuesRewriteMethod by making use of sortedness. (Greg Miller)
+
 Bug Fixes
 ---------------------
 (No changes)
diff --git a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
index d450205f330..9c412ff95fe 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
@@ -172,15 +172,25 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
               // query that are actually present in the doc values field). Cannot use FixedBitSet
               // because we require long index (ord):
               final LongBitSet termSet = new LongBitSet(values.getValueCount());
+              long maxOrd = -1;
               do {
                 long ord = termsEnum.ord();
                 if (ord >= 0) {
+                  assert ord > maxOrd;
+                  maxOrd = ord;
                   termSet.set(ord);
                 }
               } while (termsEnum.next() != null);
 
+              // no terms matched in this segment
+              if (maxOrd < 0) {
+                return new ConstantScoreScorer(
+                    weight, score(), scoreMode, DocIdSetIterator.empty());
+              }
+
               final SortedDocValues singleton = DocValues.unwrapSingleton(values);
               final TwoPhaseIterator iterator;
+              final long max = maxOrd;
               if (singleton != null) {
                 iterator =
                     new TwoPhaseIterator(singleton) {
@@ -200,7 +210,10 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
                       @Override
                       public boolean matches() throws IOException {
                         for (int i = 0; i < values.docValueCount(); i++) {
-                          if (termSet.get(values.nextOrd())) {
+                          long value = values.nextOrd();
+                          if (value > max) {
+                            return false; // values are sorted, terminate
+                          } else if (termSet.get(value)) {
                             return true;
                           }
                         }