You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2023/02/19 16:33:32 UTC
[lucene] branch main updated: Speed up DocValuesRewriteMethod by making use of sortedness (#12155)
This is an automated email from the ASF dual-hosted git repository.
gsmiller pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new 7506f8462f1 Speed up DocValuesRewriteMethod by making use of sortedness (#12155)
7506f8462f1 is described below
commit 7506f8462f151a6801222e65e4a60e2b04458b26
Author: Greg Miller <gs...@gmail.com>
AuthorDate: Sun Feb 19 08:33:26 2023 -0800
Speed up DocValuesRewriteMethod by making use of sortedness (#12155)
---
lucene/CHANGES.txt | 2 ++
.../org/apache/lucene/search/DocValuesRewriteMethod.java | 15 ++++++++++++++-
2 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index a55713e33ef..f232f802185 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -135,6 +135,8 @@ Optimizations
* GITHUB#12050: Reuse HNSW graph for initialization during merge (Jack Mazanec)
+* GITHUB#12155: Speed up DocValuesRewriteMethod by making use of sortedness. (Greg Miller)
+
Bug Fixes
---------------------
(No changes)
diff --git a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
index d450205f330..9c412ff95fe 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
@@ -172,15 +172,25 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
// query that are actually present in the doc values field). Cannot use FixedBitSet
// because we require long index (ord):
final LongBitSet termSet = new LongBitSet(values.getValueCount());
+ long maxOrd = -1;
do {
long ord = termsEnum.ord();
if (ord >= 0) {
+ assert ord > maxOrd;
+ maxOrd = ord;
termSet.set(ord);
}
} while (termsEnum.next() != null);
+ // no terms matched in this segment
+ if (maxOrd < 0) {
+ return new ConstantScoreScorer(
+ weight, score(), scoreMode, DocIdSetIterator.empty());
+ }
+
final SortedDocValues singleton = DocValues.unwrapSingleton(values);
final TwoPhaseIterator iterator;
+ final long max = maxOrd;
if (singleton != null) {
iterator =
new TwoPhaseIterator(singleton) {
@@ -200,7 +210,10 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
@Override
public boolean matches() throws IOException {
for (int i = 0; i < values.docValueCount(); i++) {
- if (termSet.get(values.nextOrd())) {
+ long value = values.nextOrd();
+ if (value > max) {
+ return false; // values are sorted, terminate
+ } else if (termSet.get(value)) {
return true;
}
}