You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2022/07/28 18:24:43 UTC
[lucene] branch branch_9x updated: Add #scoreSupplier support to DocValuesRewriteMethod along with singleton doc value opto (#1020)
This is an automated email from the ASF dual-hosted git repository.
gsmiller pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new 7d6ff92b797 Add #scoreSupplier support to DocValuesRewriteMethod along with singleton doc value opto (#1020)
7d6ff92b797 is described below
commit 7d6ff92b7972111c311bca8ecc791364ad75bfeb
Author: Greg Miller <gs...@gmail.com>
AuthorDate: Thu Jul 28 11:12:21 2022 -0700
Add #scoreSupplier support to DocValuesRewriteMethod along with singleton doc value opto (#1020)
---
lucene/CHANGES.txt | 2 +
.../lucene/search/DocValuesRewriteMethod.java | 128 +++++++++++++++------
2 files changed, 92 insertions(+), 38 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 3c5f21a966c..28ad9fa2e00 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -22,6 +22,8 @@ Optimizations
* LUCENE-10661: Reduce memory copy in BytesStore. (luyuncheng)
+* GITHUB#1020: Add #scorerSupplier support and small optimizations to DocValuesRewriteMethod. (Greg Miller)
+
Bug Fixes
---------------------
(No changes)
diff --git a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
index 51d0dd7078c..e0e9efc7822 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
@@ -20,6 +20,7 @@ import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
@@ -83,21 +84,25 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
@Override
public Matches matches(LeafReaderContext context, int doc) throws IOException {
- final SortedSetDocValues fcsi = DocValues.getSortedSet(context.reader(), query.field);
+ final SortedSetDocValues values = DocValues.getSortedSet(context.reader(), query.field);
return MatchesUtils.forField(
query.field,
() ->
DisjunctionMatchesIterator.fromTermsEnum(
- context, doc, query, query.field, getTermsEnum(fcsi)));
+ context, doc, query, query.field, getTermsEnum(values)));
}
- private TermsEnum getTermsEnum(SortedSetDocValues fcsi) throws IOException {
+ /**
+ * Create a TermsEnum that provides the intersection of the query terms with the terms
+ * present in the doc values.
+ */
+ private TermsEnum getTermsEnum(SortedSetDocValues values) throws IOException {
return query.getTermsEnum(
new Terms() {
@Override
public TermsEnum iterator() throws IOException {
- return fcsi.termsEnum();
+ return values.termsEnum();
}
@Override
@@ -143,45 +148,92 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
}
@Override
- public Scorer scorer(LeafReaderContext context) throws IOException {
- final SortedSetDocValues fcsi = DocValues.getSortedSet(context.reader(), query.field);
- TermsEnum termsEnum = getTermsEnum(fcsi);
- assert termsEnum != null;
- if (termsEnum.next() == null) {
- // no matching terms
- return null;
+ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
+ final SortedSetDocValues values = DocValues.getSortedSet(context.reader(), query.field);
+ if (values.getValueCount() == 0) {
+ return null; // no values/docs so nothing can match
}
- // fill into a bitset
- // Cannot use FixedBitSet because we require long index (ord):
- final LongBitSet termSet = new LongBitSet(fcsi.getValueCount());
- do {
- long ord = termsEnum.ord();
- if (ord >= 0) {
- termSet.set(ord);
- }
- } while (termsEnum.next() != null);
- return new ConstantScoreScorer(
- this,
- score(),
- scoreMode,
- new TwoPhaseIterator(fcsi) {
+ final Weight weight = this;
+ return new ScorerSupplier() {
+ @Override
+ public Scorer get(long leadCost) throws IOException {
+ // Create a TermsEnum that will provide the intersection of the terms specified in the
+ // query with the values present in the doc values:
+ TermsEnum termsEnum = getTermsEnum(values);
+ assert termsEnum != null;
- @Override
- public boolean matches() throws IOException {
- for (int i = 0; i < fcsi.docValueCount(); i++) {
- if (termSet.get(fcsi.nextOrd())) {
- return true;
- }
- }
- return false;
- }
+ if (termsEnum.next() == null) {
+ // no matching terms
+ return new ConstantScoreScorer(
+ weight, score(), scoreMode, DocIdSetIterator.empty());
+ }
- @Override
- public float matchCost() {
- return 3; // lookup in a bitset
+ // Create a bit set for the "term set" ordinals (these are the terms provided by the
+ // query that are actually present in the doc values field). Cannot use FixedBitSet
+ // because we require long index (ord):
+ final LongBitSet termSet = new LongBitSet(values.getValueCount());
+ do {
+ long ord = termsEnum.ord();
+ if (ord >= 0) {
+ termSet.set(ord);
}
- });
+ } while (termsEnum.next() != null);
+
+ final SortedDocValues singleton = DocValues.unwrapSingleton(values);
+ final TwoPhaseIterator iterator;
+ if (singleton != null) {
+ iterator =
+ new TwoPhaseIterator(singleton) {
+ @Override
+ public boolean matches() throws IOException {
+ return termSet.get(singleton.ordValue());
+ }
+
+ @Override
+ public float matchCost() {
+ return 3; // lookup in a bitset
+ }
+ };
+ } else {
+ iterator =
+ new TwoPhaseIterator(values) {
+ @Override
+ public boolean matches() throws IOException {
+ for (int i = 0; i < values.docValueCount(); i++) {
+ if (termSet.get(values.nextOrd())) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public float matchCost() {
+ return 3; // lookup in a bitset
+ }
+ };
+ }
+
+ return new ConstantScoreScorer(weight, score(), scoreMode, iterator);
+ }
+
+ @Override
+ public long cost() {
+ // We have no prior knowledge of how many docs might match for any given query term,
+ // so we assume that all docs with a value could be a match:
+ return values.cost();
+ }
+ };
+ }
+
+ @Override
+ public Scorer scorer(LeafReaderContext context) throws IOException {
+ final ScorerSupplier scorerSupplier = scorerSupplier(context);
+ if (scorerSupplier == null) {
+ return null;
+ }
+ return scorerSupplier.get(Long.MAX_VALUE);
}
@Override