You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2023/02/27 23:35:06 UTC
[lucene] branch branch_9x updated: Better skipping for multi-term queries with a FILTER rewrite. (#12055)
This is an automated email from the ASF dual-hosted git repository.
gsmiller pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new 27e50cbec4a Better skipping for multi-term queries with a FILTER rewrite. (#12055)
27e50cbec4a is described below
commit 27e50cbec4a55acdef50f9b59bbb70f8c4389d03
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Tue Feb 28 00:11:31 2023 +0100
Better skipping for multi-term queries with a FILTER rewrite. (#12055)
This change introduces `MultiTermQuery#CONSTANT_SCORE_BLENDED_REWRITE`, a new rewrite method
meant to be used in place of `MultiTermQuery#CONSTANT_SCORE_REWRITE` as the default for multi-term
queries that act as a filter. Currently, multi-term queries with a filter rewrite internally rewrite to a
disjunction if 16 or fewer terms match the query. Otherwise postings lists of
matching terms are collected into a `DocIdSetBuilder`. This change replaces the
latter with a mixed approach where a disjunction is created between the 16
terms that have the highest document frequency and an iterator produced from
the `DocIdSetBuilder` that collects all other terms. On fields that have a
Zipfian distribution, it's quite likely that no high-frequency terms make it to
the `DocIdSetBuilder`. This provides two main benefits:
- Queries are less likely to allocate a FixedBitSet of size `maxDoc`.
- Queries are better at skipping or early terminating.
On the other hand, queries that need to consume most or all matching documents may get a slowdown, so
users can still opt in to the "full filter rewrite" functionality by overriding the rewrite method. This is the new
default for PrefixQuery, WildcardQuery and TermRangeQuery.
Co-authored-by: Adrien Grand <jp...@gmail.com> / Greg Miller <gs...@gmail.com>
---
lucene/CHANGES.txt | 7 +-
.../benchmark/byTask/feeds/EnwikiQueryMaker.java | 2 +-
...AbstractMultiTermQueryConstantScoreWrapper.java | 263 ++++++++++++++++
.../org/apache/lucene/search/AutomatonQuery.java | 2 +-
.../org/apache/lucene/search/MultiTermQuery.java | 27 +-
.../MultiTermQueryConstantScoreBlendedWrapper.java | 136 +++++++++
.../search/MultiTermQueryConstantScoreWrapper.java | 233 ++-------------
.../java/org/apache/lucene/search/PrefixQuery.java | 4 +-
.../java/org/apache/lucene/search/RegexpQuery.java | 4 +-
.../org/apache/lucene/search/ScoringRewrite.java | 3 +-
.../org/apache/lucene/search/TermRangeQuery.java | 6 +-
.../search/UsageTrackingQueryCachingPolicy.java | 1 +
.../org/apache/lucene/search/WildcardQuery.java | 4 +-
.../apache/lucene/search/TestAutomatonQuery.java | 9 +
.../lucene/search/TestAutomatonQueryUnicode.java | 9 +
.../lucene/search/TestFieldCacheRewriteMethod.java | 11 +
.../lucene/search/TestMultiTermConstantScore.java | 330 +++++++++------------
.../lucene/search/TestMultiTermQueryRewrites.java | 1 +
.../org/apache/lucene/search/TestPrefixRandom.java | 2 +-
.../apache/lucene/search/TestRegexpRandom2.java | 2 +-
.../org/apache/lucene/search/TestWildcard.java | 8 +
.../lucene/search/highlight/TestHighlighter.java | 2 +-
.../org/apache/lucene/search/join/TermsQuery.java | 2 +-
.../queryparser/classic/QueryParserBase.java | 12 +-
.../standard/CommonQueryParserConfiguration.java | 22 +-
.../flexible/standard/StandardQueryParser.java | 9 -
.../builders/PrefixWildcardQueryNodeBuilder.java | 2 +-
.../standard/builders/RegexpQueryNodeBuilder.java | 2 +-
.../builders/TermRangeQueryNodeBuilder.java | 2 +-
.../builders/WildcardQueryNodeBuilder.java | 2 +-
.../config/StandardQueryConfigHandler.java | 2 +-
.../MultiTermRewriteMethodProcessor.java | 2 +-
.../flexible/standard/TestQPHelper.java | 12 +-
.../queryparser/util/QueryParserTestBase.java | 2 +-
34 files changed, 683 insertions(+), 454 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 82cf4fa3075..85455e6aa2b 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -23,7 +23,10 @@ New Features
Improvements
---------------------
-(No changes)
+
+* GITHUB#12055: MultiTermQuery#CONSTANT_SCORE_BLENDED_REWRITE rewrite method introduced and used as the new default
+ for multi-term queries with a FILTER rewrite (PrefixQuery, WildcardQuery, TermRangeQuery). This introduces better
+ skipping support for common use-cases. (Adrien Grand, Greg Miller)
Optimizations
---------------------
@@ -31,7 +34,7 @@ Optimizations
* GITHUB#11900: BloomFilteringPostingsFormat now uses multiple hash functions
in order to achieve the same false positive probability with less memory.
(Jean-François Boeuf)
-
+
* GITHUB#12118: Optimize FeatureQuery to TermQuery & weight when scoring is not required. (Ben Trent, Robert Muir)
* GITHUB#12128, GITHUB#12133: Speed up docvalues set query by making use of sortedness. (Robert Muir, Uwe Schindler)
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiQueryMaker.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiQueryMaker.java
index e116fe9191a..1e28cf21e43 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiQueryMaker.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiQueryMaker.java
@@ -135,7 +135,7 @@ public class EnwikiQueryMaker extends AbstractQueryMaker {
new WildcardQuery(
new Term(field, "fo*"),
Operations.DEFAULT_DETERMINIZE_WORK_LIMIT,
- MultiTermQuery.CONSTANT_SCORE_REWRITE);
+ MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE);
// be wary of unanalyzed text
return new Query[] {
new SpanFirstQuery(new SpanTermQuery(new Term(field, "ford")), 5),
diff --git a/lucene/core/src/java/org/apache/lucene/search/AbstractMultiTermQueryConstantScoreWrapper.java b/lucene/core/src/java/org/apache/lucene/search/AbstractMultiTermQueryConstantScoreWrapper.java
new file mode 100644
index 00000000000..622264d5059
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/AbstractMultiTermQueryConstantScoreWrapper.java
@@ -0,0 +1,263 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.TermStates;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.RamUsageEstimator;
+
+/**
+ * Contains functionality common to both {@link MultiTermQueryConstantScoreBlendedWrapper} and
+ * {@link MultiTermQueryConstantScoreWrapper}. Internal implementation detail only. Not meant as an
+ * extension point for users.
+ *
+ * @lucene.internal
+ */
+abstract class AbstractMultiTermQueryConstantScoreWrapper<Q extends MultiTermQuery> extends Query
+ implements Accountable {
+ // mtq that matches 16 terms or less will be executed as a regular disjunction
+ private static final int BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD = 16;
+
+ protected final Q query;
+
+ protected AbstractMultiTermQueryConstantScoreWrapper(Q query) {
+ this.query = query;
+ }
+
+ @Override
+ public long ramBytesUsed() {
+ if (query instanceof Accountable) {
+ return RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
+ + RamUsageEstimator.NUM_BYTES_OBJECT_REF
+ + ((Accountable) query).ramBytesUsed();
+ }
+ return RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
+ + RamUsageEstimator.NUM_BYTES_OBJECT_REF
+ + RamUsageEstimator.QUERY_DEFAULT_RAM_BYTES_USED;
+ }
+
+ @Override
+ public String toString(String field) {
+ // query.toString should be ok for the filter, too, if the query boost is 1.0f
+ return query.toString(field);
+ }
+
+ @Override
+ public boolean equals(final Object other) {
+ return sameClassAs(other)
+ && query.equals(((AbstractMultiTermQueryConstantScoreWrapper<?>) other).query);
+ }
+
+ @Override
+ public int hashCode() {
+ return 31 * classHash() + query.hashCode();
+ }
+
+ /** Returns the encapsulated query */
+ public Q getQuery() {
+ return query;
+ }
+
+ /** Returns the field name for this query */
+ public String getField() {
+ return query.getField();
+ }
+
+ @Override
+ public void visit(QueryVisitor visitor) {
+ if (visitor.acceptField(getField())) {
+ query.visit(visitor.getSubVisitor(BooleanClause.Occur.FILTER, this));
+ }
+ }
+
+ protected static final class TermAndState {
+ final BytesRef term;
+ final TermState state;
+ final int docFreq;
+ final long totalTermFreq;
+
+ TermAndState(BytesRef term, TermState state, int docFreq, long totalTermFreq) {
+ this.term = term;
+ this.state = state;
+ this.docFreq = docFreq;
+ this.totalTermFreq = totalTermFreq;
+ }
+ }
+
+ protected static final class WeightOrDocIdSetIterator {
+ final Weight weight;
+ final DocIdSetIterator iterator;
+
+ WeightOrDocIdSetIterator(Weight weight) {
+ this.weight = Objects.requireNonNull(weight);
+ this.iterator = null;
+ }
+
+ WeightOrDocIdSetIterator(DocIdSetIterator iterator) {
+ this.iterator = iterator;
+ this.weight = null;
+ }
+ }
+
+ protected abstract static class RewritingWeight extends ConstantScoreWeight {
+ private final MultiTermQuery q;
+ private final ScoreMode scoreMode;
+ private final IndexSearcher searcher;
+
+ protected RewritingWeight(
+ MultiTermQuery q, float boost, ScoreMode scoreMode, IndexSearcher searcher) {
+ super(q, boost);
+ this.q = q;
+ this.scoreMode = scoreMode;
+ this.searcher = searcher;
+ }
+
+ /**
+ * Rewrite the query as either a {@link Weight} or a {@link DocIdSetIterator} wrapped in a
+ * {@link WeightOrDocIdSetIterator}. Before this is called, the weight will attempt to "collect"
+ * found terms up to a threshold. If fewer terms than the threshold are found, the query will
+ * simply be rewritten into a {@link BooleanQuery} and this method will not be called. This will
+ * only be called if it is determined there are more found terms. At the point this method is
+ * invoked, {@code termsEnum} will be positioned on the next "uncollected" term. The terms that
+ * were already collected will be in {@code collectedTerms}.
+ */
+ protected abstract WeightOrDocIdSetIterator rewriteInner(
+ LeafReaderContext context,
+ int fieldDocCount,
+ Terms terms,
+ TermsEnum termsEnum,
+ List<TermAndState> collectedTerms)
+ throws IOException;
+
+ private WeightOrDocIdSetIterator rewrite(LeafReaderContext context) throws IOException {
+ final Terms terms = context.reader().terms(q.field);
+ if (terms == null) {
+ // field does not exist
+ return null;
+ }
+
+ final int fieldDocCount = terms.getDocCount();
+ final TermsEnum termsEnum = q.getTermsEnum(terms);
+ assert termsEnum != null;
+
+ final List<TermAndState> collectedTerms = new ArrayList<>();
+ if (collectTerms(fieldDocCount, termsEnum, collectedTerms)) {
+ // build a boolean query
+ BooleanQuery.Builder bq = new BooleanQuery.Builder();
+ for (TermAndState t : collectedTerms) {
+ final TermStates termStates = new TermStates(searcher.getTopReaderContext());
+ termStates.register(t.state, context.ord, t.docFreq, t.totalTermFreq);
+ bq.add(new TermQuery(new Term(q.field, t.term), termStates), BooleanClause.Occur.SHOULD);
+ }
+ Query q = new ConstantScoreQuery(bq.build());
+ final Weight weight = searcher.rewrite(q).createWeight(searcher, scoreMode, score());
+ return new WeightOrDocIdSetIterator(weight);
+ }
+
+ // Too many terms to rewrite as a simple bq. Invoke rewriteInner logic to handle rewriting:
+ return rewriteInner(context, fieldDocCount, terms, termsEnum, collectedTerms);
+ }
+
+ private boolean collectTerms(int fieldDocCount, TermsEnum termsEnum, List<TermAndState> terms)
+ throws IOException {
+ final int threshold =
+ Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, IndexSearcher.getMaxClauseCount());
+ for (int i = 0; i < threshold; i++) {
+ final BytesRef term = termsEnum.next();
+ if (term == null) {
+ return true;
+ }
+ TermState state = termsEnum.termState();
+ int docFreq = termsEnum.docFreq();
+ TermAndState termAndState =
+ new TermAndState(BytesRef.deepCopyOf(term), state, docFreq, termsEnum.totalTermFreq());
+ if (fieldDocCount == docFreq) {
+ // If the term contains every document with a value for the field, we can ignore all
+ // other terms:
+ terms.clear();
+ terms.add(termAndState);
+ return true;
+ }
+ terms.add(termAndState);
+ }
+ return termsEnum.next() == null;
+ }
+
+ private Scorer scorerForIterator(DocIdSetIterator iterator) {
+ if (iterator == null) {
+ return null;
+ }
+ return new ConstantScoreScorer(this, score(), scoreMode, iterator);
+ }
+
+ @Override
+ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
+ final WeightOrDocIdSetIterator weightOrIterator = rewrite(context);
+ if (weightOrIterator == null) {
+ return null;
+ } else if (weightOrIterator.weight != null) {
+ return weightOrIterator.weight.bulkScorer(context);
+ } else {
+ final Scorer scorer = scorerForIterator(weightOrIterator.iterator);
+ if (scorer == null) {
+ return null;
+ }
+ return new DefaultBulkScorer(scorer);
+ }
+ }
+
+ @Override
+ public Scorer scorer(LeafReaderContext context) throws IOException {
+ final WeightOrDocIdSetIterator weightOrIterator = rewrite(context);
+ if (weightOrIterator == null) {
+ return null;
+ } else if (weightOrIterator.weight != null) {
+ return weightOrIterator.weight.scorer(context);
+ } else {
+ return scorerForIterator(weightOrIterator.iterator);
+ }
+ }
+
+ @Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ final Terms terms = context.reader().terms(q.field);
+ if (terms == null) {
+ return null;
+ }
+ return MatchesUtils.forField(
+ q.field,
+ () ->
+ DisjunctionMatchesIterator.fromTermsEnum(
+ context, doc, q, q.field, q.getTermsEnum(terms)));
+ }
+
+ @Override
+ public boolean isCacheable(LeafReaderContext ctx) {
+ return true;
+ }
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java b/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java
index 421ea88c316..c67a4a2c131 100644
--- a/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java
@@ -96,7 +96,7 @@ public class AutomatonQuery extends MultiTermQuery implements Accountable {
*/
public AutomatonQuery(
final Term term, Automaton automaton, int determinizeWorkLimit, boolean isBinary) {
- this(term, automaton, determinizeWorkLimit, isBinary, CONSTANT_SCORE_REWRITE);
+ this(term, automaton, determinizeWorkLimit, isBinary, CONSTANT_SCORE_BLENDED_REWRITE);
}
/**
diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java
index 4abb3c47447..b4f3d371fe5 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java
@@ -40,10 +40,10 @@ import org.apache.lucene.util.AttributeSource;
* {@link #SCORING_BOOLEAN_REWRITE}, you may encounter a {@link IndexSearcher.TooManyClauses}
* exception during searching, which happens when the number of terms to be searched exceeds {@link
* IndexSearcher#getMaxClauseCount()}. Setting {@link RewriteMethod} to {@link
- * #CONSTANT_SCORE_REWRITE} prevents this.
+ * #CONSTANT_SCORE_BLENDED_REWRITE} or {@link #CONSTANT_SCORE_REWRITE} prevents this.
*
- * <p>The recommended rewrite method is {@link #CONSTANT_SCORE_REWRITE}: it doesn't spend CPU
- * computing unhelpful scores, and is the most performant rewrite method given the query. If you
+ * <p>The recommended rewrite method is {@link #CONSTANT_SCORE_BLENDED_REWRITE}: it doesn't spend
+ * CPU computing unhelpful scores, and is the most performant rewrite method given the query. If you
* need scoring (like {@link FuzzyQuery}, use {@link TopTermsScoringBooleanQueryRewrite} which uses
* a priority queue to only collect competitive terms and not hit this limitation.
*
@@ -69,6 +69,27 @@ public abstract class MultiTermQuery extends Query {
}
}
+ /**
+ * A rewrite method where documents are assigned a constant score equal to the query's boost.
+ * Maintains a boolean query-like implementation over the most costly terms while pre-processing
+ * the less costly terms into a filter bitset. Enforces an upper-limit on the number of terms
+ * allowed in the boolean query-like implementation.
+ *
+ * <p>This method aims to balance the benefits of both {@link #CONSTANT_SCORE_BOOLEAN_REWRITE} and
+ * {@link #CONSTANT_SCORE_REWRITE} by enabling skipping and early termination over costly terms
+ * while limiting the overhead of a BooleanQuery with many terms. It also ensures you cannot hit
+ * {@link org.apache.lucene.search.IndexSearcher.TooManyClauses}. For some use-cases with all low
+ * cost terms, {@link #CONSTANT_SCORE_REWRITE} may be more performant. While for some use-cases
+ * with all high cost terms, {@link #CONSTANT_SCORE_BOOLEAN_REWRITE} may be better.
+ */
+ public static final RewriteMethod CONSTANT_SCORE_BLENDED_REWRITE =
+ new RewriteMethod() {
+ @Override
+ public Query rewrite(IndexReader reader, MultiTermQuery query) {
+ return new MultiTermQueryConstantScoreBlendedWrapper<>(query);
+ }
+ };
+
/**
* A rewrite method that first creates a private Filter, by visiting each term in sequence and
* marking all docs for that term. Matching documents are assigned a constant score equal to the
diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreBlendedWrapper.java b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreBlendedWrapper.java
new file mode 100644
index 00000000000..0247a34d25a
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreBlendedWrapper.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.List;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermStates;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.DocIdSetBuilder;
+import org.apache.lucene.util.PriorityQueue;
+
+/**
+ * This class provides the functionality behind {@link
+ * MultiTermQuery#CONSTANT_SCORE_BLENDED_REWRITE}. It maintains a boolean query-like approach over a
+ * limited number of the most costly terms while rewriting the remaining terms into a filter bitset.
+ */
+final class MultiTermQueryConstantScoreBlendedWrapper<Q extends MultiTermQuery>
+ extends AbstractMultiTermQueryConstantScoreWrapper<Q> {
+ // postings lists under this threshold will always be "pre-processed" into a bitset
+ private static final int POSTINGS_PRE_PROCESS_THRESHOLD = 512;
+
+ MultiTermQueryConstantScoreBlendedWrapper(Q query) {
+ super(query);
+ }
+
+ @Override
+ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
+ throws IOException {
+ return new RewritingWeight(query, boost, scoreMode, searcher) {
+
+ @Override
+ protected WeightOrDocIdSetIterator rewriteInner(
+ LeafReaderContext context,
+ int fieldDocCount,
+ Terms terms,
+ TermsEnum termsEnum,
+ List<TermAndState> collectedTerms)
+ throws IOException {
+ DocIdSetBuilder otherTerms = new DocIdSetBuilder(context.reader().maxDoc(), terms);
+ PriorityQueue<PostingsEnum> highFrequencyTerms =
+ new PriorityQueue<>(collectedTerms.size()) {
+ @Override
+ protected boolean lessThan(PostingsEnum a, PostingsEnum b) {
+ return a.cost() < b.cost();
+ }
+ };
+
+ // Handle the already-collected terms:
+ if (collectedTerms.isEmpty() == false) {
+ TermsEnum termsEnum2 = terms.iterator();
+ for (TermAndState t : collectedTerms) {
+ termsEnum2.seekExact(t.term, t.state);
+ PostingsEnum postings = termsEnum2.postings(null, PostingsEnum.NONE);
+ if (t.docFreq <= POSTINGS_PRE_PROCESS_THRESHOLD) {
+ otherTerms.add(postings);
+ } else {
+ highFrequencyTerms.add(postings);
+ }
+ }
+ }
+
+ // Then collect remaining terms:
+ PostingsEnum reuse = null;
+ do {
+ reuse = termsEnum.postings(reuse, PostingsEnum.NONE);
+ // If a term contains all docs with a value for the specified field, we can discard the
+ // other terms and just use the dense term's postings:
+ int docFreq = termsEnum.docFreq();
+ if (fieldDocCount == docFreq) {
+ TermStates termStates = new TermStates(searcher.getTopReaderContext());
+ termStates.register(
+ termsEnum.termState(), context.ord, docFreq, termsEnum.totalTermFreq());
+ Query q =
+ new ConstantScoreQuery(
+ new TermQuery(new Term(query.field, termsEnum.term()), termStates));
+ Weight weight = searcher.rewrite(q).createWeight(searcher, scoreMode, score());
+ return new WeightOrDocIdSetIterator(weight);
+ }
+
+ if (docFreq <= POSTINGS_PRE_PROCESS_THRESHOLD) {
+ otherTerms.add(reuse);
+ } else {
+ PostingsEnum dropped = highFrequencyTerms.insertWithOverflow(reuse);
+ if (dropped != null) {
+ otherTerms.add(dropped);
+ }
+ // Reuse the postings that drop out of the PQ. Note that `dropped` will be null here
+ // if nothing is evicted, meaning we will _not_ reuse any postings (which is intentional
+ // since we can't reuse postings that are in the PQ).
+ reuse = dropped;
+ }
+ } while (termsEnum.next() != null);
+
+ DisiPriorityQueue subs = new DisiPriorityQueue(highFrequencyTerms.size() + 1);
+ for (DocIdSetIterator disi : highFrequencyTerms) {
+ Scorer s = wrapWithDummyScorer(this, disi);
+ subs.add(new DisiWrapper(s));
+ }
+ Scorer s = wrapWithDummyScorer(this, otherTerms.build().iterator());
+ subs.add(new DisiWrapper(s));
+
+ return new WeightOrDocIdSetIterator(new DisjunctionDISIApproximation(subs));
+ }
+ };
+ }
+
+ /**
+ * Wraps a DISI with a "dummy" scorer so we can easily use {@link DisiWrapper} and {@link
+ * DisjunctionDISIApproximation} as-is. This is really just a convenient vehicle to get the DISI
+ * into the priority queue used by {@link DisjunctionDISIApproximation}. The {@link Scorer}
+ * ultimately provided by the weight provides a constant boost and reflects the actual score mode.
+ */
+ private static Scorer wrapWithDummyScorer(Weight weight, DocIdSetIterator disi) {
+ // The score and score mode do not actually matter here, except that using TOP_SCORES results
+ // in another wrapper object getting created around the disi, so we try to avoid that:
+ return new ConstantScoreScorer(weight, 1f, ScoreMode.COMPLETE_NO_SCORES, disi);
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java
index 1271b3c5571..4a23ddaa006 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java
@@ -17,177 +17,44 @@
package org.apache.lucene.search;
import java.io.IOException;
-import java.util.ArrayList;
import java.util.List;
-import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.BooleanClause.Occur;
-import org.apache.lucene.util.Accountable;
-import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.DocIdSetBuilder;
-import org.apache.lucene.util.RamUsageEstimator;
/**
- * This class also provides the functionality behind {@link MultiTermQuery#CONSTANT_SCORE_REWRITE}.
- * It tries to rewrite per-segment as a boolean query that returns a constant score and otherwise
- * fills a bit set with matches and builds a Scorer on top of this bit set.
+ * This class provides the functionality behind {@link MultiTermQuery#CONSTANT_SCORE_REWRITE}. It
+ * tries to rewrite per-segment as a boolean query that returns a constant score and otherwise fills
+ * a bit set with matches and builds a Scorer on top of this bit set.
*/
-final class MultiTermQueryConstantScoreWrapper<Q extends MultiTermQuery> extends Query
- implements Accountable {
+final class MultiTermQueryConstantScoreWrapper<Q extends MultiTermQuery>
+ extends AbstractMultiTermQueryConstantScoreWrapper<Q> {
- // mtq that matches 16 terms or less will be executed as a regular disjunction
- private static final int BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD = 16;
-
- @Override
- public long ramBytesUsed() {
- if (query instanceof Accountable) {
- return RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
- + RamUsageEstimator.NUM_BYTES_OBJECT_REF
- + ((Accountable) query).ramBytesUsed();
- }
- return RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
- + RamUsageEstimator.NUM_BYTES_OBJECT_REF
- + RamUsageEstimator.QUERY_DEFAULT_RAM_BYTES_USED;
- }
-
- private static class TermAndState {
- final BytesRef term;
- final TermState state;
- final int docFreq;
- final long totalTermFreq;
-
- TermAndState(BytesRef term, TermState state, int docFreq, long totalTermFreq) {
- this.term = term;
- this.state = state;
- this.docFreq = docFreq;
- this.totalTermFreq = totalTermFreq;
- }
- }
-
- private static class WeightOrDocIdSet {
- final Weight weight;
- final DocIdSet set;
-
- WeightOrDocIdSet(Weight weight) {
- this.weight = Objects.requireNonNull(weight);
- this.set = null;
- }
-
- WeightOrDocIdSet(DocIdSet bitset) {
- this.set = bitset;
- this.weight = null;
- }
- }
-
- protected final Q query;
-
- /** Wrap a {@link MultiTermQuery} as a Filter. */
- protected MultiTermQueryConstantScoreWrapper(Q query) {
- this.query = query;
- }
-
- @Override
- public String toString(String field) {
- // query.toString should be ok for the filter, too, if the query boost is 1.0f
- return query.toString(field);
- }
-
- @Override
- public final boolean equals(final Object other) {
- return sameClassAs(other)
- && query.equals(((MultiTermQueryConstantScoreWrapper<?>) other).query);
- }
-
- @Override
- public final int hashCode() {
- return 31 * classHash() + query.hashCode();
- }
-
- /** Returns the encapsulated query */
- public Q getQuery() {
- return query;
- }
-
- /** Returns the field name for this query */
- public final String getField() {
- return query.getField();
+ MultiTermQueryConstantScoreWrapper(Q query) {
+ super(query);
}
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
throws IOException {
- return new ConstantScoreWeight(this, boost) {
+ return new RewritingWeight(query, boost, scoreMode, searcher) {
- /**
- * Try to collect terms from the given terms enum and return true if all terms could be
- * collected or if one of the iterated terms contains all docs for the field. If {@code false}
- * is returned, the enum is left positioned on the next term.
- */
- private boolean collectTerms(int fieldDocCount, TermsEnum termsEnum, List<TermAndState> terms)
+ @Override
+ protected WeightOrDocIdSetIterator rewriteInner(
+ LeafReaderContext context,
+ int fieldDocCount,
+ Terms terms,
+ TermsEnum termsEnum,
+ List<TermAndState> collectedTerms)
throws IOException {
- final int threshold =
- Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, IndexSearcher.getMaxClauseCount());
- for (int i = 0; i < threshold; ++i) {
- final BytesRef term = termsEnum.next();
- if (term == null) {
- return true;
- }
- TermState state = termsEnum.termState();
- int docFreq = termsEnum.docFreq();
- TermAndState termAndState =
- new TermAndState(
- BytesRef.deepCopyOf(term), state, docFreq, termsEnum.totalTermFreq());
- if (fieldDocCount == docFreq) {
- // If the term contains every document with a value for the field, we can ignore all
- // other terms:
- terms.clear();
- terms.add(termAndState);
- return true;
- }
- terms.add(termAndState);
- }
- return termsEnum.next() == null;
- }
-
- /**
- * On the given leaf context, try to either rewrite to a disjunction if there are few terms,
- * or build a bitset containing matching docs.
- */
- private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException {
- final Terms terms = context.reader().terms(query.field);
- if (terms == null) {
- // field does not exist
- return new WeightOrDocIdSet((DocIdSet) null);
- }
-
- final int fieldDocCount = terms.getDocCount();
- final TermsEnum termsEnum = query.getTermsEnum(terms);
- assert termsEnum != null;
-
+ DocIdSetBuilder builder = new DocIdSetBuilder(context.reader().maxDoc(), terms);
PostingsEnum docs = null;
- final List<TermAndState> collectedTerms = new ArrayList<>();
- if (collectTerms(fieldDocCount, termsEnum, collectedTerms)) {
- // build a boolean query
- BooleanQuery.Builder bq = new BooleanQuery.Builder();
- for (TermAndState t : collectedTerms) {
- final TermStates termStates = new TermStates(searcher.getTopReaderContext());
- termStates.register(t.state, context.ord, t.docFreq, t.totalTermFreq);
- bq.add(new TermQuery(new Term(query.field, t.term), termStates), Occur.SHOULD);
- }
- Query q = new ConstantScoreQuery(bq.build());
- final Weight weight = searcher.rewrite(q).createWeight(searcher, scoreMode, score());
- return new WeightOrDocIdSet(weight);
- }
-
- // Too many terms: go back to the terms we already collected and start building the bit set
- DocIdSetBuilder builder = new DocIdSetBuilder(context.reader().maxDoc(), terms);
+ // Handle the already-collected terms:
if (collectedTerms.isEmpty() == false) {
TermsEnum termsEnum2 = terms.iterator();
for (TermAndState t : collectedTerms) {
@@ -197,7 +64,7 @@ final class MultiTermQueryConstantScoreWrapper<Q extends MultiTermQuery> extends
}
}
- // Then keep filling the bit set with remaining terms
+ // Then keep filling the bit set with remaining terms:
do {
docs = termsEnum.postings(docs, PostingsEnum.NONE);
// If a term contains all docs with a value for the specified field, we can discard the
@@ -211,73 +78,13 @@ final class MultiTermQueryConstantScoreWrapper<Q extends MultiTermQuery> extends
new ConstantScoreQuery(
new TermQuery(new Term(query.field, termsEnum.term()), termStates));
Weight weight = searcher.rewrite(q).createWeight(searcher, scoreMode, score());
- return new WeightOrDocIdSet(weight);
+ return new WeightOrDocIdSetIterator(weight);
}
builder.add(docs);
} while (termsEnum.next() != null);
- return new WeightOrDocIdSet(builder.build());
- }
-
- private Scorer scorer(DocIdSet set) throws IOException {
- if (set == null) {
- return null;
- }
- final DocIdSetIterator disi = set.iterator();
- if (disi == null) {
- return null;
- }
- return new ConstantScoreScorer(this, score(), scoreMode, disi);
- }
-
- @Override
- public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
- final WeightOrDocIdSet weightOrBitSet = rewrite(context);
- if (weightOrBitSet.weight != null) {
- return weightOrBitSet.weight.bulkScorer(context);
- } else {
- final Scorer scorer = scorer(weightOrBitSet.set);
- if (scorer == null) {
- return null;
- }
- return new DefaultBulkScorer(scorer);
- }
- }
-
- @Override
- public Matches matches(LeafReaderContext context, int doc) throws IOException {
- final Terms terms = context.reader().terms(query.field);
- if (terms == null) {
- return null;
- }
- return MatchesUtils.forField(
- query.field,
- () ->
- DisjunctionMatchesIterator.fromTermsEnum(
- context, doc, query, query.field, query.getTermsEnum(terms)));
- }
-
- @Override
- public Scorer scorer(LeafReaderContext context) throws IOException {
- final WeightOrDocIdSet weightOrBitSet = rewrite(context);
- if (weightOrBitSet.weight != null) {
- return weightOrBitSet.weight.scorer(context);
- } else {
- return scorer(weightOrBitSet.set);
- }
- }
-
- @Override
- public boolean isCacheable(LeafReaderContext ctx) {
- return true;
+ return new WeightOrDocIdSetIterator(builder.build().iterator());
}
};
}
-
- @Override
- public void visit(QueryVisitor visitor) {
- if (visitor.acceptField(getField())) {
- query.visit(visitor.getSubVisitor(Occur.FILTER, this));
- }
- }
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/PrefixQuery.java b/lucene/core/src/java/org/apache/lucene/search/PrefixQuery.java
index fb1d4f8a5c7..9295861b6e8 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PrefixQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PrefixQuery.java
@@ -24,13 +24,13 @@ import org.apache.lucene.util.automaton.Automaton;
* A Query that matches documents containing terms with a specified prefix. A PrefixQuery is built
* by QueryParser for input like <code>app*</code>.
*
- * <p>This query uses the {@link MultiTermQuery#CONSTANT_SCORE_REWRITE} rewrite method.
+ * <p>This query uses the {@link MultiTermQuery#CONSTANT_SCORE_BLENDED_REWRITE} rewrite method.
*/
public class PrefixQuery extends AutomatonQuery {
/** Constructs a query for terms starting with <code>prefix</code>. */
public PrefixQuery(Term prefix) {
- this(prefix, CONSTANT_SCORE_REWRITE);
+ this(prefix, CONSTANT_SCORE_BLENDED_REWRITE);
}
/**
diff --git a/lucene/core/src/java/org/apache/lucene/search/RegexpQuery.java b/lucene/core/src/java/org/apache/lucene/search/RegexpQuery.java
index 936b12f2f74..ebf1b737b88 100644
--- a/lucene/core/src/java/org/apache/lucene/search/RegexpQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/RegexpQuery.java
@@ -100,7 +100,7 @@ public class RegexpQuery extends AutomatonQuery {
match_flags,
DEFAULT_PROVIDER,
determinizeWorkLimit,
- CONSTANT_SCORE_REWRITE);
+ CONSTANT_SCORE_BLENDED_REWRITE);
}
/**
@@ -116,7 +116,7 @@ public class RegexpQuery extends AutomatonQuery {
*/
public RegexpQuery(
Term term, int syntax_flags, AutomatonProvider provider, int determinizeWorkLimit) {
- this(term, syntax_flags, 0, provider, determinizeWorkLimit, CONSTANT_SCORE_REWRITE);
+ this(term, syntax_flags, 0, provider, determinizeWorkLimit, CONSTANT_SCORE_BLENDED_REWRITE);
}
/**
diff --git a/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java b/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java
index d2ee23315cc..debc3efae15 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java
@@ -42,7 +42,8 @@ public abstract class ScoringRewrite<B> extends TermCollectingRewrite<B> {
* A rewrite method that first translates each term into {@link BooleanClause.Occur#SHOULD} clause
* in a BooleanQuery, and keeps the scores as computed by the query. Note that typically such
* scores are meaningless to the user, and require non-trivial CPU to compute, so it's almost
- * always better to use {@link MultiTermQuery#CONSTANT_SCORE_REWRITE} instead.
+ * always better to use {@link MultiTermQuery#CONSTANT_SCORE_BLENDED_REWRITE} or {@link
+ * MultiTermQuery#CONSTANT_SCORE_REWRITE} instead.
*
* <p><b>NOTE</b>: This rewrite method will hit {@link IndexSearcher.TooManyClauses} if the number
* of terms exceeds {@link IndexSearcher#getMaxClauseCount}.
diff --git a/lucene/core/src/java/org/apache/lucene/search/TermRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermRangeQuery.java
index 00204817772..43306ccc8e2 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TermRangeQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TermRangeQuery.java
@@ -30,7 +30,7 @@ import org.apache.lucene.util.automaton.Automaton;
* <p><b>NOTE</b>: {@link TermRangeQuery} performs significantly slower than {@link PointRangeQuery
* point-based ranges} as it needs to visit all terms that match the range and merges their matches.
*
- * <p>This query uses the {@link MultiTermQuery#CONSTANT_SCORE_REWRITE} rewrite method.
+ * <p>This query uses the {@link MultiTermQuery#CONSTANT_SCORE_BLENDED_REWRITE} rewrite method.
*
* @since 2.9
*/
@@ -60,7 +60,7 @@ public class TermRangeQuery extends AutomatonQuery {
BytesRef upperTerm,
boolean includeLower,
boolean includeUpper) {
- this(field, lowerTerm, upperTerm, includeLower, includeUpper, CONSTANT_SCORE_REWRITE);
+ this(field, lowerTerm, upperTerm, includeLower, includeUpper, CONSTANT_SCORE_BLENDED_REWRITE);
}
/**
@@ -121,7 +121,7 @@ public class TermRangeQuery extends AutomatonQuery {
boolean includeLower,
boolean includeUpper) {
return newStringRange(
- field, lowerTerm, upperTerm, includeLower, includeUpper, CONSTANT_SCORE_REWRITE);
+ field, lowerTerm, upperTerm, includeLower, includeUpper, CONSTANT_SCORE_BLENDED_REWRITE);
}
/** Factory that creates a new TermRangeQuery using Strings for term text. */
diff --git a/lucene/core/src/java/org/apache/lucene/search/UsageTrackingQueryCachingPolicy.java b/lucene/core/src/java/org/apache/lucene/search/UsageTrackingQueryCachingPolicy.java
index 2b9727c793b..eac31428180 100644
--- a/lucene/core/src/java/org/apache/lucene/search/UsageTrackingQueryCachingPolicy.java
+++ b/lucene/core/src/java/org/apache/lucene/search/UsageTrackingQueryCachingPolicy.java
@@ -47,6 +47,7 @@ public class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy {
// already have the DocIdSetIterator#cost API) but the cost to build the
// DocIdSet in the first place
return query instanceof MultiTermQuery
+ || query instanceof MultiTermQueryConstantScoreBlendedWrapper
|| query instanceof MultiTermQueryConstantScoreWrapper
|| query instanceof TermInSetQuery
|| isPointQuery(query);
diff --git a/lucene/core/src/java/org/apache/lucene/search/WildcardQuery.java b/lucene/core/src/java/org/apache/lucene/search/WildcardQuery.java
index aeaecb10ec9..0abb51eac3a 100644
--- a/lucene/core/src/java/org/apache/lucene/search/WildcardQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/WildcardQuery.java
@@ -31,7 +31,7 @@ import org.apache.lucene.util.automaton.Operations;
* <p>Note this query can be slow, as it needs to iterate over many terms. In order to prevent
* extremely slow WildcardQueries, a Wildcard term should not start with the wildcard <code>*</code>
*
- * <p>This query uses the {@link MultiTermQuery#CONSTANT_SCORE_REWRITE} rewrite method.
+ * <p>This query uses the {@link MultiTermQuery#CONSTANT_SCORE_BLENDED_REWRITE} rewrite method.
*
* @see AutomatonQuery
*/
@@ -59,7 +59,7 @@ public class WildcardQuery extends AutomatonQuery {
* otherwise know what to specify.
*/
public WildcardQuery(Term term, int determinizeWorkLimit) {
- this(term, determinizeWorkLimit, CONSTANT_SCORE_REWRITE);
+ this(term, determinizeWorkLimit, CONSTANT_SCORE_BLENDED_REWRITE);
}
/**
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java
index c8c5ffeb4bf..3244a82442f 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java
@@ -109,6 +109,15 @@ public class TestAutomatonQuery extends LuceneTestCase {
Operations.DEFAULT_DETERMINIZE_WORK_LIMIT,
false,
MultiTermQuery.CONSTANT_SCORE_REWRITE)));
+ assertEquals(
+ expected,
+ automatonQueryNrHits(
+ new AutomatonQuery(
+ newTerm("bogus"),
+ automaton,
+ Operations.DEFAULT_DETERMINIZE_WORK_LIMIT,
+ false,
+ MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE)));
assertEquals(
expected,
automatonQueryNrHits(
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQueryUnicode.java b/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQueryUnicode.java
index 314b32a8494..c3ddb7135c8 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQueryUnicode.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQueryUnicode.java
@@ -116,6 +116,15 @@ public class TestAutomatonQueryUnicode extends LuceneTestCase {
Operations.DEFAULT_DETERMINIZE_WORK_LIMIT,
false,
MultiTermQuery.CONSTANT_SCORE_REWRITE)));
+ assertEquals(
+ expected,
+ automatonQueryNrHits(
+ new AutomatonQuery(
+ newTerm("bogus"),
+ automaton,
+ Operations.DEFAULT_DETERMINIZE_WORK_LIMIT,
+ false,
+ MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE)));
assertEquals(
expected,
automatonQueryNrHits(
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestFieldCacheRewriteMethod.java b/lucene/core/src/test/org/apache/lucene/search/TestFieldCacheRewriteMethod.java
index d48f9a64ad9..f06e631f2c0 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestFieldCacheRewriteMethod.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestFieldCacheRewriteMethod.java
@@ -47,10 +47,21 @@ public class TestFieldCacheRewriteMethod extends TestRegexpRandom2 {
Operations.DEFAULT_DETERMINIZE_WORK_LIMIT,
MultiTermQuery.CONSTANT_SCORE_REWRITE);
+ RegexpQuery filter2 =
+ new RegexpQuery(
+ new Term(fieldName, regexp),
+ RegExp.NONE,
+ 0,
+ name -> null,
+ Operations.DEFAULT_DETERMINIZE_WORK_LIMIT,
+ MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE);
+
TopDocs fieldCacheDocs = searcher1.search(fieldCache, 25);
TopDocs filterDocs = searcher2.search(filter, 25);
+ TopDocs filter2Docs = searcher2.search(filter2, 25);
CheckHits.checkEqual(fieldCache, fieldCacheDocs.scoreDocs, filterDocs.scoreDocs);
+ CheckHits.checkEqual(fieldCache, fieldCacheDocs.scoreDocs, filter2Docs.scoreDocs);
}
public void testEquals() throws Exception {
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java b/lucene/core/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java
index 0fd1c152a0b..886d0a682d9 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java
@@ -17,6 +17,7 @@
package org.apache.lucene.search;
import java.io.IOException;
+import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@@ -39,6 +40,9 @@ public class TestMultiTermConstantScore extends TestBaseRangeFilter {
/** threshold for comparing floats */
public static final float SCORE_COMP_THRESH = 1e-6f;
+ public static final Set<MultiTermQuery.RewriteMethod> CONSTANT_SCORE_REWRITES =
+ Set.of(MultiTermQuery.CONSTANT_SCORE_REWRITE, MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE);
+
static Directory small;
static IndexReader reader;
@@ -93,7 +97,12 @@ public class TestMultiTermConstantScore extends TestBaseRangeFilter {
small = null;
}
- /** macro for readability */
+ /**
+ * macro for readability
+ *
+ * @deprecated please use {@link #cspq(Term, MultiTermQuery.RewriteMethod)} instead
+ */
+ @Deprecated
public static Query csrq(String f, String l, String h, boolean il, boolean ih) {
TermRangeQuery query =
TermRangeQuery.newStringRange(f, l, h, il, ih, MultiTermQuery.CONSTANT_SCORE_REWRITE);
@@ -103,6 +112,7 @@ public class TestMultiTermConstantScore extends TestBaseRangeFilter {
return query;
}
+ /** macro for readability */
public static Query csrq(
String f, String l, String h, boolean il, boolean ih, MultiTermQuery.RewriteMethod method) {
TermRangeQuery query = TermRangeQuery.newStringRange(f, l, h, il, ih, method);
@@ -112,28 +122,52 @@ public class TestMultiTermConstantScore extends TestBaseRangeFilter {
return query;
}
- /** macro for readability */
+ /**
+ * macro for readability
+ *
+ * @deprecated please use {@link #cspq(Term, MultiTermQuery.RewriteMethod)} instead
+ */
+ @Deprecated
public static Query cspq(Term prefix) {
return new PrefixQuery(prefix, MultiTermQuery.CONSTANT_SCORE_REWRITE);
}
/** macro for readability */
+ public static Query cspq(Term prefix, MultiTermQuery.RewriteMethod method) {
+ return new PrefixQuery(prefix, method);
+ }
+
+ /**
+ * macro for readability
+ *
+ * @deprecated please use {@link #cswcq(Term, MultiTermQuery.RewriteMethod)} instead
+ */
+ @Deprecated
public static Query cswcq(Term wild) {
return new WildcardQuery(
wild, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT, MultiTermQuery.CONSTANT_SCORE_REWRITE);
}
+ /** macro for readability */
+ public static Query cswcq(Term wild, MultiTermQuery.RewriteMethod method) {
+ return new WildcardQuery(wild, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT, method);
+ }
+
@Test
public void testBasics() throws IOException {
- QueryUtils.check(csrq("data", "1", "6", T, T));
- QueryUtils.check(csrq("data", "A", "Z", T, T));
- QueryUtils.checkUnequal(csrq("data", "1", "6", T, T), csrq("data", "A", "Z", T, T));
-
- QueryUtils.check(cspq(new Term("data", "p*u?")));
- QueryUtils.checkUnequal(cspq(new Term("data", "pre*")), cspq(new Term("data", "pres*")));
-
- QueryUtils.check(cswcq(new Term("data", "p")));
- QueryUtils.checkUnequal(cswcq(new Term("data", "pre*n?t")), cswcq(new Term("data", "pr*t?j")));
+ for (MultiTermQuery.RewriteMethod rw : CONSTANT_SCORE_REWRITES) {
+ QueryUtils.check(csrq("data", "1", "6", T, T, rw));
+ QueryUtils.check(csrq("data", "A", "Z", T, T, rw));
+ QueryUtils.checkUnequal(csrq("data", "1", "6", T, T, rw), csrq("data", "A", "Z", T, T, rw));
+
+ QueryUtils.check(cspq(new Term("data", "p*u?"), rw));
+ QueryUtils.checkUnequal(
+ cspq(new Term("data", "pre*"), rw), cspq(new Term("data", "pres*"), rw));
+
+ QueryUtils.check(cswcq(new Term("data", "p"), rw));
+ QueryUtils.checkUnequal(
+ cswcq(new Term("data", "pre*n?t"), rw), cswcq(new Term("data", "pr*t?j"), rw));
+ }
}
@Test
@@ -146,7 +180,10 @@ public class TestMultiTermConstantScore extends TestBaseRangeFilter {
// some hits match more terms then others, score should be the same
- result = search.search(csrq("data", "1", "6", T, T), 1000).scoreDocs;
+ result =
+ search.search(
+ csrq("data", "1", "6", T, T, MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE), 1000)
+ .scoreDocs;
int numHits = result.length;
assertEquals("wrong number of results", 6, numHits);
float score = result[0].score;
@@ -190,7 +227,9 @@ public class TestMultiTermConstantScore extends TestBaseRangeFilter {
BooleanQuery.Builder bq = new BooleanQuery.Builder();
bq.add(dummyTerm, BooleanClause.Occur.SHOULD); // hits one doc
- bq.add(csrq("data", "#", "#", T, T), BooleanClause.Occur.SHOULD); // hits no docs
+ bq.add(
+ csrq("data", "#", "#", T, T, MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE),
+ BooleanClause.Occur.SHOULD); // hits no docs
result = search.search(bq.build(), 1000).scoreDocs;
int numHits = result.length;
assertEquals("wrong number of results", 1, numHits);
@@ -233,26 +272,29 @@ public class TestMultiTermConstantScore extends TestBaseRangeFilter {
IndexSearcher search = newSearcher(reader);
- // first do a regular TermRangeQuery which uses term expansion so
- // docs with more terms in range get higher scores
+ for (MultiTermQuery.RewriteMethod rw : CONSTANT_SCORE_REWRITES) {
- Query rq = TermRangeQuery.newStringRange("data", "1", "4", T, T);
+ // first do a regular TermRangeQuery which uses term expansion so
+ // docs with more terms in range get higher scores
- ScoreDoc[] expected = search.search(rq, 1000).scoreDocs;
- int numHits = expected.length;
+ Query rq = TermRangeQuery.newStringRange("data", "1", "4", T, T, rw);
- // now do a boolean where which also contains a
- // ConstantScoreRangeQuery and make sure hte order is the same
+ ScoreDoc[] expected = search.search(rq, 1000).scoreDocs;
+ int numHits = expected.length;
- BooleanQuery.Builder q = new BooleanQuery.Builder();
- q.add(rq, BooleanClause.Occur.MUST); // T, F);
- q.add(csrq("data", "1", "6", T, T), BooleanClause.Occur.MUST); // T, F);
+ // now do a boolean where which also contains a
+ // ConstantScoreRangeQuery and make sure the order is the same
- ScoreDoc[] actual = search.search(q.build(), 1000).scoreDocs;
+ BooleanQuery.Builder q = new BooleanQuery.Builder();
+ q.add(rq, BooleanClause.Occur.MUST); // T, F);
+ q.add(csrq("data", "1", "6", T, T, rw), BooleanClause.Occur.MUST); // T, F);
- assertEquals("wrong numebr of hits", numHits, actual.length);
- for (int i = 0; i < numHits; i++) {
- assertEquals("mismatch in docid for hit#" + i, expected[i].doc, actual[i].doc);
+ ScoreDoc[] actual = search.search(q.build(), 1000).scoreDocs;
+
+ assertEquals("wrong number of hits", numHits, actual.length);
+ for (int i = 0; i < numHits; i++) {
+ assertEquals("mismatch in docid for hit#" + i, expected[i].doc, actual[i].doc);
+ }
}
}
@@ -279,153 +321,74 @@ public class TestMultiTermConstantScore extends TestBaseRangeFilter {
ScoreDoc[] result;
- // test id, bounded on both ends
-
- result = search.search(csrq("id", minIP, maxIP, T, T), numDocs).scoreDocs;
- assertEquals("find all", numDocs, result.length);
-
- result =
- search.search(
- csrq("id", minIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_REWRITE), numDocs)
- .scoreDocs;
- assertEquals("find all", numDocs, result.length);
-
- result = search.search(csrq("id", minIP, maxIP, T, F), numDocs).scoreDocs;
- assertEquals("all but last", numDocs - 1, result.length);
-
- result =
- search.search(
- csrq("id", minIP, maxIP, T, F, MultiTermQuery.CONSTANT_SCORE_REWRITE), numDocs)
- .scoreDocs;
- assertEquals("all but last", numDocs - 1, result.length);
-
- result = search.search(csrq("id", minIP, maxIP, F, T), numDocs).scoreDocs;
- assertEquals("all but first", numDocs - 1, result.length);
-
- result =
- search.search(
- csrq("id", minIP, maxIP, F, T, MultiTermQuery.CONSTANT_SCORE_REWRITE), numDocs)
- .scoreDocs;
- assertEquals("all but first", numDocs - 1, result.length);
-
- result = search.search(csrq("id", minIP, maxIP, F, F), numDocs).scoreDocs;
- assertEquals("all but ends", numDocs - 2, result.length);
-
- result =
- search.search(
- csrq("id", minIP, maxIP, F, F, MultiTermQuery.CONSTANT_SCORE_REWRITE), numDocs)
- .scoreDocs;
- assertEquals("all but ends", numDocs - 2, result.length);
-
- result = search.search(csrq("id", medIP, maxIP, T, T), numDocs).scoreDocs;
- assertEquals("med and up", 1 + maxId - medId, result.length);
-
- result =
- search.search(
- csrq("id", medIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_REWRITE), numDocs)
- .scoreDocs;
- assertEquals("med and up", 1 + maxId - medId, result.length);
+ for (MultiTermQuery.RewriteMethod rw : CONSTANT_SCORE_REWRITES) {
- result = search.search(csrq("id", minIP, medIP, T, T), numDocs).scoreDocs;
- assertEquals("up to med", 1 + medId - minId, result.length);
+ // test id, bounded on both ends
- result =
- search.search(
- csrq("id", minIP, medIP, T, T, MultiTermQuery.CONSTANT_SCORE_REWRITE), numDocs)
- .scoreDocs;
- assertEquals("up to med", 1 + medId - minId, result.length);
+ result = search.search(csrq("id", minIP, maxIP, T, T, rw), numDocs).scoreDocs;
+ assertEquals("find all", numDocs, result.length);
- // unbounded id
+ result = search.search(csrq("id", minIP, maxIP, T, F, rw), numDocs).scoreDocs;
+ assertEquals("all but last", numDocs - 1, result.length);
- result = search.search(csrq("id", minIP, null, T, F), numDocs).scoreDocs;
- assertEquals("min and up", numDocs, result.length);
+ result = search.search(csrq("id", minIP, maxIP, F, T, rw), numDocs).scoreDocs;
+ assertEquals("all but first", numDocs - 1, result.length);
- result = search.search(csrq("id", null, maxIP, F, T), numDocs).scoreDocs;
- assertEquals("max and down", numDocs, result.length);
+ result = search.search(csrq("id", minIP, maxIP, F, F, rw), numDocs).scoreDocs;
+ assertEquals("all but ends", numDocs - 2, result.length);
- result = search.search(csrq("id", minIP, null, F, F), numDocs).scoreDocs;
- assertEquals("not min, but up", numDocs - 1, result.length);
+ result = search.search(csrq("id", medIP, maxIP, T, T, rw), numDocs).scoreDocs;
+ assertEquals("med and up", 1 + maxId - medId, result.length);
- result = search.search(csrq("id", null, maxIP, F, F), numDocs).scoreDocs;
- assertEquals("not max, but down", numDocs - 1, result.length);
+ result = search.search(csrq("id", minIP, medIP, T, T, rw), numDocs).scoreDocs;
+ assertEquals("up to med", 1 + medId - minId, result.length);
- result = search.search(csrq("id", medIP, maxIP, T, F), numDocs).scoreDocs;
- assertEquals("med and up, not max", maxId - medId, result.length);
+ // unbounded id
- result = search.search(csrq("id", minIP, medIP, F, T), numDocs).scoreDocs;
- assertEquals("not min, up to med", medId - minId, result.length);
+ result = search.search(csrq("id", minIP, null, T, F, rw), numDocs).scoreDocs;
+ assertEquals("min and up", numDocs, result.length);
- // very small sets
+ result = search.search(csrq("id", null, maxIP, F, T, rw), numDocs).scoreDocs;
+ assertEquals("max and down", numDocs, result.length);
- result = search.search(csrq("id", minIP, minIP, F, F), numDocs).scoreDocs;
- assertEquals("min,min,F,F", 0, result.length);
-
- result =
- search.search(
- csrq("id", minIP, minIP, F, F, MultiTermQuery.CONSTANT_SCORE_REWRITE), numDocs)
- .scoreDocs;
- assertEquals("min,min,F,F", 0, result.length);
-
- result = search.search(csrq("id", medIP, medIP, F, F), numDocs).scoreDocs;
- assertEquals("med,med,F,F", 0, result.length);
-
- result =
- search.search(
- csrq("id", medIP, medIP, F, F, MultiTermQuery.CONSTANT_SCORE_REWRITE), numDocs)
- .scoreDocs;
- assertEquals("med,med,F,F", 0, result.length);
+ result = search.search(csrq("id", minIP, null, F, F, rw), numDocs).scoreDocs;
+ assertEquals("not min, but up", numDocs - 1, result.length);
- result = search.search(csrq("id", maxIP, maxIP, F, F), numDocs).scoreDocs;
- assertEquals("max,max,F,F", 0, result.length);
+ result = search.search(csrq("id", null, maxIP, F, F, rw), numDocs).scoreDocs;
+ assertEquals("not max, but down", numDocs - 1, result.length);
- result =
- search.search(
- csrq("id", maxIP, maxIP, F, F, MultiTermQuery.CONSTANT_SCORE_REWRITE), numDocs)
- .scoreDocs;
- assertEquals("max,max,F,F", 0, result.length);
+ result = search.search(csrq("id", medIP, maxIP, T, F, rw), numDocs).scoreDocs;
+ assertEquals("med and up, not max", maxId - medId, result.length);
- result = search.search(csrq("id", minIP, minIP, T, T), numDocs).scoreDocs;
- assertEquals("min,min,T,T", 1, result.length);
+ result = search.search(csrq("id", minIP, medIP, F, T, rw), numDocs).scoreDocs;
+ assertEquals("not min, up to med", medId - minId, result.length);
- result =
- search.search(
- csrq("id", minIP, minIP, T, T, MultiTermQuery.CONSTANT_SCORE_REWRITE), numDocs)
- .scoreDocs;
- assertEquals("min,min,T,T", 1, result.length);
+ // very small sets
- result = search.search(csrq("id", null, minIP, F, T), numDocs).scoreDocs;
- assertEquals("nul,min,F,T", 1, result.length);
+ result = search.search(csrq("id", minIP, minIP, F, F, rw), numDocs).scoreDocs;
+ assertEquals("min,min,F,F", 0, result.length);
- result =
- search.search(csrq("id", null, minIP, F, T, MultiTermQuery.CONSTANT_SCORE_REWRITE), numDocs)
- .scoreDocs;
- assertEquals("nul,min,F,T", 1, result.length);
+ result = search.search(csrq("id", medIP, medIP, F, F, rw), numDocs).scoreDocs;
+ assertEquals("med,med,F,F", 0, result.length);
- result = search.search(csrq("id", maxIP, maxIP, T, T), numDocs).scoreDocs;
- assertEquals("max,max,T,T", 1, result.length);
+ result = search.search(csrq("id", maxIP, maxIP, F, F, rw), numDocs).scoreDocs;
+ assertEquals("max,max,F,F", 0, result.length);
- result =
- search.search(
- csrq("id", maxIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_REWRITE), numDocs)
- .scoreDocs;
- assertEquals("max,max,T,T", 1, result.length);
+ result = search.search(csrq("id", minIP, minIP, T, T, rw), numDocs).scoreDocs;
+ assertEquals("min,min,T,T", 1, result.length);
- result = search.search(csrq("id", maxIP, null, T, F), numDocs).scoreDocs;
- assertEquals("max,nul,T,T", 1, result.length);
+ result = search.search(csrq("id", null, minIP, F, T, rw), numDocs).scoreDocs;
+ assertEquals("nul,min,F,T", 1, result.length);
- result =
- search.search(csrq("id", maxIP, null, T, F, MultiTermQuery.CONSTANT_SCORE_REWRITE), numDocs)
- .scoreDocs;
- assertEquals("max,nul,T,T", 1, result.length);
+ result = search.search(csrq("id", maxIP, maxIP, T, T, rw), numDocs).scoreDocs;
+ assertEquals("max,max,T,T", 1, result.length);
- result = search.search(csrq("id", medIP, medIP, T, T), numDocs).scoreDocs;
- assertEquals("med,med,T,T", 1, result.length);
+ result = search.search(csrq("id", maxIP, null, T, F, rw), numDocs).scoreDocs;
+ assertEquals("max,nul,T,T", 1, result.length);
- result =
- search.search(
- csrq("id", medIP, medIP, T, T, MultiTermQuery.CONSTANT_SCORE_REWRITE), numDocs)
- .scoreDocs;
- assertEquals("med,med,T,T", 1, result.length);
+ result = search.search(csrq("id", medIP, medIP, T, T, rw), numDocs).scoreDocs;
+ assertEquals("med,med,T,T", 1, result.length);
+ }
}
@Test
@@ -444,49 +407,52 @@ public class TestMultiTermConstantScore extends TestBaseRangeFilter {
ScoreDoc[] result;
- // test extremes, bounded on both ends
+ for (MultiTermQuery.RewriteMethod rw : CONSTANT_SCORE_REWRITES) {
- result = search.search(csrq("rand", minRP, maxRP, T, T), numDocs).scoreDocs;
- assertEquals("find all", numDocs, result.length);
+ // test extremes, bounded on both ends
- result = search.search(csrq("rand", minRP, maxRP, T, F), numDocs).scoreDocs;
- assertEquals("all but biggest", numDocs - 1, result.length);
+ result = search.search(csrq("rand", minRP, maxRP, T, T, rw), numDocs).scoreDocs;
+ assertEquals("find all", numDocs, result.length);
- result = search.search(csrq("rand", minRP, maxRP, F, T), numDocs).scoreDocs;
- assertEquals("all but smallest", numDocs - 1, result.length);
+ result = search.search(csrq("rand", minRP, maxRP, T, F, rw), numDocs).scoreDocs;
+ assertEquals("all but biggest", numDocs - 1, result.length);
- result = search.search(csrq("rand", minRP, maxRP, F, F), numDocs).scoreDocs;
- assertEquals("all but extremes", numDocs - 2, result.length);
+ result = search.search(csrq("rand", minRP, maxRP, F, T, rw), numDocs).scoreDocs;
+ assertEquals("all but smallest", numDocs - 1, result.length);
- // unbounded
+ result = search.search(csrq("rand", minRP, maxRP, F, F, rw), numDocs).scoreDocs;
+ assertEquals("all but extremes", numDocs - 2, result.length);
- result = search.search(csrq("rand", minRP, null, T, F), numDocs).scoreDocs;
- assertEquals("smallest and up", numDocs, result.length);
+ // unbounded
- result = search.search(csrq("rand", null, maxRP, F, T), numDocs).scoreDocs;
- assertEquals("biggest and down", numDocs, result.length);
+ result = search.search(csrq("rand", minRP, null, T, F, rw), numDocs).scoreDocs;
+ assertEquals("smallest and up", numDocs, result.length);
- result = search.search(csrq("rand", minRP, null, F, F), numDocs).scoreDocs;
- assertEquals("not smallest, but up", numDocs - 1, result.length);
+ result = search.search(csrq("rand", null, maxRP, F, T, rw), numDocs).scoreDocs;
+ assertEquals("biggest and down", numDocs, result.length);
- result = search.search(csrq("rand", null, maxRP, F, F), numDocs).scoreDocs;
- assertEquals("not biggest, but down", numDocs - 1, result.length);
+ result = search.search(csrq("rand", minRP, null, F, F, rw), numDocs).scoreDocs;
+ assertEquals("not smallest, but up", numDocs - 1, result.length);
- // very small sets
+ result = search.search(csrq("rand", null, maxRP, F, F, rw), numDocs).scoreDocs;
+ assertEquals("not biggest, but down", numDocs - 1, result.length);
- result = search.search(csrq("rand", minRP, minRP, F, F), numDocs).scoreDocs;
- assertEquals("min,min,F,F", 0, result.length);
- result = search.search(csrq("rand", maxRP, maxRP, F, F), numDocs).scoreDocs;
- assertEquals("max,max,F,F", 0, result.length);
+ // very small sets
- result = search.search(csrq("rand", minRP, minRP, T, T), numDocs).scoreDocs;
- assertEquals("min,min,T,T", 1, result.length);
- result = search.search(csrq("rand", null, minRP, F, T), numDocs).scoreDocs;
- assertEquals("nul,min,F,T", 1, result.length);
+ result = search.search(csrq("rand", minRP, minRP, F, F, rw), numDocs).scoreDocs;
+ assertEquals("min,min,F,F", 0, result.length);
+ result = search.search(csrq("rand", maxRP, maxRP, F, F, rw), numDocs).scoreDocs;
+ assertEquals("max,max,F,F", 0, result.length);
- result = search.search(csrq("rand", maxRP, maxRP, T, T), numDocs).scoreDocs;
- assertEquals("max,max,T,T", 1, result.length);
- result = search.search(csrq("rand", maxRP, null, T, F), numDocs).scoreDocs;
- assertEquals("max,nul,T,T", 1, result.length);
+ result = search.search(csrq("rand", minRP, minRP, T, T, rw), numDocs).scoreDocs;
+ assertEquals("min,min,T,T", 1, result.length);
+ result = search.search(csrq("rand", null, minRP, F, T, rw), numDocs).scoreDocs;
+ assertEquals("nul,min,F,T", 1, result.length);
+
+ result = search.search(csrq("rand", maxRP, maxRP, T, T, rw), numDocs).scoreDocs;
+ assertEquals("max,max,T,T", 1, result.length);
+ result = search.search(csrq("rand", maxRP, null, T, F, rw), numDocs).scoreDocs;
+ assertEquals("max,nul,T,T", 1, result.length);
+ }
}
}
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java b/lucene/core/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java
index c7bd79c53bc..ee1c1ba8344 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java
@@ -266,6 +266,7 @@ public class TestMultiTermQueryRewrites extends LuceneTestCase {
checkMaxClauseLimitation(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
checkNoMaxClauseLimitation(MultiTermQuery.CONSTANT_SCORE_REWRITE);
+ checkNoMaxClauseLimitation(MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE);
checkNoMaxClauseLimitation(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(1024));
checkNoMaxClauseLimitation(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(1024));
}
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPrefixRandom.java b/lucene/core/src/test/org/apache/lucene/search/TestPrefixRandom.java
index c6136bedc99..80f61e6d171 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestPrefixRandom.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestPrefixRandom.java
@@ -81,7 +81,7 @@ public class TestPrefixRandom extends LuceneTestCase {
private final BytesRef prefix;
DumbPrefixQuery(Term term) {
- super(term.field(), CONSTANT_SCORE_REWRITE);
+ super(term.field(), CONSTANT_SCORE_BLENDED_REWRITE);
prefix = term.bytes();
}
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestRegexpRandom2.java b/lucene/core/src/test/org/apache/lucene/search/TestRegexpRandom2.java
index 8d976726367..f8bddb7f4cb 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestRegexpRandom2.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestRegexpRandom2.java
@@ -109,7 +109,7 @@ public class TestRegexpRandom2 extends LuceneTestCase {
private final Automaton automaton;
DumbRegexpQuery(Term term, int flags) {
- super(term.field(), MultiTermQuery.CONSTANT_SCORE_REWRITE);
+ super(term.field(), MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE);
RegExp re = new RegExp(term.text(), flags);
automaton = re.toAutomaton();
}
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestWildcard.java b/lucene/core/src/test/org/apache/lucene/search/TestWildcard.java
index 4522ab8ca51..37f89a9f6bc 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestWildcard.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestWildcard.java
@@ -83,6 +83,14 @@ public class TestWildcard extends LuceneTestCase {
MultiTermQuery.CONSTANT_SCORE_REWRITE));
assertTrue(q instanceof MultiTermQueryConstantScoreWrapper);
+ q =
+ searcher.rewrite(
+ new WildcardQuery(
+ new Term("field", "nowildcard"),
+ Operations.DEFAULT_DETERMINIZE_WORK_LIMIT,
+ MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE));
+ assertTrue(q instanceof MultiTermQueryConstantScoreBlendedWrapper);
+
q =
searcher.rewrite(
new WildcardQuery(
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/TestHighlighter.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/TestHighlighter.java
index 5dc408aed35..cb1e640f355 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/TestHighlighter.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/TestHighlighter.java
@@ -1094,7 +1094,7 @@ public class TestHighlighter extends BaseTokenStreamTestCase implements Formatte
new WildcardQuery(
new Term(FIELD_NAME, "ken*"),
Operations.DEFAULT_DETERMINIZE_WORK_LIMIT,
- MultiTermQuery.CONSTANT_SCORE_REWRITE);
+ MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE);
searcher = newSearcher(reader);
// can't rewrite ConstantScore if you want to highlight it -
// it rewrites to ConstantScoreQuery which cannot be highlighted
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/TermsQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/TermsQuery.java
index 29f02e57597..9f4db1cb5b0 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/TermsQuery.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/TermsQuery.java
@@ -63,7 +63,7 @@ class TermsQuery extends MultiTermQuery implements Accountable {
String fromField,
Query fromQuery,
Object indexReaderContextId) {
- super(toField, CONSTANT_SCORE_REWRITE);
+ super(toField, CONSTANT_SCORE_BLENDED_REWRITE);
this.terms = terms;
ords = terms.sort();
this.fromField = fromField;
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
index a59e0a1a7b9..418f91f4fb6 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
@@ -82,7 +82,8 @@ public abstract class QueryParserBase extends QueryBuilder
/** The actual operator that parser uses to combine query terms */
Operator operator = OR_OPERATOR;
- MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_REWRITE;
+ MultiTermQuery.RewriteMethod multiTermRewriteMethod =
+ MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE;
boolean allowLeadingWildcard = false;
protected String field;
@@ -256,15 +257,6 @@ public abstract class QueryParserBase extends QueryBuilder
return operator;
}
- /**
- * By default QueryParser uses {@link
- * org.apache.lucene.search.MultiTermQuery#CONSTANT_SCORE_REWRITE} when creating a {@link
- * PrefixQuery}, {@link WildcardQuery} or {@link TermRangeQuery}. This implementation is generally
- * preferable because it a) Runs faster b) Does not have the scarcity of terms unduly influence
- * score c) avoids any {@link TooManyClauses} exception. However, if your application really needs
- * to use the old-fashioned {@link BooleanQuery} expansion rewriting and the above points are not
- * relevant then use this to change the rewrite method.
- */
@Override
public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) {
multiTermRewriteMethod = method;
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/CommonQueryParserConfiguration.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/CommonQueryParserConfiguration.java
index 2f9a1725d3e..4cb8f49a53a 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/CommonQueryParserConfiguration.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/CommonQueryParserConfiguration.java
@@ -18,12 +18,15 @@ package org.apache.lucene.queryparser.flexible.standard;
import java.util.Locale;
import java.util.TimeZone;
-import java.util.TooManyListenersException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.DateTools.Resolution;
+import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.TermRangeQuery;
+import org.apache.lucene.search.WildcardQuery;
/** Configuration options common across queryparser implementations. */
public interface CommonQueryParserConfiguration {
@@ -55,12 +58,17 @@ public interface CommonQueryParserConfiguration {
public boolean getEnablePositionIncrements();
/**
- * By default, it uses {@link MultiTermQuery#CONSTANT_SCORE_REWRITE} when creating a prefix,
- * wildcard and range queries. This implementation is generally preferable because it a) Runs
- * faster b) Does not have the scarcity of terms unduly influence score c) avoids any {@link
- * TooManyListenersException} exception. However, if your application really needs to use the
- * old-fashioned boolean queries expansion rewriting and the above points are not relevant then
- * use this change the rewrite method.
+ * By default QueryParser uses {@link
+ * org.apache.lucene.search.MultiTermQuery#CONSTANT_SCORE_BLENDED_REWRITE} when creating a {@link
+ * PrefixQuery}, {@link WildcardQuery} or {@link TermRangeQuery}. This implementation is generally
+ * preferable because it a) Runs faster b) Does not have the scarcity of terms unduly influence
+ * score c) avoids any {@link org.apache.lucene.search.IndexSearcher.TooManyClauses} exception.
+ * However, if your application really needs to use the old-fashioned {@link BooleanQuery}
+ * expansion rewriting and the above points are not relevant then use this to change the rewrite
+ * method. As another alternative, if you prefer all terms to be rewritten as a filter up-front,
+ * you can use {@link org.apache.lucene.search.MultiTermQuery#CONSTANT_SCORE_REWRITE}. For more
+ * information on the different rewrite methods available, see {@link
+ * org.apache.lucene.search.MultiTermQuery} documentation.
*/
public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method);
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/StandardQueryParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/StandardQueryParser.java
index 9b24643067b..f72ba10f4cd 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/StandardQueryParser.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/StandardQueryParser.java
@@ -19,7 +19,6 @@ package org.apache.lucene.queryparser.flexible.standard;
import java.util.Locale;
import java.util.Map;
import java.util.TimeZone;
-import java.util.TooManyListenersException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.DateTools.Resolution;
@@ -342,14 +341,6 @@ public class StandardQueryParser extends QueryParserHelper
}
}
- /**
- * By default, it uses {@link MultiTermQuery#CONSTANT_SCORE_REWRITE} when creating a prefix,
- * wildcard and range queries. This implementation is generally preferable because it a) Runs
- * faster b) Does not have the scarcity of terms unduly influence score c) avoids any {@link
- * TooManyListenersException} exception. However, if your application really needs to use the
- * old-fashioned boolean queries expansion rewriting and the above points are not relevant then
- * use this change the rewrite method.
- */
@Override
public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) {
getQueryConfigHandler().set(ConfigurationKeys.MULTI_TERM_REWRITE_METHOD, method);
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/PrefixWildcardQueryNodeBuilder.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/PrefixWildcardQueryNodeBuilder.java
index 55f230b93d9..e3d862b5532 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/PrefixWildcardQueryNodeBuilder.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/PrefixWildcardQueryNodeBuilder.java
@@ -39,7 +39,7 @@ public class PrefixWildcardQueryNodeBuilder implements StandardQueryBuilder {
MultiTermQuery.RewriteMethod method =
(MultiTermQuery.RewriteMethod) queryNode.getTag(MultiTermRewriteMethodProcessor.TAG_ID);
if (method == null) {
- method = MultiTermQuery.CONSTANT_SCORE_REWRITE;
+ method = MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE;
}
String text =
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/RegexpQueryNodeBuilder.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/RegexpQueryNodeBuilder.java
index a124caa2363..0b4319b2be9 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/RegexpQueryNodeBuilder.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/RegexpQueryNodeBuilder.java
@@ -40,7 +40,7 @@ public class RegexpQueryNodeBuilder implements StandardQueryBuilder {
MultiTermQuery.RewriteMethod method =
(MultiTermQuery.RewriteMethod) queryNode.getTag(MultiTermRewriteMethodProcessor.TAG_ID);
if (method == null) {
- method = MultiTermQuery.CONSTANT_SCORE_REWRITE;
+ method = MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE;
}
// TODO: make the maxStates configurable w/ a reasonable default (QueryParserBase uses 10000)
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/TermRangeQueryNodeBuilder.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/TermRangeQueryNodeBuilder.java
index 05766f51de9..e47cb6da21c 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/TermRangeQueryNodeBuilder.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/TermRangeQueryNodeBuilder.java
@@ -53,7 +53,7 @@ public class TermRangeQueryNodeBuilder implements StandardQueryBuilder {
MultiTermQuery.RewriteMethod method =
(MultiTermQuery.RewriteMethod) queryNode.getTag(MultiTermRewriteMethodProcessor.TAG_ID);
if (method == null) {
- method = MultiTermQuery.CONSTANT_SCORE_REWRITE;
+ method = MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE;
}
return TermRangeQuery.newStringRange(
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/WildcardQueryNodeBuilder.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/WildcardQueryNodeBuilder.java
index 93bee367d5c..538962c2738 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/WildcardQueryNodeBuilder.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/WildcardQueryNodeBuilder.java
@@ -39,7 +39,7 @@ public class WildcardQueryNodeBuilder implements StandardQueryBuilder {
MultiTermQuery.RewriteMethod method =
(MultiTermQuery.RewriteMethod) queryNode.getTag(MultiTermRewriteMethodProcessor.TAG_ID);
if (method == null) {
- method = MultiTermQuery.CONSTANT_SCORE_REWRITE;
+ method = MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE;
}
return new WildcardQuery(
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/config/StandardQueryConfigHandler.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/config/StandardQueryConfigHandler.java
index 6cd4d969e49..eb8d20844a7 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/config/StandardQueryConfigHandler.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/config/StandardQueryConfigHandler.java
@@ -203,7 +203,7 @@ public class StandardQueryConfigHandler extends QueryConfigHandler {
set(ConfigurationKeys.FIELD_BOOST_MAP, new LinkedHashMap<String, Float>());
set(ConfigurationKeys.FUZZY_CONFIG, new FuzzyConfig());
set(ConfigurationKeys.LOCALE, Locale.getDefault());
- set(ConfigurationKeys.MULTI_TERM_REWRITE_METHOD, MultiTermQuery.CONSTANT_SCORE_REWRITE);
+ set(ConfigurationKeys.MULTI_TERM_REWRITE_METHOD, MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE);
set(
ConfigurationKeys.FIELD_DATE_RESOLUTION_MAP,
new HashMap<CharSequence, DateTools.Resolution>());
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/MultiTermRewriteMethodProcessor.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/MultiTermRewriteMethodProcessor.java
index 7e4068e38b9..569f55d4b2e 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/MultiTermRewriteMethodProcessor.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/MultiTermRewriteMethodProcessor.java
@@ -28,7 +28,7 @@ import org.apache.lucene.search.MultiTermQuery;
/**
* This processor instates the default {@link
* org.apache.lucene.search.MultiTermQuery.RewriteMethod}, {@link
- * MultiTermQuery#CONSTANT_SCORE_REWRITE}, for multi-term query nodes.
+ * MultiTermQuery#CONSTANT_SCORE_BLENDED_REWRITE}, for multi-term query nodes.
*/
public class MultiTermRewriteMethodProcessor extends QueryNodeProcessorImpl {
diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java
index a927aafbb23..7886624a72c 100644
--- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java
+++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java
@@ -308,15 +308,18 @@ public class TestQPHelper extends LuceneTestCase {
new StandardQueryParser(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false));
Query q = qp.parse("foo*bar", "field");
assertTrue(q instanceof WildcardQuery);
- assertEquals(MultiTermQuery.CONSTANT_SCORE_REWRITE, ((MultiTermQuery) q).getRewriteMethod());
+ assertEquals(
+ MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, ((MultiTermQuery) q).getRewriteMethod());
q = qp.parse("foo*", "field");
assertTrue(q instanceof PrefixQuery);
- assertEquals(MultiTermQuery.CONSTANT_SCORE_REWRITE, ((MultiTermQuery) q).getRewriteMethod());
+ assertEquals(
+ MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, ((MultiTermQuery) q).getRewriteMethod());
q = qp.parse("[a TO z]", "field");
assertTrue(q instanceof TermRangeQuery);
- assertEquals(MultiTermQuery.CONSTANT_SCORE_REWRITE, ((MultiTermQuery) q).getRewriteMethod());
+ assertEquals(
+ MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, ((MultiTermQuery) q).getRewriteMethod());
}
public void testCJK() throws Exception {
@@ -646,7 +649,7 @@ public class TestQPHelper extends LuceneTestCase {
public void testRange() throws Exception {
assertQueryEquals("[ a TO z]", null, "[a TO z]");
assertEquals(
- MultiTermQuery.CONSTANT_SCORE_REWRITE,
+ MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE,
((TermRangeQuery) getQuery("[ a TO z]", null)).getRewriteMethod());
StandardQueryParser qp = new StandardQueryParser();
@@ -1205,7 +1208,6 @@ public class TestQPHelper extends LuceneTestCase {
MultiTermQuery.SCORING_BOOLEAN_REWRITE,
((RegexpQuery) (((BoostQuery) qp.parse("/[A-Z][123]/^0.5", df)).getQuery()))
.getRewriteMethod());
- qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);
}
public void testStopwords() throws Exception {
diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java
index 6a5b385d73c..1f40a036b26 100644
--- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java
+++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java
@@ -561,7 +561,7 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
assertQueryEquals("{ a TO z]", null, "{a TO z]");
assertEquals(
- MultiTermQuery.CONSTANT_SCORE_REWRITE,
+ MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE,
((TermRangeQuery) getQuery("[ a TO z]")).getRewriteMethod());
CommonQueryParserConfiguration qp =