You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ja...@apache.org on 2023/01/12 15:50:13 UTC
[lucene] branch main updated: Fix exponential runtime for Boolean#rewrite (#12072)
This is an automated email from the ASF dual-hosted git repository.
javanna pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new 59b17452aaf Fix exponential runtime for Boolean#rewrite (#12072)
59b17452aaf is described below
commit 59b17452aaf04fc6a6f54f70cc4d1307f077e875
Author: Benjamin Trent <be...@gmail.com>
AuthorDate: Thu Jan 12 10:50:05 2023 -0500
Fix exponential runtime for Boolean#rewrite (#12072)
When #672 was introduced, it added many nice rewrite optimizations. However, in the case when there are many multiple nested Boolean queries under a top level Boolean#filter clause, its runtime grows exponentially.
The key issue was how the BooleanQuery#rewriteNoScoring redirected yet again to the ConstantScoreQuery#rewrite. This causes BooleanQuery#rewrite to be called again recursively , even though it was previously called in ConstantScoreQuery#rewrite, and THEN BooleanQuery#rewriteNoScoring is called again, recursively.
This causes exponential growth in rewrite time based on query depth. The change here hopes to short-circuit that and only grow (near) linearly by calling BooleanQuery#rewriteNoScoring directly, instead if attempting to redirect through ConstantScoreQuery#rewrite.
closes: #12069
---
lucene/CHANGES.txt | 3 +
.../org/apache/lucene/search/BooleanQuery.java | 13 ++-
.../apache/lucene/search/ConstantScoreQuery.java | 2 +-
.../apache/lucene/search/TestBooleanRewrites.java | 100 +++++++++++++++++++++
4 files changed, 115 insertions(+), 3 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index fdc9d05734b..fd7c80c3b0e 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -234,6 +234,9 @@ Bug Fixes
* GITHUB#12046: Out of boundary in CombinedFieldQuery#addTerm. (Lu Xugang)
+* GITHUB#12072: Fix exponential runtime for nested BooleanQuery#rewrite when a
+ BooleanClause is non-scoring. (Ben Trent)
+
Optimizations
---------------------
* GITHUB#11738: Optimize MultiTermQueryConstantScoreWrapper when a term is present that matches all
diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
index 0354280eb01..07823ae5c4d 100644
--- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
@@ -192,7 +192,7 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
// Utility method for rewriting BooleanQuery when scores are not needed.
// This is called from ConstantScoreQuery#rewrite
- BooleanQuery rewriteNoScoring(IndexSearcher indexSearcher) throws IOException {
+ BooleanQuery rewriteNoScoring() {
boolean actuallyRewritten = false;
BooleanQuery.Builder newQuery =
new BooleanQuery.Builder().setMinimumNumberShouldMatch(getMinimumNumberShouldMatch());
@@ -203,10 +203,19 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
for (BooleanClause clause : clauses) {
Query query = clause.getQuery();
- Query rewritten = new ConstantScoreQuery(query).rewrite(indexSearcher);
+ // NOTE: rewritingNoScoring() should not call rewrite(), otherwise this
+ // method could run in exponential time with the depth of the query as
+ // every new level would rewrite 2x more than its parent level.
+ Query rewritten = query;
+ if (rewritten instanceof BoostQuery) {
+ rewritten = ((BoostQuery) rewritten).getQuery();
+ }
if (rewritten instanceof ConstantScoreQuery) {
rewritten = ((ConstantScoreQuery) rewritten).getQuery();
}
+ if (rewritten instanceof BooleanQuery) {
+ rewritten = ((BooleanQuery) rewritten).rewriteNoScoring();
+ }
BooleanClause.Occur occur = clause.getOccur();
if (occur == Occur.SHOULD && keepShould == false) {
// ignore clause
diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
index 1984cacccbe..48f763e1646 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
@@ -50,7 +50,7 @@ public final class ConstantScoreQuery extends Query {
} else if (rewritten instanceof ConstantScoreQuery) {
rewritten = ((ConstantScoreQuery) rewritten).getQuery();
} else if (rewritten instanceof BooleanQuery) {
- rewritten = ((BooleanQuery) rewritten).rewriteNoScoring(indexSearcher);
+ rewritten = ((BooleanQuery) rewritten).rewriteNoScoring();
}
if (rewritten.getClass() == MatchNoDocsQuery.class) {
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java
index 4f2e3a829b8..87b8068d2f1 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java
@@ -21,6 +21,7 @@ import java.util.Arrays;
import java.util.Map;
import java.util.Random;
import java.util.Set;
+import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -322,6 +323,75 @@ public class TestBooleanRewrites extends LuceneTestCase {
assertEquals(new MatchNoDocsQuery(), searcher.rewrite(bq2));
}
+ public void testDeeplyNestedBooleanRewriteShouldClauses() throws IOException {
+ IndexSearcher searcher = newSearcher(new MultiReader());
+ Function<Integer, TermQuery> termQueryFunction =
+ (i) -> new TermQuery(new Term("layer[" + i + "]", "foo"));
+ int depth = TestUtil.nextInt(random(), 10, 30);
+ TestRewriteQuery rewriteQueryExpected = new TestRewriteQuery();
+ TestRewriteQuery rewriteQuery = new TestRewriteQuery();
+ Query expectedQuery =
+ new BooleanQuery.Builder().add(rewriteQueryExpected, Occur.FILTER).build();
+ Query deepBuilder =
+ new BooleanQuery.Builder()
+ .add(rewriteQuery, Occur.SHOULD)
+ .setMinimumNumberShouldMatch(1)
+ .build();
+ for (int i = depth; i > 0; i--) {
+ TermQuery tq = termQueryFunction.apply(i);
+ BooleanQuery.Builder bq =
+ new BooleanQuery.Builder()
+ .setMinimumNumberShouldMatch(2)
+ .add(tq, Occur.SHOULD)
+ .add(deepBuilder, Occur.SHOULD);
+ deepBuilder = bq.build();
+ BooleanQuery.Builder expectedBq = new BooleanQuery.Builder().add(tq, Occur.FILTER);
+ if (i == depth) {
+ expectedBq.add(rewriteQuery, Occur.FILTER);
+ } else {
+ expectedBq.add(expectedQuery, Occur.FILTER);
+ }
+ expectedQuery = expectedBq.build();
+ }
+ BooleanQuery bq = new BooleanQuery.Builder().add(deepBuilder, Occur.FILTER).build();
+ expectedQuery = new BoostQuery(new ConstantScoreQuery(expectedQuery), 0.0f);
+ Query rewritten = searcher.rewrite(bq);
+ assertEquals(expectedQuery, rewritten);
+ // the SHOULD clauses cause more rewrites because they incrementally change to `MUST` and then
+ // `FILTER`
+ assertEquals("Depth=" + depth, depth + 1, rewriteQuery.numRewrites);
+ }
+
+ public void testDeeplyNestedBooleanRewrite() throws IOException {
+ IndexSearcher searcher = newSearcher(new MultiReader());
+ Function<Integer, TermQuery> termQueryFunction =
+ (i) -> new TermQuery(new Term("layer[" + i + "]", "foo"));
+ int depth = TestUtil.nextInt(random(), 10, 30);
+ TestRewriteQuery rewriteQueryExpected = new TestRewriteQuery();
+ TestRewriteQuery rewriteQuery = new TestRewriteQuery();
+ Query expectedQuery =
+ new BooleanQuery.Builder().add(rewriteQueryExpected, Occur.FILTER).build();
+ Query deepBuilder = new BooleanQuery.Builder().add(rewriteQuery, Occur.MUST).build();
+ for (int i = depth; i > 0; i--) {
+ TermQuery tq = termQueryFunction.apply(i);
+ BooleanQuery.Builder bq =
+ new BooleanQuery.Builder().add(tq, Occur.MUST).add(deepBuilder, Occur.MUST);
+ deepBuilder = bq.build();
+ BooleanQuery.Builder expectedBq = new BooleanQuery.Builder().add(tq, Occur.FILTER);
+ if (i == depth) {
+ expectedBq.add(rewriteQuery, Occur.FILTER);
+ } else {
+ expectedBq.add(expectedQuery, Occur.FILTER);
+ }
+ expectedQuery = expectedBq.build();
+ }
+ BooleanQuery bq = new BooleanQuery.Builder().add(deepBuilder, Occur.FILTER).build();
+ expectedQuery = new BoostQuery(new ConstantScoreQuery(expectedQuery), 0.0f);
+ Query rewritten = searcher.rewrite(bq);
+ assertEquals(expectedQuery, rewritten);
+ assertEquals("Depth=" + depth, 1, rewriteQuery.numRewrites);
+ }
+
public void testRemoveMatchAllFilter() throws IOException {
IndexSearcher searcher = newSearcher(new MultiReader());
@@ -864,4 +934,34 @@ public class TestBooleanRewrites extends LuceneTestCase {
.build();
assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query));
}
+
+ // Test query to count number of rewrites for its lifetime.
+ private static final class TestRewriteQuery extends Query {
+
+ private int numRewrites = 0;
+
+ @Override
+ public String toString(String field) {
+ return "TestRewriteQuery{rewrites=" + numRewrites + "}";
+ }
+
+ @Override
+ public Query rewrite(IndexSearcher indexSearcher) {
+ numRewrites++;
+ return this;
+ }
+
+ @Override
+ public void visit(QueryVisitor visitor) {}
+
+ @Override
+ public boolean equals(Object obj) {
+ return obj instanceof TestRewriteQuery;
+ }
+
+ @Override
+ public int hashCode() {
+ return TestRewriteQuery.class.hashCode();
+ }
+ }
}