You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ja...@apache.org on 2023/01/12 15:50:13 UTC

[lucene] branch main updated: Fix exponential runtime for Boolean#rewrite (#12072)

This is an automated email from the ASF dual-hosted git repository.

javanna pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new 59b17452aaf Fix exponential runtime for Boolean#rewrite (#12072)
59b17452aaf is described below

commit 59b17452aaf04fc6a6f54f70cc4d1307f077e875
Author: Benjamin Trent <be...@gmail.com>
AuthorDate: Thu Jan 12 10:50:05 2023 -0500

    Fix exponential runtime for Boolean#rewrite (#12072)
    
    When #672 was introduced, it added many nice rewrite optimizations. However, in the case when there are many multiple nested Boolean queries under a top level Boolean#filter clause, its runtime grows exponentially.
    
    The key issue was how the BooleanQuery#rewriteNoScoring redirected yet again to the ConstantScoreQuery#rewrite. This causes BooleanQuery#rewrite to be called again recursively , even though it was previously called in ConstantScoreQuery#rewrite, and THEN BooleanQuery#rewriteNoScoring is called again, recursively.
    
    This causes exponential growth in rewrite time based on query depth. The change here hopes to short-circuit that and only grow (near) linearly by calling BooleanQuery#rewriteNoScoring directly, instead if attempting to redirect through ConstantScoreQuery#rewrite.
    
    closes: #12069
---
 lucene/CHANGES.txt                                 |   3 +
 .../org/apache/lucene/search/BooleanQuery.java     |  13 ++-
 .../apache/lucene/search/ConstantScoreQuery.java   |   2 +-
 .../apache/lucene/search/TestBooleanRewrites.java  | 100 +++++++++++++++++++++
 4 files changed, 115 insertions(+), 3 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index fdc9d05734b..fd7c80c3b0e 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -234,6 +234,9 @@ Bug Fixes
 
 * GITHUB#12046: Out of boundary in CombinedFieldQuery#addTerm. (Lu Xugang)
 
+* GITHUB#12072: Fix exponential runtime for nested BooleanQuery#rewrite when a
+  BooleanClause is non-scoring. (Ben Trent)
+
 Optimizations
 ---------------------
 * GITHUB#11738: Optimize MultiTermQueryConstantScoreWrapper when a term is present that matches all
diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
index 0354280eb01..07823ae5c4d 100644
--- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
@@ -192,7 +192,7 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
 
   // Utility method for rewriting BooleanQuery when scores are not needed.
   // This is called from ConstantScoreQuery#rewrite
-  BooleanQuery rewriteNoScoring(IndexSearcher indexSearcher) throws IOException {
+  BooleanQuery rewriteNoScoring() {
     boolean actuallyRewritten = false;
     BooleanQuery.Builder newQuery =
         new BooleanQuery.Builder().setMinimumNumberShouldMatch(getMinimumNumberShouldMatch());
@@ -203,10 +203,19 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
 
     for (BooleanClause clause : clauses) {
       Query query = clause.getQuery();
-      Query rewritten = new ConstantScoreQuery(query).rewrite(indexSearcher);
+      // NOTE: rewritingNoScoring() should not call rewrite(), otherwise this
+      // method could run in exponential time with the depth of the query as
+      // every new level would rewrite 2x more than its parent level.
+      Query rewritten = query;
+      if (rewritten instanceof BoostQuery) {
+        rewritten = ((BoostQuery) rewritten).getQuery();
+      }
       if (rewritten instanceof ConstantScoreQuery) {
         rewritten = ((ConstantScoreQuery) rewritten).getQuery();
       }
+      if (rewritten instanceof BooleanQuery) {
+        rewritten = ((BooleanQuery) rewritten).rewriteNoScoring();
+      }
       BooleanClause.Occur occur = clause.getOccur();
       if (occur == Occur.SHOULD && keepShould == false) {
         // ignore clause
diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
index 1984cacccbe..48f763e1646 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
@@ -50,7 +50,7 @@ public final class ConstantScoreQuery extends Query {
     } else if (rewritten instanceof ConstantScoreQuery) {
       rewritten = ((ConstantScoreQuery) rewritten).getQuery();
     } else if (rewritten instanceof BooleanQuery) {
-      rewritten = ((BooleanQuery) rewritten).rewriteNoScoring(indexSearcher);
+      rewritten = ((BooleanQuery) rewritten).rewriteNoScoring();
     }
 
     if (rewritten.getClass() == MatchNoDocsQuery.class) {
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java
index 4f2e3a829b8..87b8068d2f1 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java
@@ -21,6 +21,7 @@ import java.util.Arrays;
 import java.util.Map;
 import java.util.Random;
 import java.util.Set;
+import java.util.function.Function;
 import java.util.stream.Collectors;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -322,6 +323,75 @@ public class TestBooleanRewrites extends LuceneTestCase {
     assertEquals(new MatchNoDocsQuery(), searcher.rewrite(bq2));
   }
 
+  public void testDeeplyNestedBooleanRewriteShouldClauses() throws IOException {
+    IndexSearcher searcher = newSearcher(new MultiReader());
+    Function<Integer, TermQuery> termQueryFunction =
+        (i) -> new TermQuery(new Term("layer[" + i + "]", "foo"));
+    int depth = TestUtil.nextInt(random(), 10, 30);
+    TestRewriteQuery rewriteQueryExpected = new TestRewriteQuery();
+    TestRewriteQuery rewriteQuery = new TestRewriteQuery();
+    Query expectedQuery =
+        new BooleanQuery.Builder().add(rewriteQueryExpected, Occur.FILTER).build();
+    Query deepBuilder =
+        new BooleanQuery.Builder()
+            .add(rewriteQuery, Occur.SHOULD)
+            .setMinimumNumberShouldMatch(1)
+            .build();
+    for (int i = depth; i > 0; i--) {
+      TermQuery tq = termQueryFunction.apply(i);
+      BooleanQuery.Builder bq =
+          new BooleanQuery.Builder()
+              .setMinimumNumberShouldMatch(2)
+              .add(tq, Occur.SHOULD)
+              .add(deepBuilder, Occur.SHOULD);
+      deepBuilder = bq.build();
+      BooleanQuery.Builder expectedBq = new BooleanQuery.Builder().add(tq, Occur.FILTER);
+      if (i == depth) {
+        expectedBq.add(rewriteQuery, Occur.FILTER);
+      } else {
+        expectedBq.add(expectedQuery, Occur.FILTER);
+      }
+      expectedQuery = expectedBq.build();
+    }
+    BooleanQuery bq = new BooleanQuery.Builder().add(deepBuilder, Occur.FILTER).build();
+    expectedQuery = new BoostQuery(new ConstantScoreQuery(expectedQuery), 0.0f);
+    Query rewritten = searcher.rewrite(bq);
+    assertEquals(expectedQuery, rewritten);
+    // the SHOULD clauses cause more rewrites because they incrementally change to `MUST` and then
+    // `FILTER`
+    assertEquals("Depth=" + depth, depth + 1, rewriteQuery.numRewrites);
+  }
+
+  public void testDeeplyNestedBooleanRewrite() throws IOException {
+    IndexSearcher searcher = newSearcher(new MultiReader());
+    Function<Integer, TermQuery> termQueryFunction =
+        (i) -> new TermQuery(new Term("layer[" + i + "]", "foo"));
+    int depth = TestUtil.nextInt(random(), 10, 30);
+    TestRewriteQuery rewriteQueryExpected = new TestRewriteQuery();
+    TestRewriteQuery rewriteQuery = new TestRewriteQuery();
+    Query expectedQuery =
+        new BooleanQuery.Builder().add(rewriteQueryExpected, Occur.FILTER).build();
+    Query deepBuilder = new BooleanQuery.Builder().add(rewriteQuery, Occur.MUST).build();
+    for (int i = depth; i > 0; i--) {
+      TermQuery tq = termQueryFunction.apply(i);
+      BooleanQuery.Builder bq =
+          new BooleanQuery.Builder().add(tq, Occur.MUST).add(deepBuilder, Occur.MUST);
+      deepBuilder = bq.build();
+      BooleanQuery.Builder expectedBq = new BooleanQuery.Builder().add(tq, Occur.FILTER);
+      if (i == depth) {
+        expectedBq.add(rewriteQuery, Occur.FILTER);
+      } else {
+        expectedBq.add(expectedQuery, Occur.FILTER);
+      }
+      expectedQuery = expectedBq.build();
+    }
+    BooleanQuery bq = new BooleanQuery.Builder().add(deepBuilder, Occur.FILTER).build();
+    expectedQuery = new BoostQuery(new ConstantScoreQuery(expectedQuery), 0.0f);
+    Query rewritten = searcher.rewrite(bq);
+    assertEquals(expectedQuery, rewritten);
+    assertEquals("Depth=" + depth, 1, rewriteQuery.numRewrites);
+  }
+
   public void testRemoveMatchAllFilter() throws IOException {
     IndexSearcher searcher = newSearcher(new MultiReader());
 
@@ -864,4 +934,34 @@ public class TestBooleanRewrites extends LuceneTestCase {
             .build();
     assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query));
   }
+
+  // Test query to count number of rewrites for its lifetime.
+  private static final class TestRewriteQuery extends Query {
+
+    private int numRewrites = 0;
+
+    @Override
+    public String toString(String field) {
+      return "TestRewriteQuery{rewrites=" + numRewrites + "}";
+    }
+
+    @Override
+    public Query rewrite(IndexSearcher indexSearcher) {
+      numRewrites++;
+      return this;
+    }
+
+    @Override
+    public void visit(QueryVisitor visitor) {}
+
+    @Override
+    public boolean equals(Object obj) {
+      return obj instanceof TestRewriteQuery;
+    }
+
+    @Override
+    public int hashCode() {
+      return TestRewriteQuery.class.hashCode();
+    }
+  }
 }