You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2022/06/20 12:07:56 UTC

[lucene] branch branch_9x updated: LUCENE-10618: Implement BooleanQuery rewrite rules based for minimumShouldMatch (#965)

This is an automated email from the ASF dual-hosted git repository.

jpountz pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new bb1b3dce04c LUCENE-10618: Implement BooleanQuery rewrite rules based for minimumShouldMatch (#965)
bb1b3dce04c is described below

commit bb1b3dce04c06e7533b2ff418b8b7c2544534e24
Author: JoeHF <ho...@gmail.com>
AuthorDate: Mon Jun 20 20:04:38 2022 +0800

    LUCENE-10618: Implement BooleanQuery rewrite rules based for minimumShouldMatch (#965)
---
 lucene/CHANGES.txt                                 |  2 +
 .../org/apache/lucene/search/BooleanQuery.java     | 24 +++++++
 .../lucene/search/TestBooleanMinShouldMatch.java   | 25 +++++++
 .../apache/lucene/search/TestBooleanRewrites.java  | 83 +++++++++++++++++++++-
 .../apache/lucene/search/TestMinShouldMatch2.java  |  8 +--
 5 files changed, 137 insertions(+), 5 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 72850ea120b..bf2fb7a6088 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -91,6 +91,8 @@ Optimizations
 ---------------------
 * LUCENE-8519: MultiDocValues.getNormValues should not call getMergedFieldInfos (Rushabh Shah)
 
+* LUCENE-10618: Implement BooleanQuery rewrite rules based for minimumShouldMatch. (Fang Hou)
+
 Bug Fixes
 ---------------------
 
diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
index 165bf12de9f..c7bf1b39d01 100644
--- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
@@ -533,6 +533,30 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
       }
     }
 
+    // SHOULD clause count less than or equal to minimumNumberShouldMatch
+    // Important(this can only be processed after nested clauses have been flattened)
+    {
+      final Collection<Query> shoulds = clauseSets.get(Occur.SHOULD);
+      if (shoulds.size() > 0) {
+        if (shoulds.size() < minimumNumberShouldMatch) {
+          return new MatchNoDocsQuery("SHOULD clause count less than minimumNumberShouldMatch");
+        }
+
+        if (shoulds.size() == minimumNumberShouldMatch) {
+          BooleanQuery.Builder builder = new BooleanQuery.Builder();
+          for (BooleanClause clause : clauses) {
+            if (clause.getOccur() == Occur.SHOULD) {
+              builder.add(clause.getQuery(), Occur.MUST);
+            } else {
+              builder.add(clause);
+            }
+          }
+
+          return builder.build();
+        }
+      }
+    }
+
     return super.rewrite(reader);
   }
 
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java
index b1b1448ebc4..1db3b4ebc74 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java
@@ -436,6 +436,31 @@ public class TestBooleanMinShouldMatch extends LuceneTestCase {
     assertSubsetOfSameScores(q2.build(), top1, top2);
   }
 
+  public void testFlattenInnerDisjunctions() throws Exception {
+    Query q =
+        new BooleanQuery.Builder()
+            .setMinimumNumberShouldMatch(2)
+            .add(new TermQuery(new Term("all", "all")), BooleanClause.Occur.SHOULD)
+            .add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD)
+            .add(new TermQuery(new Term("data", "2")), BooleanClause.Occur.MUST)
+            .build();
+    verifyNrHits(q, 1);
+
+    Query inner =
+        new BooleanQuery.Builder()
+            .add(new TermQuery(new Term("all", "all")), BooleanClause.Occur.SHOULD)
+            .add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD)
+            .build();
+    q =
+        new BooleanQuery.Builder()
+            .setMinimumNumberShouldMatch(2)
+            .add(inner, BooleanClause.Occur.SHOULD)
+            .add(new TermQuery(new Term("data", "2")), BooleanClause.Occur.MUST)
+            .build();
+
+    verifyNrHits(q, 0);
+  }
+
   protected void printHits(String test, ScoreDoc[] h, IndexSearcher searcher) throws Exception {
 
     System.err.println("------- " + test + " -------");
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java
index 8ffb04bced3..f0bf8ff96ff 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java
@@ -604,7 +604,7 @@ public class TestBooleanRewrites extends LuceneTestCase {
             .add(inner, Occur.SHOULD)
             .add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
             .build();
-    assertSame(query, searcher.rewrite(query));
+    assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query));
 
     inner =
         new BooleanQuery.Builder()
@@ -783,4 +783,85 @@ public class TestBooleanRewrites extends LuceneTestCase {
                 .build());
     assertEquals(expected, searcher.rewrite(query));
   }
+
+  public void testShouldClausesLessThanOrEqualToMinimumNumberShouldMatch() throws IOException {
+    IndexSearcher searcher = newSearcher(new MultiReader());
+
+    // The only one SHOULD clause is MatchNoDocsQuery
+    BooleanQuery query =
+        new BooleanQuery.Builder()
+            .add(new PhraseQuery.Builder().build(), Occur.SHOULD)
+            .setMinimumNumberShouldMatch(1)
+            .build();
+    assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query));
+    query =
+        new BooleanQuery.Builder()
+            .add(new PhraseQuery.Builder().build(), Occur.SHOULD)
+            .setMinimumNumberShouldMatch(0)
+            .build();
+    assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query));
+
+    // Meaningful SHOULD clause count is less than MinimumNumberShouldMatch
+    query =
+        new BooleanQuery.Builder()
+            .add(new PhraseQuery.Builder().build(), Occur.SHOULD)
+            .add(new PhraseQuery.Builder().add(new Term("field", "a")).build(), Occur.SHOULD)
+            .setMinimumNumberShouldMatch(2)
+            .build();
+    assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query));
+
+    // Meaningful SHOULD clause count is equal to MinimumNumberShouldMatch
+    query =
+        new BooleanQuery.Builder()
+            .add(new PhraseQuery.Builder().add(new Term("field", "b")).build(), Occur.SHOULD)
+            .add(
+                new PhraseQuery.Builder()
+                    .add(new Term("field", "a"))
+                    .add(new Term("field", "c"))
+                    .build(),
+                Occur.SHOULD)
+            .setMinimumNumberShouldMatch(2)
+            .build();
+    BooleanQuery expected =
+        new BooleanQuery.Builder()
+            .add(new TermQuery(new Term("field", "b")), Occur.MUST)
+            .add(
+                new PhraseQuery.Builder()
+                    .add(new Term("field", "a"))
+                    .add(new Term("field", "c"))
+                    .build(),
+                Occur.MUST)
+            .build();
+    assertEquals(expected, searcher.rewrite(query));
+
+    // Invalid Inner query get removed after rewrite
+    Query inner =
+        new BooleanQuery.Builder()
+            .add(new PhraseQuery.Builder().build(), Occur.SHOULD)
+            .add(new PhraseQuery.Builder().add(new Term("field", "a")).build(), Occur.SHOULD)
+            .setMinimumNumberShouldMatch(2)
+            .build();
+
+    query =
+        new BooleanQuery.Builder()
+            .add(inner, Occur.SHOULD)
+            .add(new PhraseQuery.Builder().add(new Term("field", "b")).build(), Occur.SHOULD)
+            .add(
+                new PhraseQuery.Builder()
+                    .add(new Term("field", "a"))
+                    .add(new Term("field", "c"))
+                    .build(),
+                Occur.SHOULD)
+            .setMinimumNumberShouldMatch(2)
+            .build();
+    assertEquals(expected, searcher.rewrite(query));
+
+    query =
+        new BooleanQuery.Builder()
+            .add(inner, Occur.SHOULD)
+            .add(new PhraseQuery.Builder().add(new Term("field", "b")).build(), Occur.SHOULD)
+            .setMinimumNumberShouldMatch(2)
+            .build();
+    assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query));
+  }
 }
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java b/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java
index e293854fecb..adde74c7ba0 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java
@@ -253,7 +253,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
     termsList.addAll(Arrays.asList(rareTerms));
     String[] terms = termsList.toArray(new String[0]);
 
-    for (int minNrShouldMatch = 1; minNrShouldMatch <= terms.length; minNrShouldMatch++) {
+    for (int minNrShouldMatch = 1; minNrShouldMatch < terms.length; minNrShouldMatch++) {
       Scorer expected = scorer(terms, minNrShouldMatch, Mode.DOC_VALUES);
       Scorer actual = scorer(terms, minNrShouldMatch, Mode.SCORER);
       assertNext(expected, actual);
@@ -273,7 +273,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
     String[] terms = termsList.toArray(new String[0]);
 
     for (int amount = 25; amount < 200; amount += 25) {
-      for (int minNrShouldMatch = 1; minNrShouldMatch <= terms.length; minNrShouldMatch++) {
+      for (int minNrShouldMatch = 1; minNrShouldMatch < terms.length; minNrShouldMatch++) {
         Scorer expected = scorer(terms, minNrShouldMatch, Mode.DOC_VALUES);
         Scorer actual = scorer(terms, minNrShouldMatch, Mode.SCORER);
         assertAdvance(expected, actual, amount);
@@ -294,7 +294,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
     Collections.shuffle(termsList, random());
     for (int numTerms = 2; numTerms <= termsList.size(); numTerms++) {
       String[] terms = termsList.subList(0, numTerms).toArray(new String[0]);
-      for (int minNrShouldMatch = 1; minNrShouldMatch <= terms.length; minNrShouldMatch++) {
+      for (int minNrShouldMatch = 1; minNrShouldMatch < terms.length; minNrShouldMatch++) {
         Scorer expected = scorer(terms, minNrShouldMatch, Mode.DOC_VALUES);
         Scorer actual = scorer(terms, minNrShouldMatch, Mode.SCORER);
         assertNext(expected, actual);
@@ -318,7 +318,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
     for (int amount = 25; amount < 200; amount += 25) {
       for (int numTerms = 2; numTerms <= termsList.size(); numTerms++) {
         String[] terms = termsList.subList(0, numTerms).toArray(new String[0]);
-        for (int minNrShouldMatch = 1; minNrShouldMatch <= terms.length; minNrShouldMatch++) {
+        for (int minNrShouldMatch = 1; minNrShouldMatch < terms.length; minNrShouldMatch++) {
           Scorer expected = scorer(terms, minNrShouldMatch, Mode.DOC_VALUES);
           Scorer actual = scorer(terms, minNrShouldMatch, Mode.SCORER);
           assertAdvance(expected, actual, amount);