You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2022/06/20 12:07:56 UTC
[lucene] branch branch_9x updated: LUCENE-10618: Implement BooleanQuery rewrite rules based for minimumShouldMatch (#965)
This is an automated email from the ASF dual-hosted git repository.
jpountz pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new bb1b3dce04c LUCENE-10618: Implement BooleanQuery rewrite rules based for minimumShouldMatch (#965)
bb1b3dce04c is described below
commit bb1b3dce04c06e7533b2ff418b8b7c2544534e24
Author: JoeHF <ho...@gmail.com>
AuthorDate: Mon Jun 20 20:04:38 2022 +0800
LUCENE-10618: Implement BooleanQuery rewrite rules based for minimumShouldMatch (#965)
---
lucene/CHANGES.txt | 2 +
.../org/apache/lucene/search/BooleanQuery.java | 24 +++++++
.../lucene/search/TestBooleanMinShouldMatch.java | 25 +++++++
.../apache/lucene/search/TestBooleanRewrites.java | 83 +++++++++++++++++++++-
.../apache/lucene/search/TestMinShouldMatch2.java | 8 +--
5 files changed, 137 insertions(+), 5 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 72850ea120b..bf2fb7a6088 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -91,6 +91,8 @@ Optimizations
---------------------
* LUCENE-8519: MultiDocValues.getNormValues should not call getMergedFieldInfos (Rushabh Shah)
+* LUCENE-10618: Implement BooleanQuery rewrite rules based for minimumShouldMatch. (Fang Hou)
+
Bug Fixes
---------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
index 165bf12de9f..c7bf1b39d01 100644
--- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
@@ -533,6 +533,30 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
}
}
+ // SHOULD clause count less than or equal to minimumNumberShouldMatch
+ // Important(this can only be processed after nested clauses have been flattened)
+ {
+ final Collection<Query> shoulds = clauseSets.get(Occur.SHOULD);
+ if (shoulds.size() > 0) {
+ if (shoulds.size() < minimumNumberShouldMatch) {
+ return new MatchNoDocsQuery("SHOULD clause count less than minimumNumberShouldMatch");
+ }
+
+ if (shoulds.size() == minimumNumberShouldMatch) {
+ BooleanQuery.Builder builder = new BooleanQuery.Builder();
+ for (BooleanClause clause : clauses) {
+ if (clause.getOccur() == Occur.SHOULD) {
+ builder.add(clause.getQuery(), Occur.MUST);
+ } else {
+ builder.add(clause);
+ }
+ }
+
+ return builder.build();
+ }
+ }
+ }
+
return super.rewrite(reader);
}
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java
index b1b1448ebc4..1db3b4ebc74 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java
@@ -436,6 +436,31 @@ public class TestBooleanMinShouldMatch extends LuceneTestCase {
assertSubsetOfSameScores(q2.build(), top1, top2);
}
+ public void testFlattenInnerDisjunctions() throws Exception {
+ Query q =
+ new BooleanQuery.Builder()
+ .setMinimumNumberShouldMatch(2)
+ .add(new TermQuery(new Term("all", "all")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term("data", "2")), BooleanClause.Occur.MUST)
+ .build();
+ verifyNrHits(q, 1);
+
+ Query inner =
+ new BooleanQuery.Builder()
+ .add(new TermQuery(new Term("all", "all")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD)
+ .build();
+ q =
+ new BooleanQuery.Builder()
+ .setMinimumNumberShouldMatch(2)
+ .add(inner, BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term("data", "2")), BooleanClause.Occur.MUST)
+ .build();
+
+ verifyNrHits(q, 0);
+ }
+
protected void printHits(String test, ScoreDoc[] h, IndexSearcher searcher) throws Exception {
System.err.println("------- " + test + " -------");
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java
index 8ffb04bced3..f0bf8ff96ff 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java
@@ -604,7 +604,7 @@ public class TestBooleanRewrites extends LuceneTestCase {
.add(inner, Occur.SHOULD)
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
.build();
- assertSame(query, searcher.rewrite(query));
+ assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query));
inner =
new BooleanQuery.Builder()
@@ -783,4 +783,85 @@ public class TestBooleanRewrites extends LuceneTestCase {
.build());
assertEquals(expected, searcher.rewrite(query));
}
+
+ public void testShouldClausesLessThanOrEqualToMinimumNumberShouldMatch() throws IOException {
+ IndexSearcher searcher = newSearcher(new MultiReader());
+
+ // The only one SHOULD clause is MatchNoDocsQuery
+ BooleanQuery query =
+ new BooleanQuery.Builder()
+ .add(new PhraseQuery.Builder().build(), Occur.SHOULD)
+ .setMinimumNumberShouldMatch(1)
+ .build();
+ assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query));
+ query =
+ new BooleanQuery.Builder()
+ .add(new PhraseQuery.Builder().build(), Occur.SHOULD)
+ .setMinimumNumberShouldMatch(0)
+ .build();
+ assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query));
+
+ // Meaningful SHOULD clause count is less than MinimumNumberShouldMatch
+ query =
+ new BooleanQuery.Builder()
+ .add(new PhraseQuery.Builder().build(), Occur.SHOULD)
+ .add(new PhraseQuery.Builder().add(new Term("field", "a")).build(), Occur.SHOULD)
+ .setMinimumNumberShouldMatch(2)
+ .build();
+ assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query));
+
+ // Meaningful SHOULD clause count is equal to MinimumNumberShouldMatch
+ query =
+ new BooleanQuery.Builder()
+ .add(new PhraseQuery.Builder().add(new Term("field", "b")).build(), Occur.SHOULD)
+ .add(
+ new PhraseQuery.Builder()
+ .add(new Term("field", "a"))
+ .add(new Term("field", "c"))
+ .build(),
+ Occur.SHOULD)
+ .setMinimumNumberShouldMatch(2)
+ .build();
+ BooleanQuery expected =
+ new BooleanQuery.Builder()
+ .add(new TermQuery(new Term("field", "b")), Occur.MUST)
+ .add(
+ new PhraseQuery.Builder()
+ .add(new Term("field", "a"))
+ .add(new Term("field", "c"))
+ .build(),
+ Occur.MUST)
+ .build();
+ assertEquals(expected, searcher.rewrite(query));
+
+ // Invalid Inner query get removed after rewrite
+ Query inner =
+ new BooleanQuery.Builder()
+ .add(new PhraseQuery.Builder().build(), Occur.SHOULD)
+ .add(new PhraseQuery.Builder().add(new Term("field", "a")).build(), Occur.SHOULD)
+ .setMinimumNumberShouldMatch(2)
+ .build();
+
+ query =
+ new BooleanQuery.Builder()
+ .add(inner, Occur.SHOULD)
+ .add(new PhraseQuery.Builder().add(new Term("field", "b")).build(), Occur.SHOULD)
+ .add(
+ new PhraseQuery.Builder()
+ .add(new Term("field", "a"))
+ .add(new Term("field", "c"))
+ .build(),
+ Occur.SHOULD)
+ .setMinimumNumberShouldMatch(2)
+ .build();
+ assertEquals(expected, searcher.rewrite(query));
+
+ query =
+ new BooleanQuery.Builder()
+ .add(inner, Occur.SHOULD)
+ .add(new PhraseQuery.Builder().add(new Term("field", "b")).build(), Occur.SHOULD)
+ .setMinimumNumberShouldMatch(2)
+ .build();
+ assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query));
+ }
}
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java b/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java
index e293854fecb..adde74c7ba0 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java
@@ -253,7 +253,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
termsList.addAll(Arrays.asList(rareTerms));
String[] terms = termsList.toArray(new String[0]);
- for (int minNrShouldMatch = 1; minNrShouldMatch <= terms.length; minNrShouldMatch++) {
+ for (int minNrShouldMatch = 1; minNrShouldMatch < terms.length; minNrShouldMatch++) {
Scorer expected = scorer(terms, minNrShouldMatch, Mode.DOC_VALUES);
Scorer actual = scorer(terms, minNrShouldMatch, Mode.SCORER);
assertNext(expected, actual);
@@ -273,7 +273,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
String[] terms = termsList.toArray(new String[0]);
for (int amount = 25; amount < 200; amount += 25) {
- for (int minNrShouldMatch = 1; minNrShouldMatch <= terms.length; minNrShouldMatch++) {
+ for (int minNrShouldMatch = 1; minNrShouldMatch < terms.length; minNrShouldMatch++) {
Scorer expected = scorer(terms, minNrShouldMatch, Mode.DOC_VALUES);
Scorer actual = scorer(terms, minNrShouldMatch, Mode.SCORER);
assertAdvance(expected, actual, amount);
@@ -294,7 +294,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
Collections.shuffle(termsList, random());
for (int numTerms = 2; numTerms <= termsList.size(); numTerms++) {
String[] terms = termsList.subList(0, numTerms).toArray(new String[0]);
- for (int minNrShouldMatch = 1; minNrShouldMatch <= terms.length; minNrShouldMatch++) {
+ for (int minNrShouldMatch = 1; minNrShouldMatch < terms.length; minNrShouldMatch++) {
Scorer expected = scorer(terms, minNrShouldMatch, Mode.DOC_VALUES);
Scorer actual = scorer(terms, minNrShouldMatch, Mode.SCORER);
assertNext(expected, actual);
@@ -318,7 +318,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
for (int amount = 25; amount < 200; amount += 25) {
for (int numTerms = 2; numTerms <= termsList.size(); numTerms++) {
String[] terms = termsList.subList(0, numTerms).toArray(new String[0]);
- for (int minNrShouldMatch = 1; minNrShouldMatch <= terms.length; minNrShouldMatch++) {
+ for (int minNrShouldMatch = 1; minNrShouldMatch < terms.length; minNrShouldMatch++) {
Scorer expected = scorer(terms, minNrShouldMatch, Mode.DOC_VALUES);
Scorer actual = scorer(terms, minNrShouldMatch, Mode.SCORER);
assertAdvance(expected, actual, amount);