You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2016/08/25 13:47:20 UTC

lucene-solr:branch_6x: LUCENE-7416: Rewrite optimizations for BooleanQuery.

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_6x e3b08517c -> 1ac9609ca


LUCENE-7416: Rewrite optimizations for BooleanQuery.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/1ac9609c
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/1ac9609c
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/1ac9609c

Branch: refs/heads/branch_6x
Commit: 1ac9609caedbf739379bdabdac909f77fee2f5c6
Parents: e3b0851
Author: Adrien Grand <jp...@gmail.com>
Authored: Thu Aug 25 15:42:28 2016 +0200
Committer: Adrien Grand <jp...@gmail.com>
Committed: Thu Aug 25 15:43:32 2016 +0200

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  7 ++-
 .../org/apache/lucene/search/BooleanQuery.java  | 40 +++++++++++++
 .../lucene/search/TestBooleanRewrites.java      | 59 ++++++++++++++++++++
 3 files changed, 105 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1ac9609c/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 5e5c955..70092e1 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -4,7 +4,12 @@ For more information on past and future Lucene versions, please see:
 http://s.apache.org/luceneversions
 
 ======================= Lucene 6.3.0 =======================
-(No Changes)
+
+Optimizations
+
+* LUCENE-7416: BooleanQuery rewrite now optimizes the case where the same query
+  occurs both as a SHOULD and a FILTER clause, or both as a MUST/FILTER and a
+  MUST_NOT clause. (Spyros Kapnissis via Adrien Grand)
 
 ======================= Lucene 6.2.0 =======================
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1ac9609c/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
index d6a53bd..bf5b29e 100644
--- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
@@ -296,6 +296,17 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
       }
     }
 
+    // Check whether some clauses are both required and excluded
+    if (clauseSets.get(Occur.MUST_NOT).size() > 0) {
+      final Set<Query> reqAndExclQueries = new HashSet<Query>(clauseSets.get(Occur.FILTER));
+      reqAndExclQueries.addAll(clauseSets.get(Occur.MUST));
+      reqAndExclQueries.retainAll(clauseSets.get(Occur.MUST_NOT));
+
+      if (reqAndExclQueries.isEmpty() == false) {
+        return new MatchNoDocsQuery("FILTER or MUST clause also in MUST_NOT");
+      }
+    }
+
     // remove FILTER clauses that are also MUST clauses
     // or that match all documents
     if (clauseSets.get(Occur.MUST).size() > 0 && clauseSets.get(Occur.FILTER).size() > 0) {
@@ -318,6 +329,35 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
       }
     }
 
+    // convert FILTER clauses that are also SHOULD clauses to MUST clauses
+    if (clauseSets.get(Occur.SHOULD).size() > 0 && clauseSets.get(Occur.FILTER).size() > 0) {
+      final Collection<Query> filters = clauseSets.get(Occur.FILTER);
+      final Collection<Query> shoulds = clauseSets.get(Occur.SHOULD);
+
+      Set<Query> intersection = new HashSet<>(filters);
+      intersection.retainAll(shoulds);
+
+      if (intersection.isEmpty() == false) {
+        BooleanQuery.Builder builder = new BooleanQuery.Builder();
+        int minShouldMatch = getMinimumNumberShouldMatch();
+
+        for (BooleanClause clause : clauses) {
+          if (intersection.contains(clause.getQuery())) {
+            if (clause.getOccur() == Occur.SHOULD) {
+              builder.add(new BooleanClause(clause.getQuery(), Occur.MUST));
+              minShouldMatch--;
+            }
+          } else {
+            builder.add(clause);
+          }
+        }
+
+        builder.setMinimumNumberShouldMatch(Math.max(0, minShouldMatch));
+        return builder.build();
+      }
+    }
+
+
     // Rewrite queries whose single scoring clause is a MUST clause on a
     // MatchAllDocsQuery to a ConstantScoreQuery
     {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1ac9609c/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java
index fd54640..425dc13 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java
@@ -215,6 +215,65 @@ public class TestBooleanRewrites extends LuceneTestCase {
         .build();
     assertEquals(expected, searcher.rewrite(bq));
   }
+  
+  // A query that is both a SHOULD and a FILTER clause is expected to be rewritten to a single MUST clause, with minShouldMatch decremented by 1
+  public void testConvertShouldAndFilterToMust() throws IOException {
+    IndexSearcher searcher = newSearcher(new MultiReader());
+
+    // no minShouldMatch: the whole BooleanQuery is expected to rewrite to the bare TermQuery
+    BooleanQuery bq = new BooleanQuery.Builder()
+        .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
+        .add(new TermQuery(new Term("foo", "bar")), Occur.FILTER)
+        .build();
+    assertEquals(new TermQuery(new Term("foo", "bar")), searcher.rewrite(bq));
+
+
+    // minShouldMatch=2 is expected to drop to 1 once the duplicated SHOULD clause becomes a MUST clause
+    bq = new BooleanQuery.Builder()
+        .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
+        .add(new TermQuery(new Term("foo", "bar")), Occur.FILTER)
+        .add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD)
+        .add(new TermQuery(new Term("foo", "quz")), Occur.SHOULD)
+        .setMinimumNumberShouldMatch(2)
+        .build();
+
+    BooleanQuery expected = new BooleanQuery.Builder()
+        .add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
+        .add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD)
+        .add(new TermQuery(new Term("foo", "quz")), Occur.SHOULD)
+        .setMinimumNumberShouldMatch(1)
+        .build();
+    assertEquals(expected, searcher.rewrite(bq));
+  }
+
+  // A query that is both required (MUST or FILTER) and excluded (MUST_NOT) is expected to rewrite to MatchNoDocsQuery
+  public void testDuplicateMustOrFilterWithMustNot() throws IOException {
+    IndexSearcher searcher = newSearcher(new MultiReader());
+
+    // Case 1: the same query appears as a MUST clause and as a MUST_NOT clause
+    BooleanQuery bq = new BooleanQuery.Builder()
+            .add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
+            // unrelated clauses; the expected rewrite is the same with them present
+            .add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
+            .add(new TermQuery(new Term("foo", "bad")), Occur.SHOULD)
+            // same query as the first MUST clause, this time excluded
+            .add(new TermQuery(new Term("foo", "bar")), Occur.MUST_NOT)
+            .build();
+
+    assertEquals(new MatchNoDocsQuery(), searcher.rewrite(bq));
+
+    // Case 2: the same query appears as a FILTER clause and as a MUST_NOT clause
+    BooleanQuery bq2 = new BooleanQuery.Builder()
+            .add(new TermQuery(new Term("foo", "bar")), Occur.FILTER)
+            // unrelated clauses; the expected rewrite is the same with them present
+            .add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
+            .add(new TermQuery(new Term("foo", "bad")), Occur.SHOULD)
+            // same query as the FILTER clause, this time excluded
+            .add(new TermQuery(new Term("foo", "bar")), Occur.MUST_NOT)
+            .build();
+
+    assertEquals(new MatchNoDocsQuery(), searcher.rewrite(bq2));
+  }
 
   public void testRemoveMatchAllFilter() throws IOException {
     IndexSearcher searcher = newSearcher(new MultiReader());