You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by no...@apache.org on 2016/06/16 12:08:34 UTC

[04/50] [abbrv] lucene-solr:apiv2: SOLR-8812: edismax: turn off mm processing if no explicit mm spec is provided and there are explicit operators (except for AND) - addresses problems caused by SOLR-2649

SOLR-8812: edismax: turn off mm processing if no explicit mm spec is provided and there are explicit operators (except for AND) - addresses problems caused by SOLR-2649


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/38714399
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/38714399
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/38714399

Branch: refs/heads/apiv2
Commit: 38714399760889d2d7b188a87341aade6139ffef
Parents: 9632069
Author: Steve Rowe <sa...@apache.org>
Authored: Fri Jun 10 02:29:26 2016 -0400
Committer: Steve Rowe <sa...@apache.org>
Committed: Fri Jun 10 02:29:26 2016 -0400

----------------------------------------------------------------------
 solr/CHANGES.txt                                |   4 +
 .../solr/search/ExtendedDismaxQParser.java      |  37 ++++-
 .../solr/search/TestExtendedDismaxParser.java   | 155 +++++++++++++++++++
 3 files changed, 191 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/38714399/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index ceb7a4a..73a60f2 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -203,6 +203,10 @@ Bug Fixes
 
 * SOLR-9198: config APIs unable to add multiple values with same name (noble)
 
+* SOLR-8812: edismax: turn off mm processing if no explicit mm spec is provided
+  and there are explicit operators (except for AND) - addresses problems caused by SOLR-2649.
+  (Greg Pendlebury, Jan Høydahl, Erick Erickson, Steve Rowe)
+
 Optimizations
 ----------------------
 * SOLR-8722: Don't force a full ZkStateReader refresh on every Overseer operation.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/38714399/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java
index 8401f3e..c6e5116 100644
--- a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java
+++ b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java
@@ -337,11 +337,17 @@ public class ExtendedDismaxQParser extends QParser {
     if(query == null) {
       return null;
     }
-
-    // For correct lucene queries, turn off mm processing if there
-    // were explicit operators (except for AND).
+    // For correct lucene queries, turn off mm processing if no explicit mm spec was provided
+    // and there were explicit operators (except for AND).
     if (query instanceof BooleanQuery) {
-      query = SolrPluginUtils.setMinShouldMatch((BooleanQuery)query, config.minShouldMatch, config.mmAutoRelax);
+      // config.minShouldMatch holds the value of mm which MIGHT have come from the user,
+      // but could also have been derived from q.op.
+      String mmSpec = config.minShouldMatch;
+
+      if (foundOperators(clauses, config.lowercaseOperators)) {
+        mmSpec = params.get(DisMaxParams.MM, "0%"); // Use provided mm spec if present, otherwise turn off mm processing
+      }
+      query = SolrPluginUtils.setMinShouldMatch((BooleanQuery)query, mmSpec, config.mmAutoRelax);
     }
     return query;
   }
@@ -391,7 +397,28 @@ public class ExtendedDismaxQParser extends QParser {
     }
     return sb.toString();
   }
-  
+
+  /**
+   * Returns true if at least one of the clauses is/has an explicit operator (except for AND)
+   */
+  private boolean foundOperators(List<Clause> clauses, boolean lowercaseOperators) {
+    for (Clause clause : clauses) {
+      if (clause.must == '+') return true;
+      if (clause.must == '-') return true;
+      if (clause.isBareWord()) {
+        String s = clause.val;
+        if ("OR".equals(s)) {
+          return true;
+        } else if ("NOT".equals(s)) {
+          return true;
+        } else if (lowercaseOperators && "or".equals(s)) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
   /**
    * Generates a query string from the raw clauses, uppercasing 
    * 'and' and 'or' as needed.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/38714399/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
index 019c54d..9b305f2 100644
--- a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
+++ b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
@@ -81,6 +81,13 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
     assertU(adoc("id", "63", "text_sw", "gold stocks"));
     assertU(adoc("id", "64", "text_sw", "stocks gold stockade"));
     assertU(adoc("id", "65", "text_sw", "snake oil"));
+    // SOLR-8812 user query example
+    assertU(adoc("id", "66", "text_sw", "hair ties barbie"));
+    assertU(adoc("id", "67", "text_sw", "hair ties"));
+    assertU(adoc("id", "68", "text_sw", "hair barbie"));
+    assertU(adoc("id", "69", "text_sw", "ties barbie"));
+    assertU(adoc("id", "70", "text_sw", "hair"));
+    assertU(adoc("id", "71", "text_sw", "ties"));
     assertU(commit());
   }
 
@@ -1145,6 +1152,154 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
             "mm", "100%",
             "defType", "edismax")
         , "*[count(//doc)=0]");
+
+    // SOLR-9174
+    assertQ("test minShouldMatch=1<-1 with explicit OR, one impossible clause, and no explicit q.op",
+        req("q", "barbie OR (hair AND nonexistentword)",
+            "qf", "text_sw",
+            "mm", "1<-1",
+            "defType", "edismax")
+        , "*[count(//doc)=3]");
+  }
+
+  /* SOLR-8812 */
+  @Test
+  public void testDefaultMM() throws Exception {
+    // Ensure MM is off when explicit operators (+/-/OR/NOT) are used and no explicit mm spec is specified.
+    assertQ("Explicit OR in query with no explicit mm and q.op=AND => mm = 0%",
+        req("q", "oil OR stocks",
+            "qf", "text_sw",
+            "q.op", "AND",
+            "defType", "edismax")
+        , "*[count(//doc)=4]");
+    assertQ("Explicit 'or' in query with lowercaseOperators=true, no explicit mm and q.op=AND => mm = 0%",
+        req("q", "oil or stocks",
+            "qf", "text_sw",
+            "q.op", "AND",
+            "lowercaseOperators", "true",
+            "defType", "edismax")
+        , "*[count(//doc)=4]");
+    assertQ("Explicit OR in query with no explicit mm and no explicit q.op => mm = 0%",
+        req("q", "oil OR stocks",
+            "qf", "text_sw",
+            "defType", "edismax")
+        , "*[count(//doc)=4]");
+    assertQ("No operator in query with no explicit mm and q.op=OR => mm = 0%",
+        req("q", "oil stocks",
+            "qf", "text_sw",
+            "defType", "edismax")
+        , "*[count(//doc)=4]");
+    assertQ("No operator in query with no explicit mm and q.op=AND => mm = 100%",
+        req("q", "oil stocks",
+            "qf", "text_sw",
+            "q.op", "AND",
+            "defType", "edismax")
+        , "*[count(//doc)=1]");
+    assertQ("No operator in query with no explicit mm and q.op=OR => mm = 0%",
+        req("q", "oil stocks",
+            "qf", "text_sw",
+            "q.op", "OR",
+            "defType", "edismax")
+        , "*[count(//doc)=4]");
+
+    assertQ("Explicit '-' operator in query with no explicit mm and no explicit q.op => mm = 0%",
+        req("q", "hair ties -barbie",
+            "qf", "text_sw",
+            "defType", "edismax")
+        , "*[count(//doc)=3]");
+    assertQ("Explicit NOT in query with no explicit mm and no explicit q.op => mm = 0%",
+        req("q", "hair ties NOT barbie",
+            "qf", "text_sw",
+            "defType", "edismax")
+        , "*[count(//doc)=3]");
+
+    assertQ("Explicit '-' operator in query with no explicit mm and q.op=OR => mm = 0%",
+        req("q", "hair ties -barbie",
+            "qf", "text_sw",
+            "q.op", "OR",
+            "defType", "edismax")
+        , "*[count(//doc)=3]");
+    assertQ("Explicit NOT in query with no explicit mm and q.op=OR => mm = 0%",
+        req("q", "hair ties NOT barbie",
+            "qf", "text_sw",
+            "q.op", "OR",
+            "defType", "edismax")
+        , "*[count(//doc)=3]");
+
+    assertQ("Explicit '-' operator in query with no explicit mm and q.op=OR => mm = 0%",
+        req("q", "hair AND ties -barbie",
+            "qf", "text_sw",
+            "q.op", "OR",
+            "defType", "edismax")
+        , "*[count(//doc)=1]");
+    assertQ("Explicit NOT in query with no explicit mm and q.op=OR => mm = 0%",
+        req("q", "hair AND ties -barbie",
+            "qf", "text_sw",
+            "q.op", "OR",
+            "defType", "edismax")
+        , "*[count(//doc)=1]");
+
+    assertQ("No explicit non-AND operator in query with no explicit mm and q.op=OR => mm = 0%",
+        req("q", "hair AND ties barbie",
+            "qf", "text_sw",
+            "q.op", "OR",
+            "defType", "edismax")
+        , "*[count(//doc)=2]");
+    assertQ("No explicit non-AND operator in query with no explicit mm and q.op=AND => mm = 100%",
+        req("q", "hair AND ties barbie",
+            "qf", "text_sw",
+            "q.op", "AND",
+            "defType", "edismax")
+        , "*[count(//doc)=1]");
+    assertQ("No explicit non-AND operator in query with no explicit mm and no explicit q.op => mm = 0%",
+        req("q", "hair AND ties barbie",
+            "qf", "text_sw",
+            "defType", "edismax")
+        , "*[count(//doc)=2]");
+    assertQ("No explicit non-AND operator in query with no explicit mm and no explicit q.op => mm = 0%",
+        req("q", "hair and ties barbie",
+            "qf", "text_sw",
+            "lowercaseOperators", "true",
+            "defType", "edismax")
+        , "*[count(//doc)=2]");
+
+    assertQ("Explicit '-' operator in query with no explicit mm and q.op=AND => mm = 100%",
+        req("q", "hair ties -barbie",
+            "qf", "text_sw",
+            "q.op", "AND",
+            "defType", "edismax")
+        , "*[count(//doc)=1]");
+    assertQ("Explicit NOT in query with no explicit mm and q.op=AND => mm = 100%",
+        req("q", "hair ties NOT barbie",
+            "qf", "text_sw",
+            "q.op", "AND",
+            "defType", "edismax")
+        , "*[count(//doc)=1]");
+
+    assertQ("Explicit OR in query with no explicit mm and q.op=AND => mm = 0%",
+        req("q", "hair OR ties barbie",
+            "qf", "text_sw",
+            "q.op", "AND",
+            "defType", "edismax")
+        , "*[count(//doc)=3]");
+    assertQ("Explicit OR in query with no explicit mm and q.op=OR => mm = 0%",
+        req("q", "hair OR ties barbie",
+            "qf", "text_sw",
+            "q.op", "OR",
+            "defType", "edismax")
+        , "*[count(//doc)=6]");
+    assertQ("Explicit OR in query with no explicit mm and no explicit q.op => mm = 0%",
+        req("q", "hair OR ties barbie",
+            "qf", "text_sw",
+            "defType", "edismax")
+        , "*[count(//doc)=6]");
+
+    assertQ("Explicit '+' operator in query with no explicit mm and q.op=AND => mm = 0%",
+        req("q", "hair ties +barbie",
+            "qf", "text_sw",
+            "q.op", "AND",
+            "defType", "edismax")
+        , "*[count(//doc)=1]");
   }
 
   public void testEdismaxSimpleExtension() throws SyntaxError {