You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by ab...@apache.org on 2021/06/09 11:02:06 UTC

[solr] branch main updated: SOLR-15449: edismax sow and mm (#158)

This is an automated email from the ASF dual-hosted git repository.

abenedetti pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/main by this push:
     new 9791057  SOLR-15449: edismax sow and mm (#158)
9791057 is described below

commit 9791057a85203ac5d0e5d2a6b13348df53dec84a
Author: Alessandro Benedetti <a....@sease.io>
AuthorDate: Wed Jun 9 12:01:57 2021 +0100

    SOLR-15449: edismax sow and mm (#158)
---
 solr/CHANGES.txt                                   |  2 +
 .../apache/solr/parser/SolrQueryParserBase.java    |  7 +++-
 .../solr/search/TestExtendedDismaxParser.java      | 43 +++++++++++++++-------
 3 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index f60c979..3474bf6 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -320,6 +320,8 @@ Bug Fixes
 
 * SOLR-15334: Return error response when failing auth in PKIAuthPlugin (Mike Drob)
 
+* SOLR-15449: Edismax sow incorrectly affects mm parameter in multi-field search (Alessandro Benedetti, Michael Gibney, David Smiley)
+
 ==================  8.10.0 ==================
 
 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
diff --git a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
index bea7df3..fc12c6b 100644
--- a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
+++ b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
@@ -41,6 +41,7 @@ import org.apache.lucene.search.DisjunctionMaxQuery;
 import org.apache.lucene.search.FuzzyQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MatchNoDocsQuery;
 import org.apache.lucene.search.MultiPhraseQuery;
 import org.apache.lucene.search.MultiTermQuery;
 import org.apache.lucene.search.PhraseQuery;
@@ -1160,7 +1161,11 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
               try {
                 subqs.add(ft.getFieldQuery(parser, sf, queryTerm));
               } catch (Exception e) { // assumption: raw = false only when called from ExtendedDismaxQueryParser.getQuery()
-                // for edismax: ignore parsing failures
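+                // ExtendedDismaxQueryParser is a lenient query parser, so parsing failures are not propagated.
+                // A failure happens when a field tries to parse a query term whose type is incompatible with the field,
+                // e.g. a numerical field trying to parse a textual query term.
+                // A MatchNoDocsQuery is added so that this field still contributes a (non-matching) clause for the term.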
+                subqs.add(new MatchNoDocsQuery());
               }
             }
             if (subqs.size() == 1) {
diff --git a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
index f9ae300..4efb51f 100644
--- a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
+++ b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
@@ -415,19 +415,7 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
     assertQ(req("defType","edismax", "mm","0", "q","Terminator: 100", "qf","movies_t foo_i"),
             twor);
 
-    assertQ(req("defType","edismax", "mm","100%", "q","Terminator: 100", "qf","movies_t foo_i", "sow","true"),
-            nor);
-    // When sow=false, the per-field query structures differ (no "Terminator" query on integer field foo_i),
-    // so a dismax-per-field is constructed.  As a result, mm=100% is applied per-field instead of per-term;
-    // since there is only one term (100) required in the foo_i field's dismax, the query can match docs that
-    // only have the 100 term in the foo_i field, and don't necessarily have "Terminator" in any field.
-    assertQ(req("defType","edismax", "mm","100%", "q","Terminator: 100", "qf","movies_t foo_i", "sow","false"),
-            oner);
-    assertQ(req("defType","edismax", "mm","100%", "q","Terminator: 100", "qf","movies_t foo_i"), // default sow=false
-        oner);
-
-    assertQ(req("defType","edismax", "mm","100%", "q","Terminator: 8", "qf","movies_t foo_i"),
-            oner);
+    
 
     assertQ(req("defType","edismax", "mm","0", "q","movies_t:Terminator 100", "qf","movies_t foo_i"),
             twor);
@@ -1775,7 +1763,34 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
     String resp = h.query(request);
     return (String) BaseTestHarness.evaluateXPath(resp, "//str[@name='parsedquery']/text()", XPathConstants.STRING);
   }
-  
+
+  public void testSplitOnWhitespace_shouldRespectMinimumShouldMatch() {
+    String oner = "*[count(//doc)=1]";
+    String nor = "*[count(//doc)=0]";
+    /*
+     * In multi-field search with different analysis per field:
+     * sow=true causes the minimum should match to be applied "per document",
+     * i.e. to match, a document must contain all the mm query terms at least once, in any field;
+     * sow=false causes the minimum should match to be applied "per field",
+     * i.e. to match, a document must contain all the mm query terms at least once within a single field.
+     * See https://issues.apache.org/jira/browse/SOLR-12779 for additional details.
+     */
+    assertQ(req("defType", "edismax", "mm", "100%", "q", "Terminator: 100", "qf", "movies_t foo_i", "sow", "true"),
+        nor); //no document contains both terms, either in a single field or across multiple fields
+    assertQ(req("defType", "edismax", "mm", "100%", "q", "Terminator: 100", "qf", "movies_t foo_i", "sow", "false"),
+        nor); //no document contains both terms in a field
+
+    assertQ(req("defType", "edismax", "mm", "100%", "q", "Terminator: 8", "qf", "movies_t foo_i", "sow", "true"),
+        oner); //document 46 contains both terms, Terminator in movies_t and 8 in foo_i
+    assertQ(req("defType", "edismax", "mm", "100%", "q", "Terminator: 8", "qf", "movies_t foo_i", "sow", "false"),
+        nor); //no document contains both terms in a field
+
+    assertQ(req("defType", "edismax", "mm", "100%", "q", "mission impossible Terminator: 8", "qf", "movies_t foo_i", "sow", "true"),
+        oner); //document 46 contains all terms, mission, impossible, Terminator in movies_t and 8 in foo_i
+    assertQ(req("defType", "edismax", "mm", "100%", "q", "mission impossible Terminator: 8", "qf", "movies_t foo_i", "sow", "false"),
+        nor); //no document contains all terms, in a field
+  }
+    
   public void testSplitOnWhitespace_Different_Field_Analysis() throws Exception {
     // When the *structure* of produced queries is different in each field, 
     // sow=true produces boolean-of-dismax query structure,