You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by ab...@apache.org on 2021/06/09 11:02:06 UTC
[solr] branch main updated: SOLR-15449: edismax sow and mm (#158)
This is an automated email from the ASF dual-hosted git repository.
abenedetti pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new 9791057 SOLR-15449: edismax sow and mm (#158)
9791057 is described below
commit 9791057a85203ac5d0e5d2a6b13348df53dec84a
Author: Alessandro Benedetti <a....@sease.io>
AuthorDate: Wed Jun 9 12:01:57 2021 +0100
SOLR-15449: edismax sow and mm (#158)
---
solr/CHANGES.txt | 2 +
.../apache/solr/parser/SolrQueryParserBase.java | 7 +++-
.../solr/search/TestExtendedDismaxParser.java | 43 +++++++++++++++-------
3 files changed, 37 insertions(+), 15 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index f60c979..3474bf6 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -320,6 +320,8 @@ Bug Fixes
* SOLR-15334: Return error response when failing auth in PKIAuthPlugin (Mike Drob)
+* SOLR-15449: Edismax sow incorrectly affects the mm parameter in multi-field search (Alessandro Benedetti, Michael Gibney, David Smiley)
+
================== 8.10.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
diff --git a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
index bea7df3..fc12c6b 100644
--- a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
+++ b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
@@ -41,6 +41,7 @@ import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PhraseQuery;
@@ -1160,7 +1161,11 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
try {
subqs.add(ft.getFieldQuery(parser, sf, queryTerm));
} catch (Exception e) { // assumption: raw = false only when called from ExtendedDismaxQueryParser.getQuery()
- // for edismax: ignore parsing failures
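+ // ExtendedDismaxQueryParser is a lenient query parser, so a parsing failure is not propagated.
+ // A failure happens when a field tries to parse a query term whose type is incompatible with the field,
+ // e.g. a numerical field trying to parse a textual query term.
+ // Instead of being skipped, such a field contributes a clause that matches no documents.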
+ subqs.add(new MatchNoDocsQuery());
}
}
if (subqs.size() == 1) {
diff --git a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
index f9ae300..4efb51f 100644
--- a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
+++ b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
@@ -415,19 +415,7 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
assertQ(req("defType","edismax", "mm","0", "q","Terminator: 100", "qf","movies_t foo_i"),
twor);
- assertQ(req("defType","edismax", "mm","100%", "q","Terminator: 100", "qf","movies_t foo_i", "sow","true"),
- nor);
- // When sow=false, the per-field query structures differ (no "Terminator" query on integer field foo_i),
- // so a dismax-per-field is constructed. As a result, mm=100% is applied per-field instead of per-term;
- // since there is only one term (100) required in the foo_i field's dismax, the query can match docs that
- // only have the 100 term in the foo_i field, and don't necessarily have "Terminator" in any field.
- assertQ(req("defType","edismax", "mm","100%", "q","Terminator: 100", "qf","movies_t foo_i", "sow","false"),
- oner);
- assertQ(req("defType","edismax", "mm","100%", "q","Terminator: 100", "qf","movies_t foo_i"), // default sow=false
- oner);
-
- assertQ(req("defType","edismax", "mm","100%", "q","Terminator: 8", "qf","movies_t foo_i"),
- oner);
+
assertQ(req("defType","edismax", "mm","0", "q","movies_t:Terminator 100", "qf","movies_t foo_i"),
twor);
@@ -1775,7 +1763,34 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
String resp = h.query(request);
return (String) BaseTestHarness.evaluateXPath(resp, "//str[@name='parsedquery']/text()", XPathConstants.STRING);
}
-
+
+ public void testSplitOnWhitespace_shouldRespectMinimumShouldMatch() {
+ String oner = "*[count(//doc)=1]";
+ String nor = "*[count(//doc)=0]";
+ /*
+ * In multi-field search with different analysis per field:
+ * sow=true causes the minimum should match to be "per document",
+ * i.e. to be a match, a document must contain all the mm query terms anywhere at least once;
+ * sow=false causes the minimum should match to be "per field",
+ * i.e. to be a match, a document must contain all the mm query terms in a single field at least once.
+ * See https://issues.apache.org/jira/browse/SOLR-12779 for additional details
+ */
+ assertQ(req("defType", "edismax", "mm", "100%", "q", "Terminator: 100", "qf", "movies_t foo_i", "sow", "true"),
+ nor); //no document contains both terms, either in a single field or across multiple fields
+ assertQ(req("defType", "edismax", "mm", "100%", "q", "Terminator: 100", "qf", "movies_t foo_i", "sow", "false"),
+ nor); //no document contains both terms in a field
+
+ assertQ(req("defType", "edismax", "mm", "100%", "q", "Terminator: 8", "qf", "movies_t foo_i", "sow", "true"),
+ oner); //document 46 contains both terms, Terminator in movies_t and 8 in foo_i
+ assertQ(req("defType", "edismax", "mm", "100%", "q", "Terminator: 8", "qf", "movies_t foo_i", "sow", "false"),
+ nor); //no document contains both terms in a field
+
+ assertQ(req("defType", "edismax", "mm", "100%", "q", "mission impossible Terminator: 8", "qf", "movies_t foo_i", "sow", "true"),
+ oner); //document 46 contains all terms, mission, impossible, Terminator in movies_t and 8 in foo_i
+ assertQ(req("defType", "edismax", "mm", "100%", "q", "mission impossible Terminator: 8", "qf", "movies_t foo_i", "sow", "false"),
+ nor); //no document contains all terms, in a field
+ }
+
public void testSplitOnWhitespace_Different_Field_Analysis() throws Exception {
// When the *structure* of produced queries is different in each field,
// sow=true produces boolean-of-dismax query structure,