You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/11/08 13:02:12 UTC
svn commit: r1407027 - in /lucene/dev/branches/lucene_solr_3_6: ./ lucene/
lucene/contrib/facet/ lucene/contrib/spellchecker/ lucene/core/src/
lucene/core/src/test/org/apache/lucene/analysis/ lucene/site/
lucene/test-framework/src/java/org/apache/lucen...
Author: rmuir
Date: Thu Nov 8 12:02:10 2012
New Revision: 1407027
URL: http://svn.apache.org/viewvc?rev=1407027&view=rev
Log:
SOLR-3589: Edismax parser does not honor mm parameter if analyzer splits a token
Modified:
lucene/dev/branches/lucene_solr_3_6/ (props changed)
lucene/dev/branches/lucene_solr_3_6/lucene/ (props changed)
lucene/dev/branches/lucene_solr_3_6/lucene/contrib/facet/ (props changed)
lucene/dev/branches/lucene_solr_3_6/lucene/contrib/spellchecker/ (props changed)
lucene/dev/branches/lucene_solr_3_6/lucene/core/src/ (props changed)
lucene/dev/branches/lucene_solr_3_6/lucene/core/src/test/org/apache/lucene/analysis/ (props changed)
lucene/dev/branches/lucene_solr_3_6/lucene/site/ (props changed)
lucene/dev/branches/lucene_solr_3_6/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java (props changed)
lucene/dev/branches/lucene_solr_3_6/lucene/tools/ (props changed)
lucene/dev/branches/lucene_solr_3_6/solr/ (props changed)
lucene/dev/branches/lucene_solr_3_6/solr/CHANGES.txt (contents, props changed)
lucene/dev/branches/lucene_solr_3_6/solr/NOTICE.txt (props changed)
lucene/dev/branches/lucene_solr_3_6/solr/common-build.xml (props changed)
lucene/dev/branches/lucene_solr_3_6/solr/contrib/ (props changed)
lucene/dev/branches/lucene_solr_3_6/solr/core/ (props changed)
lucene/dev/branches/lucene_solr_3_6/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java
lucene/dev/branches/lucene_solr_3_6/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
lucene/dev/branches/lucene_solr_3_6/solr/example/ (props changed)
lucene/dev/branches/lucene_solr_3_6/solr/lib/ (props changed)
lucene/dev/branches/lucene_solr_3_6/solr/solrj/ (props changed)
lucene/dev/branches/lucene_solr_3_6/solr/test-framework/ (props changed)
lucene/dev/branches/lucene_solr_3_6/solr/testlogging.properties (props changed)
Modified: lucene/dev/branches/lucene_solr_3_6/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_6/solr/CHANGES.txt?rev=1407027&r1=1407026&r2=1407027&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_3_6/solr/CHANGES.txt (original)
+++ lucene/dev/branches/lucene_solr_3_6/solr/CHANGES.txt Thu Nov 8 12:02:10 2012
@@ -27,6 +27,9 @@ Bug Fixes
* SOLR-3790: ConcurrentModificationException could be thrown when using hl.fl=*.
(yonik, koji)
+* SOLR-3589: Edismax parser does not honor mm parameter if analyzer splits a token.
+ (Tom Burton-West, Robert Muir)
+
================== 3.6.1 ==================
More information about this release, including any errata related to the
release notes, upgrade instructions, or other changes may be found online at:
Modified: lucene/dev/branches/lucene_solr_3_6/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_6/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java?rev=1407027&r1=1407026&r2=1407027&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_3_6/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java (original)
+++ lucene/dev/branches/lucene_solr_3_6/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java Thu Nov 8 12:02:10 2012
@@ -121,7 +121,10 @@ class ExtendedDismaxQParser extends QPar
SolrParams params = getParams();
solrParams = SolrParams.wrapDefaults(localParams, params);
-
+ // Solr 4.0 sets the default at 0% if q.op=OR and %100 if q.op =AND
+ // just go with the flow and use 3.6 default of 100% here
+ final String minShouldMatch = solrParams.get(DisMaxParams.MM, "100%");
+
userFields = new UserFields(U.parseFieldBoosts(solrParams.getParams(DMP.UF)));
queryFields = SolrPluginUtils.parseFieldBoosts(solrParams.getParams(DisMaxParams.QF));
@@ -235,7 +238,9 @@ class ExtendedDismaxQParser extends QPar
// For correct lucene queries, turn off mm processing if there
// were explicit operators (except for AND).
boolean doMinMatched = (numOR + numNOT + numPluses + numMinuses) == 0;
-
+ // but always for unstructured implicit bqs created by getFieldQuery
+ up.minShouldMatch = minShouldMatch;
+
try {
up.setRemoveStopFilter(!stopwords);
up.exceptions = true;
@@ -252,7 +257,6 @@ class ExtendedDismaxQParser extends QPar
}
if (parsedUserQuery != null && doMinMatched) {
- String minShouldMatch = solrParams.get(DisMaxParams.MM, "100%");
if (parsedUserQuery instanceof BooleanQuery) {
SolrPluginUtils.setMinShouldMatch((BooleanQuery)parsedUserQuery, minShouldMatch);
}
@@ -295,8 +299,7 @@ class ExtendedDismaxQParser extends QPar
parsedUserQuery = up.parse(escapedUserQuery);
// Only do minimum-match logic
- String minShouldMatch = solrParams.get(DisMaxParams.MM, "100%");
-
+
if (parsedUserQuery instanceof BooleanQuery) {
BooleanQuery t = new BooleanQuery();
SolrPluginUtils.flattenBooleanQuery(t, (BooleanQuery)parsedUserQuery);
@@ -874,6 +877,7 @@ class ExtendedDismaxQParser extends QPar
// used when constructing boosting part of query via sloppy phrases
boolean exceptions; // allow exceptions to be thrown (for example on a missing field)
+ String minShouldMatch; // for inner boolean queries produced from a single fieldQuery
ExtendedAnalyzer analyzer;
/**
@@ -1117,6 +1121,18 @@ class ExtendedDismaxQParser extends QPar
case FIELD: // fallthrough
case PHRASE:
Query query = super.getFieldQuery(field, val, type == QType.PHRASE);
+ // A BooleanQuery is only possible from getFieldQuery if it came from
+ // a single whitespace separated term. In this case, check the coordination
+ // factor on the query: if its enabled, that means we aren't a set of synonyms
+ // but instead multiple terms from one whitespace-separated term, we must
+ // apply minShouldMatch here so that it works correctly with other things
+ // like aliasing.
+ if (query instanceof BooleanQuery) {
+ BooleanQuery bq = (BooleanQuery) query;
+ if (!bq.isCoordDisabled()) {
+ SolrPluginUtils.setMinShouldMatch(bq, minShouldMatch);
+ }
+ }
if (query instanceof PhraseQuery) {
PhraseQuery pq = (PhraseQuery)query;
if (minClauseSize > 1 && pq.getTerms().length < minClauseSize) return null;
Modified: lucene/dev/branches/lucene_solr_3_6/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_6/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java?rev=1407027&r1=1407026&r2=1407027&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_3_6/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java (original)
+++ lucene/dev/branches/lucene_solr_3_6/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java Thu Nov 8 12:02:10 2012
@@ -55,6 +55,16 @@ public class TestExtendedDismaxParser ex
assertU(adoc("id", "49", "text_sw", "start the big apple end", "foo_i","-100"));
assertU(adoc("id", "50", "text_sw", "start new big city end"));
assertU(adoc("id", "51", "store", "12.34,-56.78"));
+ assertU(adoc("id", "52", "text_sw", "tekna theou klethomen"));
+ assertU(adoc("id", "53", "text_sw", "nun tekna theou esmen"));
+ assertU(adoc("id", "54", "text_sw", "phanera estin ta tekna tou theou"));
+ assertU(adoc("id", "55", "standardtok", "大"));
+ assertU(adoc("id", "56", "standardtok", "大äº"));
+ assertU(adoc("id", "57", "standardtok", "大äºæ¹¾"));
+ assertU(adoc("id", "58", "HTMLstandardtok", "大"));
+ assertU(adoc("id", "59", "HTMLstandardtok", "大äº"));
+ assertU(adoc("id", "60", "HTMLstandardtok", "大äºæ¹¾"));
+ assertU(adoc("id", "61", "text_sw", "bazaaa")); // synonyms in an expansion group
assertU(commit());
}
@Override
@@ -501,4 +511,151 @@ public class TestExtendedDismaxParser ex
"//str[@name='id'][.='145']",
"//str[@name='id'][.='146']");
}
+ /**
+ * SOLR-3589: Edismax parser does not honor mm parameter if analyzer splits a token
+ */
+ public void testCJK() throws Exception {
+ assertQ("test cjk (disjunction)",
+ req("q", "大äºæ¹¾",
+ "qf", "standardtok",
+ "mm", "0%",
+ "defType", "edismax")
+ , "*[count(//doc)=3]");
+ assertQ("test cjk (minShouldMatch)",
+ req("q", "大äºæ¹¾",
+ "qf", "standardtok",
+ "mm", "67%",
+ "defType", "edismax")
+ , "*[count(//doc)=2]");
+ assertQ("test cjk (conjunction)",
+ req("q", "大äºæ¹¾",
+ "qf", "standardtok",
+ "mm", "100%",
+ "defType", "edismax")
+ , "*[count(//doc)=1]");
+ }
+ /**
+ * test that minShouldMatch works with aliasing
+ * for implicit boolean queries
+ */
+ public void testCJKAliasing() throws Exception {
+ // single field
+ assertQ("test cjk (aliasing+disjunction)",
+ req("q", "myalias:大äºæ¹¾",
+ "f.myalias.qf", "standardtok",
+ "mm", "0%",
+ "defType", "edismax")
+ , "*[count(//doc)=3]");
+ assertQ("test cjk (aliasing+minShouldMatch)",
+ req("q", "myalias:大äºæ¹¾",
+ "f.myalias.qf", "standardtok",
+ "mm", "67%",
+ "defType", "edismax")
+ , "*[count(//doc)=2]");
+ assertQ("test cjk (aliasing+conjunction)",
+ req("q", "myalias:大äºæ¹¾",
+ "f.myalias.qf", "standardtok",
+ "mm", "100%",
+ "defType", "edismax")
+ , "*[count(//doc)=1]");
+ // multifield
+ assertQ("test cjk (aliasing+disjunction)",
+ req("q", "myalias:大äºæ¹¾",
+ "f.myalias.qf", "standardtok HTMLstandardtok",
+ "mm", "0%",
+ "defType", "edismax")
+ , "*[count(//doc)=6]");
+ assertQ("test cjk (aliasing+minShouldMatch)",
+ req("q", "myalias:大äºæ¹¾",
+ "f.myalias.qf", "standardtok HTMLstandardtok",
+ "mm", "67%",
+ "defType", "edismax")
+ , "*[count(//doc)=4]");
+ assertQ("test cjk (aliasing+conjunction)",
+ req("q", "myalias:大äºæ¹¾",
+ "f.myalias.qf", "standardtok HTMLstandardtok",
+ "mm", "100%",
+ "defType", "edismax")
+ , "*[count(//doc)=2]");
+ }
+
+ /** Test that we apply boosts correctly */
+ public void testCJKBoosts() throws Exception {
+ assertQ("test cjk (disjunction)",
+ req("q", "大äºæ¹¾",
+ "qf", "standardtok^2 HTMLstandardtok",
+ "mm", "0%",
+ "defType", "edismax")
+ , "*[count(//doc)=6]", "//result/doc[1]/str[@name='id'][.='57']");
+ assertQ("test cjk (minShouldMatch)",
+ req("q", "大äºæ¹¾",
+ "qf", "standardtok^2 HTMLstandardtok",
+ "mm", "67%",
+ "defType", "edismax")
+ , "*[count(//doc)=4]", "//result/doc[1]/str[@name='id'][.='57']");
+ assertQ("test cjk (conjunction)",
+ req("q", "大äºæ¹¾",
+ "qf", "standardtok^2 HTMLstandardtok",
+ "mm", "100%",
+ "defType", "edismax")
+ , "*[count(//doc)=2]", "//result/doc[1]/str[@name='id'][.='57']");
+
+ // now boost the other field
+ assertQ("test cjk (disjunction)",
+ req("q", "大äºæ¹¾",
+ "qf", "standardtok HTMLstandardtok^2",
+ "mm", "0%",
+ "defType", "edismax")
+ , "*[count(//doc)=6]", "//result/doc[1]/str[@name='id'][.='60']");
+ assertQ("test cjk (minShouldMatch)",
+ req("q", "大äºæ¹¾",
+ "qf", "standardtok HTMLstandardtok^2",
+ "mm", "67%",
+ "defType", "edismax")
+ , "*[count(//doc)=4]", "//result/doc[1]/str[@name='id'][.='60']");
+ assertQ("test cjk (conjunction)",
+ req("q", "大äºæ¹¾",
+ "qf", "standardtok HTMLstandardtok^2",
+ "mm", "100%",
+ "defType", "edismax")
+ , "*[count(//doc)=2]", "//result/doc[1]/str[@name='id'][.='60']");
+ }
+
+ /** always apply minShouldMatch to the inner booleanqueries
+ * created from whitespace, as these are never structured lucene queries
+ * but only come from unstructured text */
+ public void testCJKStructured() throws Exception {
+ assertQ("test cjk (disjunction)",
+ req("q", "大äºæ¹¾ OR bogus",
+ "qf", "standardtok",
+ "mm", "0%",
+ "defType", "edismax")
+ , "*[count(//doc)=3]");
+ assertQ("test cjk (minShouldMatch)",
+ req("q", "大äºæ¹¾ OR bogus",
+ "qf", "standardtok",
+ "mm", "67%",
+ "defType", "edismax")
+ , "*[count(//doc)=2]");
+ assertQ("test cjk (conjunction)",
+ req("q", "大äºæ¹¾ OR bogus",
+ "qf", "standardtok",
+ "mm", "100%",
+ "defType", "edismax")
+ , "*[count(//doc)=1]");
+ }
+
+ /**
+ * Test that we don't apply minShouldMatch to the inner boolean queries
+ * when there are synonyms (these are indicated by coordination factor)
+ */
+ public void testSynonyms() throws Exception {
+ // document only contains baraaa, but should still match.
+ assertQ("test synonyms",
+ req("q", "fooaaa",
+ "qf", "text_sw",
+ "mm", "100%",
+ "defType", "edismax")
+ , "*[count(//doc)=1]");
+ }
}