You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by cp...@apache.org on 2017/04/07 12:29:24 UTC
[2/2] lucene-solr git commit: SOLR-10264: Fixes multi-term synonym parsing in ManagedSynonymFilterFactory. (Jörg Rathlev, Steve Rowe, Christine Poerschke)
SOLR-10264: Fixes multi-term synonym parsing in ManagedSynonymFilterFactory.
(Jörg Rathlev, Steve Rowe, Christine Poerschke)
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/adabfdd9
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/adabfdd9
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/adabfdd9
Branch: refs/heads/master
Commit: adabfdd9c2a50b4141f485655df0d048df21bd23
Parents: 2060215
Author: Christine Poerschke <cp...@apache.org>
Authored: Fri Apr 7 11:11:23 2017 +0100
Committer: Christine Poerschke <cp...@apache.org>
Committed: Fri Apr 7 11:11:23 2017 +0100
----------------------------------------------------------------------
solr/CHANGES.txt | 3 ++
.../analysis/ManagedSynonymFilterFactory.java | 7 ++--
.../solr/collection1/conf/schema-rest.xml | 2 +-
.../TestManagedSynonymFilterFactory.java | 40 ++++++++++++++++++++
4 files changed, 48 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/adabfdd9/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 9e70c24..62e9189 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -163,6 +163,9 @@ Bug Fixes
* SOLR-10323: fix to SpellingQueryConverter to properly strip out colons in field-specific queries.
(Amrit Sarkar via James Dyer)
+* SOLR-10264: Fixes multi-term synonym parsing in ManagedSynonymFilterFactory.
+ (Jörg Rathlev, Steve Rowe, Christine Poerschke)
+
Other Changes
----------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/adabfdd9/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java b/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java
index 3b5ce2e..ffbbb36 100644
--- a/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java
+++ b/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java
@@ -33,6 +33,7 @@ import org.apache.lucene.analysis.synonym.SynonymFilterFactory;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.CharsRefBuilder;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.util.NamedList;
@@ -358,9 +359,9 @@ public class ManagedSynonymFilterFactory extends BaseManagedTokenFilterFactory {
for (String term : cpsm.mappings.keySet()) {
for (String mapping : cpsm.mappings.get(term)) {
// apply the case setting to match the behavior of the SynonymMap builder
- String casedTerm = synonymManager.applyCaseSetting(ignoreCase, term);
- String casedMapping = synonymManager.applyCaseSetting(ignoreCase, mapping);
- add(new CharsRef(casedTerm), new CharsRef(casedMapping), false);
+ CharsRef casedTerm = analyze(synonymManager.applyCaseSetting(ignoreCase, term), new CharsRefBuilder());
+ CharsRef casedMapping = analyze(synonymManager.applyCaseSetting(ignoreCase, mapping), new CharsRefBuilder());
+ add(casedTerm, casedMapping, false);
}
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/adabfdd9/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml
----------------------------------------------------------------------
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml b/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml
index aa4c21d..85c822a 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml
@@ -494,7 +494,7 @@
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
<!-- Field type where english stopwords are managed by the REST API -->
- <fieldType name="managed_en" class="solr.TextField">
+ <fieldType name="managed_en" class="solr.TextField" autoGeneratePhraseQueries="false">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.ManagedStopFilterFactory" managed="english"/>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/adabfdd9/solr/core/src/test/org/apache/solr/rest/schema/analysis/TestManagedSynonymFilterFactory.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/rest/schema/analysis/TestManagedSynonymFilterFactory.java b/solr/core/src/test/org/apache/solr/rest/schema/analysis/TestManagedSynonymFilterFactory.java
index 26fcde1..5cacc48 100644
--- a/solr/core/src/test/org/apache/solr/rest/schema/analysis/TestManagedSynonymFilterFactory.java
+++ b/solr/core/src/test/org/apache/solr/rest/schema/analysis/TestManagedSynonymFilterFactory.java
@@ -16,6 +16,7 @@
*/
package org.apache.solr.rest.schema.analysis;
import java.io.File;
+import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
@@ -154,13 +155,30 @@ public class TestManagedSynonymFilterFactory extends RestTestBase {
"count(/response/lst[@name='field']) = 1",
"/response/lst[@name='responseHeader']/int[@name='status'] = '0'");
+ // multi-term synonym logic - SOLR-10264
+ final String multiTermOrigin;
+ final String multiTermSynonym;
+ if (random().nextBoolean()) {
+ multiTermOrigin = "hansestadt hamburg";
+ multiTermSynonym = "hh";
+ } else {
+ multiTermOrigin = "hh";
+ multiTermSynonym = "hansestadt hamburg";
+ }
+ // multi-term logic similar to the angry/mad logic (angry ~ origin, mad ~ synonym)
+
assertU(adoc(newFieldName, "I am a happy test today but yesterday I was angry", "id", "5150"));
+ assertU(adoc(newFieldName, multiTermOrigin+" is in North Germany.", "id", "040"));
assertU(commit());
assertQ("/select?q=" + newFieldName + ":angry",
"/response/lst[@name='responseHeader']/int[@name='status'] = '0'",
"/response/result[@name='response'][@numFound='1']",
"/response/result[@name='response']/doc/str[@name='id'][.='5150']");
+ assertQ("/select?q=" + newFieldName + ":"+URLEncoder.encode(multiTermOrigin, "UTF-8"),
+ "/response/lst[@name='responseHeader']/int[@name='status'] = '0'",
+ "/response/result[@name='response'][@numFound='1']",
+ "/response/result[@name='response']/doc/str[@name='id'][.='040']");
// add a mapping that will expand a query for "mad" to match docs with "angry"
syns = new HashMap<>();
@@ -172,12 +190,28 @@ public class TestManagedSynonymFilterFactory extends RestTestBase {
assertJQ(endpoint,
"/synonymMappings/managedMap/mad==['angry']");
+ // add a mapping that will expand a query for "multi-term synonym" to match docs with "acronym"
+ syns = new HashMap<>();
+ syns.put(multiTermSynonym, Arrays.asList(multiTermOrigin));
+ assertJPut(endpoint,
+ JSONUtil.toJSON(syns),
+ "/responseHeader/status==0");
+
+ assertJQ(endpoint+"/"+URLEncoder.encode(multiTermSynonym, "UTF-8"),
+ "/"+multiTermSynonym+"==['"+multiTermOrigin+"']");
+
// should not match as the synonym mapping between mad and angry does not
// get applied until core reload
assertQ("/select?q=" + newFieldName + ":mad",
"/response/lst[@name='responseHeader']/int[@name='status'] = '0'",
"/response/result[@name='response'][@numFound='0']");
+ // should not match as the synonym mapping between "origin" and "synonym"
+ // was not added before the document was indexed
+ assertQ("/select?q=" + newFieldName + ":("+URLEncoder.encode(multiTermSynonym, "UTF-8") + ")&sow=false",
+ "/response/lst[@name='responseHeader']/int[@name='status'] = '0'",
+ "/response/result[@name='response'][@numFound='0']");
+
restTestHarness.reload();
// now query for mad and we should see our test doc
@@ -186,6 +220,12 @@ public class TestManagedSynonymFilterFactory extends RestTestBase {
"/response/result[@name='response'][@numFound='1']",
"/response/result[@name='response']/doc/str[@name='id'][.='5150']");
+ // now query for "synonym" and we should see our test doc with "origin"
+ assertQ("/select?q=" + newFieldName + ":("+URLEncoder.encode(multiTermSynonym, "UTF-8") + ")&sow=false",
+ "/response/lst[@name='responseHeader']/int[@name='status'] = '0'",
+ "/response/result[@name='response'][@numFound='1']",
+ "/response/result[@name='response']/doc/str[@name='id'][.='040']");
+
// test for SOLR-6015
syns = new HashMap<>();
syns.put("mb", Arrays.asList("megabyte"));