You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by cp...@apache.org on 2017/04/07 12:29:24 UTC

[2/2] lucene-solr git commit: SOLR-10264: Fixes multi-term synonym parsing in ManagedSynonymFilterFactory. (Jörg Rathlev, Steve Rowe, Christine Poerschke)

SOLR-10264: Fixes multi-term synonym parsing in ManagedSynonymFilterFactory.
(Jörg Rathlev, Steve Rowe, Christine Poerschke)


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/adabfdd9
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/adabfdd9
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/adabfdd9

Branch: refs/heads/master
Commit: adabfdd9c2a50b4141f485655df0d048df21bd23
Parents: 2060215
Author: Christine Poerschke <cp...@apache.org>
Authored: Fri Apr 7 11:11:23 2017 +0100
Committer: Christine Poerschke <cp...@apache.org>
Committed: Fri Apr 7 11:11:23 2017 +0100

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  3 ++
 .../analysis/ManagedSynonymFilterFactory.java   |  7 ++--
 .../solr/collection1/conf/schema-rest.xml       |  2 +-
 .../TestManagedSynonymFilterFactory.java        | 40 ++++++++++++++++++++
 4 files changed, 48 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/adabfdd9/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 9e70c24..62e9189 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -163,6 +163,9 @@ Bug Fixes
 * SOLR-10323: fix to SpellingQueryConverter to properly strip out colons in field-specific queries.
   (Amrit Sarkar via James Dyer)
 
+* SOLR-10264: Fixes multi-term synonym parsing in ManagedSynonymFilterFactory.
+  (Jörg Rathlev, Steve Rowe, Christine Poerschke)
+
 Other Changes
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/adabfdd9/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java b/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java
index 3b5ce2e..ffbbb36 100644
--- a/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java
+++ b/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java
@@ -33,6 +33,7 @@ import org.apache.lucene.analysis.synonym.SynonymFilterFactory;
 import org.apache.lucene.analysis.synonym.SynonymMap;
 import org.apache.lucene.analysis.util.ResourceLoader;
 import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.CharsRefBuilder;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrException.ErrorCode;
 import org.apache.solr.common.util.NamedList;
@@ -358,9 +359,9 @@ public class ManagedSynonymFilterFactory extends BaseManagedTokenFilterFactory {
         for (String term : cpsm.mappings.keySet()) {
           for (String mapping : cpsm.mappings.get(term)) {
             // apply the case setting to match the behavior of the SynonymMap builder
-            String casedTerm = synonymManager.applyCaseSetting(ignoreCase, term);
-            String casedMapping = synonymManager.applyCaseSetting(ignoreCase, mapping);
-            add(new CharsRef(casedTerm), new CharsRef(casedMapping), false);
+            CharsRef casedTerm = analyze(synonymManager.applyCaseSetting(ignoreCase, term), new CharsRefBuilder());
+            CharsRef casedMapping = analyze(synonymManager.applyCaseSetting(ignoreCase, mapping), new CharsRefBuilder());
+            add(casedTerm, casedMapping, false);
           }          
         }
       }      

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/adabfdd9/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml
----------------------------------------------------------------------
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml b/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml
index aa4c21d..85c822a 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml
@@ -494,7 +494,7 @@
   <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
 
   <!-- Field type where english stopwords are managed by the REST API -->
-  <fieldType name="managed_en" class="solr.TextField">
+  <fieldType name="managed_en" class="solr.TextField" autoGeneratePhraseQueries="false">
     <analyzer>
       <tokenizer class="solr.StandardTokenizerFactory"/>
       <filter class="solr.ManagedStopFilterFactory" managed="english"/>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/adabfdd9/solr/core/src/test/org/apache/solr/rest/schema/analysis/TestManagedSynonymFilterFactory.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/rest/schema/analysis/TestManagedSynonymFilterFactory.java b/solr/core/src/test/org/apache/solr/rest/schema/analysis/TestManagedSynonymFilterFactory.java
index 26fcde1..5cacc48 100644
--- a/solr/core/src/test/org/apache/solr/rest/schema/analysis/TestManagedSynonymFilterFactory.java
+++ b/solr/core/src/test/org/apache/solr/rest/schema/analysis/TestManagedSynonymFilterFactory.java
@@ -16,6 +16,7 @@
  */
 package org.apache.solr.rest.schema.analysis;
 import java.io.File;
+import java.net.URLEncoder;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -154,13 +155,30 @@ public class TestManagedSynonymFilterFactory extends RestTestBase {
             "count(/response/lst[@name='field']) = 1",
             "/response/lst[@name='responseHeader']/int[@name='status'] = '0'");
 
+    // multi-term synonym logic - SOLR-10264
+    final String multiTermOrigin;
+    final String multiTermSynonym;
+    if (random().nextBoolean()) {
+      multiTermOrigin  = "hansestadt hamburg";
+      multiTermSynonym = "hh";
+    } else {
+      multiTermOrigin  = "hh";
+      multiTermSynonym = "hansestadt hamburg";
+    }
+    // multi-term logic similar to the angry/mad logic (angry ~ origin, mad ~ synonym)
+
     assertU(adoc(newFieldName, "I am a happy test today but yesterday I was angry", "id", "5150"));
+    assertU(adoc(newFieldName, multiTermOrigin+" is in North Germany.", "id", "040"));
     assertU(commit());
 
     assertQ("/select?q=" + newFieldName + ":angry",
             "/response/lst[@name='responseHeader']/int[@name='status'] = '0'",
             "/response/result[@name='response'][@numFound='1']",
             "/response/result[@name='response']/doc/str[@name='id'][.='5150']");    
+    assertQ("/select?q=" + newFieldName + ":"+URLEncoder.encode(multiTermOrigin, "UTF-8"),
+        "/response/lst[@name='responseHeader']/int[@name='status'] = '0'",
+        "/response/result[@name='response'][@numFound='1']",
+        "/response/result[@name='response']/doc/str[@name='id'][.='040']");
     
     // add a mapping that will expand a query for "mad" to match docs with "angry"
     syns = new HashMap<>();
@@ -172,12 +190,28 @@ public class TestManagedSynonymFilterFactory extends RestTestBase {
     assertJQ(endpoint, 
         "/synonymMappings/managedMap/mad==['angry']");
 
+    // add a mapping that will expand a query for "multi-term synonym" to match docs with "acronym"
+    syns = new HashMap<>();
+    syns.put(multiTermSynonym, Arrays.asList(multiTermOrigin));
+    assertJPut(endpoint,
+               JSONUtil.toJSON(syns),
+               "/responseHeader/status==0");
+
+    assertJQ(endpoint+"/"+URLEncoder.encode(multiTermSynonym, "UTF-8"),
+        "/"+multiTermSynonym+"==['"+multiTermOrigin+"']");
+
     // should not match as the synonym mapping between mad and angry does not    
     // get applied until core reload
     assertQ("/select?q=" + newFieldName + ":mad",
         "/response/lst[@name='responseHeader']/int[@name='status'] = '0'",
         "/response/result[@name='response'][@numFound='0']");    
     
+    // should not match as the synonym mapping between "origin" and "synonym"
+    // was not added before the document was indexed
+    assertQ("/select?q=" + newFieldName + ":("+URLEncoder.encode(multiTermSynonym, "UTF-8") + ")&sow=false",
+        "/response/lst[@name='responseHeader']/int[@name='status'] = '0'",
+        "/response/result[@name='response'][@numFound='0']");
+
     restTestHarness.reload();
 
     // now query for mad and we should see our test doc
@@ -186,6 +220,12 @@ public class TestManagedSynonymFilterFactory extends RestTestBase {
         "/response/result[@name='response'][@numFound='1']",
         "/response/result[@name='response']/doc/str[@name='id'][.='5150']");    
     
+    // now query for "synonym" and we should see our test doc with "origin"
+    assertQ("/select?q=" + newFieldName + ":("+URLEncoder.encode(multiTermSynonym, "UTF-8") + ")&sow=false",
+        "/response/lst[@name='responseHeader']/int[@name='status'] = '0'",
+        "/response/result[@name='response'][@numFound='1']",
+        "/response/result[@name='response']/doc/str[@name='id'][.='040']");
+
     // test for SOLR-6015
     syns = new HashMap<>();
     syns.put("mb", Arrays.asList("megabyte"));