You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2010/08/20 15:56:35 UTC

svn commit: r987509 - in /lucene/dev/trunk/solr: ./ src/java/org/apache/solr/handler/component/ src/test/org/apache/solr/handler/component/ src/test/test-files/solr/conf/

Author: gsingers
Date: Fri Aug 20 13:56:35 2010
New Revision: 987509

URL: http://svn.apache.org/viewvc?rev=987509&view=rev
Log:
SOLR-1630: fix minor collation issue

Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java
    lucene/dev/trunk/solr/src/test/test-files/solr/conf/schema.xml
    lucene/dev/trunk/solr/src/test/test-files/solr/conf/solrconfig.xml

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=987509&r1=987508&r2=987509&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Fri Aug 20 13:56:35 2010
@@ -412,7 +412,10 @@ Bug Fixes
 * SOLR-2036: Avoid expensive fieldCache ram estimation for the
   admin stats page. (yonik)
 
-* SOLR-2047: ReplicationHandler should accept bool type for enable flag. (koji)  
+* SOLR-2047: ReplicationHandler should accept bool type for enable flag. (koji)
+
+* SOLR-1630: Fix spell checking collation issue related to token positions (rmuir, gsingers) 
+
 
 Other Changes
 ----------------------

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java?rev=987509&r1=987508&r2=987509&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java Fri Aug 20 13:56:35 2010
@@ -450,6 +450,8 @@ public class SpellCheckComponent extends
       for (Iterator<Map.Entry<Token, String>> bestIter = best.entrySet().iterator(); bestIter.hasNext();) {
         Map.Entry<Token, String> entry = bestIter.next();
         Token tok = entry.getKey();
+        // Replacements are applied in query order; a token injected at the same position (position increment 0) may carry offsets that earlier replacements have already invalidated, so skip it.
+        if (tok.getPositionIncrement() == 0) continue;
         collation.replace(tok.startOffset() + offset, 
           tok.endOffset() + offset, entry.getValue());
         offset += entry.getValue().length() - (tok.endOffset() - tok.startOffset());

Modified: lucene/dev/trunk/solr/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java?rev=987509&r1=987508&r2=987509&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java (original)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java Fri Aug 20 13:56:35 2010
@@ -57,6 +57,7 @@ public class SpellCheckComponentTest ext
     assertNull(h.validateUpdate(adoc("id", "6", "lowerfilt", "boue")));
     assertNull(h.validateUpdate(adoc("id", "7", "lowerfilt", "glue")));
     assertNull(h.validateUpdate(adoc("id", "8", "lowerfilt", "blee")));
+    assertNull(h.validateUpdate(adoc("id", "9", "lowerfilt", "pixmaa")));
     assertNull(h.validateUpdate(commit()));
   }
   
@@ -235,6 +236,30 @@ public class SpellCheckComponentTest ext
     collation = (String) suggestions.get("collation");
     assertEquals("document brown",collation);
   }
+  
+  @Test
+  public void testCollate2() throws Exception {
+    SolrCore core = h.getCore();
+    SearchComponent speller = core.getSearchComponent("spellcheck");
+    assertTrue("speller is null and it shouldn't be", speller != null);
+
+    ModifiableSolrParams params = new ModifiableSolrParams();
+    params.add(CommonParams.QT, "spellCheckCompRH");
+    params.add(SpellCheckComponent.SPELLCHECK_BUILD, "true");
+    params.add(CommonParams.Q, "pixma-a-b-c-d-e-f-g");
+    params.add(SpellCheckComponent.COMPONENT_NAME, "true");
+    params.add(SpellCheckComponent.SPELLCHECK_COLLATE, "true");
+
+    SolrRequestHandler handler = core.getRequestHandler("spellCheckCompRH");
+    SolrQueryResponse rsp = new SolrQueryResponse();
+    rsp.add("responseHeader", new SimpleOrderedMap());
+    handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
+    NamedList values = rsp.getValues();
+    NamedList spellCheck = (NamedList) values.get("spellcheck");
+    NamedList suggestions = (NamedList) spellCheck.get("suggestions");
+    String collation = (String) suggestions.get("collation");
+    assertEquals("pixmaa", collation);
+  }
 
   @Test
   public void testCorrectSpelling() throws Exception {

Modified: lucene/dev/trunk/solr/src/test/test-files/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/test-files/solr/conf/schema.xml?rev=987509&r1=987508&r2=987509&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/test-files/solr/conf/schema.xml (original)
+++ lucene/dev/trunk/solr/src/test/test-files/solr/conf/schema.xml Fri Aug 20 13:56:35 2010
@@ -197,6 +197,14 @@
         <filter class="solr.LowerCaseFilterFactory"/>
       </analyzer>
     </fieldtype>
+    <fieldtype name="lowerpunctfilt" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter name="syn" class="solr.SynonymFilterFactory" synonyms="synonyms.txt" expand="true"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="1" splitOnCaseChange="1"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldtype>
     <fieldtype name="patternreplacefilt" class="solr.TextField">
       <analyzer type="index">
         <tokenizer class="solr.KeywordTokenizerFactory"/>

Modified: lucene/dev/trunk/solr/src/test/test-files/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/test-files/solr/conf/solrconfig.xml?rev=987509&r1=987508&r2=987509&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/test-files/solr/conf/solrconfig.xml (original)
+++ lucene/dev/trunk/solr/src/test/test-files/solr/conf/solrconfig.xml Fri Aug 20 13:56:35 2010
@@ -333,7 +333,8 @@
   </requestHandler>
 
   <searchComponent name="spellcheck" class="org.apache.solr.handler.component.SpellCheckComponent">
-    <str name="queryAnalyzerFieldType">lowerfilt</str>
+    <!-- This field type intentionally differs slightly from the analysis applied to the indexed field, so the tests exercise handling of token offset changes -->
+    <str name="queryAnalyzerFieldType">lowerpunctfilt</str>
 
     <lst name="spellchecker">
       <str name="name">default</str>