You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2010/08/18 16:52:02 UTC

svn commit: r986713 - in /lucene/dev/branches/branch_3x: lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/ solr/ solr/example/solr/conf/ solr/src/java/org/apache/solr/spelling/ solr/src/test/org/apache/solr/spelling/ solr/src/test/te...

Author: gsingers
Date: Wed Aug 18 14:52:01 2010
New Revision: 986713

URL: http://svn.apache.org/viewvc?rev=986713&view=rev
Log:
SOLR-2053: hook in support for specifying comparators

Added:
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/SampleComparator.java   (with props)
Modified:
    lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java
    lucene/dev/branches/branch_3x/solr/CHANGES.txt
    lucene/dev/branches/branch_3x/solr/example/solr/conf/solrconfig.xml
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java
    lucene/dev/branches/branch_3x/solr/src/test/test-files/solr/conf/solrconfig.xml

Modified: lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java?rev=986713&r1=986712&r2=986713&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java Wed Aug 18 14:52:01 2010
@@ -176,6 +176,10 @@ public class SpellChecker implements jav
     this.comparator = comparator;
   }
 
+  public Comparator<SuggestWord> getComparator() {
+    return comparator;
+  }
+
   /**
    * Sets the {@link StringDistance} implementation for this
    * {@link SpellChecker} instance.

Modified: lucene/dev/branches/branch_3x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/CHANGES.txt?rev=986713&r1=986712&r2=986713&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/solr/CHANGES.txt Wed Aug 18 14:52:01 2010
@@ -193,6 +193,8 @@ New Features
 
 * SOLR-2030: Make FastVectorHighlighter use of SolrEncoder. (koji)
 
+* SOLR-2053: Add support for custom comparators in Solr spellchecker, per LUCENE-2479 (gsingers)
+
 Optimizations
 ----------------------
 

Modified: lucene/dev/branches/branch_3x/solr/example/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/example/solr/conf/solrconfig.xml?rev=986713&r1=986712&r2=986713&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/example/solr/conf/solrconfig.xml (original)
+++ lucene/dev/branches/branch_3x/solr/example/solr/conf/solrconfig.xml Wed Aug 18 14:52:01 2010
@@ -704,6 +704,19 @@
       <str name="spellcheckIndexDir">./spellchecker2</str>
     </lst>
      -->
+    <!-- Use an alternate comparator -->
+    <!--<lst name="spellchecker">
+      <str name="name">freq</str>
+      <str name="field">lowerfilt</str>
+      <str name="spellcheckIndexDir">spellcheckerFreq</str>
+      &lt;!&ndash; comparatorClass can be one of:
+        1. score (default)
+        2. freq (Frequency first, then score)
+        3. A fully qualified class name
+       &ndash;&gt;
+      <str name="comparatorClass">freq</str>
+      <str name="buildOnCommit">true</str>
+    -->
 
     <!-- a file based spell checker
     <lst name="spellchecker">

Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java?rev=986713&r1=986712&r2=986713&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java Wed Aug 18 14:52:01 2010
@@ -22,7 +22,12 @@ import java.io.File;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.Comparator;
 import java.util.List;
+
+import org.apache.lucene.search.spell.SuggestWord;
+import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
+import org.apache.lucene.search.spell.SuggestWordQueue;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -60,6 +65,11 @@ public abstract class AbstractLuceneSpel
   public static final String ACCURACY = "accuracy";
   public static final String STRING_DISTANCE = "distanceMeasure";
   public static final String FIELD_TYPE = "fieldType";
+  public static final String COMPARATOR_CLASS = "comparatorClass";
+
+  public static final String SCORE_COMP = "score";
+  public static final String FREQ_COMP = "freq";
+
   protected String field;
   protected String fieldTypeName;
   protected org.apache.lucene.search.spell.SpellChecker spellChecker;
@@ -89,6 +99,19 @@ public abstract class AbstractLuceneSpel
       }
     }
     sourceLocation = (String) config.get(LOCATION);
+    String compClass = (String) config.get(COMPARATOR_CLASS);
+    Comparator<SuggestWord> comp = null;
+    if (compClass != null){
+      if (compClass.equalsIgnoreCase(SCORE_COMP)){
+        comp = SuggestWordQueue.DEFAULT_COMPARATOR;
+      } else if (compClass.equalsIgnoreCase(FREQ_COMP)){
+        comp = new SuggestWordFrequencyComparator();
+      } else{//must be a FQCN
+        comp = (Comparator<SuggestWord>) core.getResourceLoader().newInstance(compClass);
+      }
+    } else {
+      comp = SuggestWordQueue.DEFAULT_COMPARATOR;
+    }
     field = (String) config.get(FIELD);
     String strDistanceName = (String)config.get(STRING_DISTANCE);
     if (strDistanceName != null) {
@@ -99,7 +122,7 @@ public abstract class AbstractLuceneSpel
     }
     try {
       initIndex();
-      spellChecker = new SpellChecker(index, sd);
+      spellChecker = new SpellChecker(index, sd, comp);
     } catch (IOException e) {
       throw new RuntimeException(e);
     }
@@ -230,4 +253,8 @@ public abstract class AbstractLuceneSpel
   public StringDistance getStringDistance() {
     return sd;
   }
+
+  public SpellChecker getSpellChecker() {
+    return spellChecker;
+  }
 }

Modified: lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java?rev=986713&r1=986712&r2=986713&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java Wed Aug 18 14:52:01 2010
@@ -27,10 +27,14 @@ import org.apache.lucene.index.IndexWrit
 import org.apache.lucene.search.spell.JaroWinklerDistance;
 import org.apache.lucene.search.spell.SpellChecker;
 import org.apache.lucene.search.spell.StringDistance;
+import org.apache.lucene.search.spell.SuggestWord;
+import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrCore;
+import org.apache.solr.handler.component.SearchComponent;
+import org.apache.solr.handler.component.SpellCheckComponent;
 import org.apache.solr.util.RefCounted;
 import org.apache.solr.search.SolrIndexSearcher;
 import org.junit.AfterClass;
@@ -39,6 +43,7 @@ import org.junit.Test;
 
 import java.io.File;
 import java.util.Collection;
+import java.util.Comparator;
 import java.util.Date;
 import java.util.Map;
 
@@ -76,6 +81,27 @@ public class IndexBasedSpellCheckerTest 
   }
 
   @Test
+  public void testComparator() throws Exception {
+    SpellCheckComponent component = (SpellCheckComponent) h.getCore().getSearchComponent("spellcheck");
+    assertNotNull(component);
+    AbstractLuceneSpellChecker spellChecker;
+    Comparator<SuggestWord> comp;
+    spellChecker = (AbstractLuceneSpellChecker) component.getSpellChecker("freq");
+    assertNotNull(spellChecker);
+    comp = spellChecker.getSpellChecker().getComparator();
+    assertNotNull(comp);
+    assertTrue(comp instanceof SuggestWordFrequencyComparator);
+
+    spellChecker = (AbstractLuceneSpellChecker) component.getSpellChecker("fqcn");
+    assertNotNull(spellChecker);
+    comp = spellChecker.getSpellChecker().getComparator();
+    assertNotNull(comp);
+    assertTrue(comp instanceof SampleComparator);
+
+
+  }
+
+  @Test
   public void testSpelling() throws Exception {
     IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
 

Added: lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/SampleComparator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/SampleComparator.java?rev=986713&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/SampleComparator.java (added)
+++ lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/SampleComparator.java Wed Aug 18 14:52:01 2010
@@ -0,0 +1,36 @@
+package org.apache.solr.spelling;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.spell.SuggestWord;
+
+import java.util.Comparator;
+
+
+/**
+ * Comparator for testing purposes: orders {@link SuggestWord}s by
+ * comparing their {@code string} fields with {@code compareTo}.
+ **/
+public class SampleComparator implements Comparator<SuggestWord> {
+
+
+  @Override
+  public int compare(SuggestWord suggestWord, SuggestWord suggestWord1) {
+    return suggestWord.string.compareTo(suggestWord1.string);
+  }
+
+}

Propchange: lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/SampleComparator.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/dev/branches/branch_3x/solr/src/test/test-files/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/test/test-files/solr/conf/solrconfig.xml?rev=986713&r1=986712&r2=986713&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/test/test-files/solr/conf/solrconfig.xml (original)
+++ lucene/dev/branches/branch_3x/solr/src/test/test-files/solr/conf/solrconfig.xml Wed Aug 18 14:52:01 2010
@@ -357,6 +357,27 @@
       <str name="characterEncoding">UTF-8</str>
       <str name="spellcheckIndexDir">spellchecker3</str>
     </lst>
+    <!-- Comparator -->
+    <lst name="spellchecker">
+      <str name="name">freq</str>
+      <str name="field">lowerfilt</str>
+      <str name="spellcheckIndexDir">spellcheckerFreq</str>
+      <!-- comparatorClass can be one of:
+        1. score (default)
+        2. freq (Frequency first, then score)
+        3. A fully qualified class name
+       -->
+      <str name="comparatorClass">freq</str>
+      <str name="buildOnCommit">true</str>
+    </lst>
+    <lst name="spellchecker">
+      <str name="name">fqcn</str>
+      <str name="field">lowerfilt</str>
+      <str name="spellcheckIndexDir">spellcheckerFQCN</str>
+      <str name="comparatorClass">org.apache.solr.spelling.SampleComparator</str>
+      <str name="buildOnCommit">true</str>
+    </lst>
+
   </searchComponent>
 
   <searchComponent name="termsComp" class="org.apache.solr.handler.component.TermsComponent"/>