You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/08/08 13:55:03 UTC

svn commit: r1154935 - in /lucene/dev/trunk/solr: core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java example/solr/conf/solrconfig.xml

Author: rmuir
Date: Mon Aug  8 11:55:03 2011
New Revision: 1154935

URL: http://svn.apache.org/viewvc?rev=1154935&view=rev
Log:
SOLR-2688: switch solr 4.0 example to DirectSpellChecker

Modified:
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java
    lucene/dev/trunk/solr/example/solr/conf/solrconfig.xml

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java?rev=1154935&r1=1154934&r2=1154935&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java Mon Aug  8 11:55:03 2011
@@ -473,6 +473,7 @@ public class SpellCheckComponent extends
 
   private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException {
     Collection<Token> result = new ArrayList<Token>();
+    assert analyzer != null;
     TokenStream ts = analyzer.reusableTokenStream("", new StringReader(q));
     ts.reset();
     // TODO: support custom attributes
@@ -589,6 +590,8 @@ public class SpellCheckComponent extends
         if (initParams.getName(i).equals("spellchecker")) {
           NamedList spellchecker = (NamedList) initParams.getVal(i);
           String className = (String) spellchecker.get("classname");
+          // TODO: this is a little bit sneaky: warn if class isnt supplied
+          // so that its mandatory in a future release?
           if (className == null)
             className = IndexBasedSpellChecker.class.getName();
           SolrResourceLoader loader = core.getResourceLoader();

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java?rev=1154935&r1=1154934&r2=1154935&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java Mon Aug  8 11:55:03 2011
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.util.Comparator;
 
 import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.spell.DirectSpellChecker;
 import org.apache.lucene.search.spell.StringDistance;
@@ -30,6 +31,7 @@ import org.apache.lucene.search.spell.Su
 import org.apache.solr.common.params.SpellingParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrCore;
+import org.apache.solr.schema.FieldType;
 import org.apache.solr.search.SolrIndexSearcher;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -65,6 +67,7 @@ public class DirectSolrSpellChecker exte
   public static final String SCORE_COMP = AbstractLuceneSpellChecker.SCORE_COMP;
   public static final String FREQ_COMP = AbstractLuceneSpellChecker.FREQ_COMP;
   public static final String FIELD = AbstractLuceneSpellChecker.FIELD;
+  public static final String FIELD_TYPE = AbstractLuceneSpellChecker.FIELD_TYPE;
   public static final String STRING_DISTANCE = AbstractLuceneSpellChecker.STRING_DISTANCE;
   public static final String ACCURACY = AbstractLuceneSpellChecker.ACCURACY;
   public static final String THRESHOLD_TOKEN_FREQUENCY = IndexBasedSpellChecker.THRESHOLD_TOKEN_FREQUENCY;
@@ -91,6 +94,7 @@ public class DirectSolrSpellChecker exte
   
   private DirectSpellChecker checker = new DirectSpellChecker();
   private String field;
+  private String fieldTypeName;
   
   @Override
   public String init(NamedList config, SolrCore core) {
@@ -114,6 +118,19 @@ public class DirectSolrSpellChecker exte
       sd = (StringDistance) core.getResourceLoader().newInstance(distClass);
 
     field = (String) config.get(FIELD);
+    // setup analyzer for field
+    if (field != null && core.getSchema().getFieldTypeNoEx(field) != null)  {
+      analyzer = core.getSchema().getFieldType(field).getQueryAnalyzer();
+    }
+    fieldTypeName = (String) config.get(FIELD_TYPE);
+    if (core.getSchema().getFieldTypes().containsKey(fieldTypeName))  {
+      FieldType fieldType = core.getSchema().getFieldTypes().get(fieldTypeName);
+      analyzer = fieldType.getQueryAnalyzer();
+    }
+    if (analyzer == null)   {
+      LOG.info("Using WhitespaceAnalyzer for dictionary: " + name);
+      analyzer = new WhitespaceAnalyzer(core.getSolrConfig().luceneMatchVersion);
+    }
     
     float minAccuracy = DEFAULT_ACCURACY;
     Float accuracy = (Float) config.get(ACCURACY);

Modified: lucene/dev/trunk/solr/example/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/solr/conf/solrconfig.xml?rev=1154935&r1=1154934&r2=1154935&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/conf/solrconfig.xml (original)
+++ lucene/dev/trunk/solr/example/solr/conf/solrconfig.xml Mon Aug  8 11:55:03 2011
@@ -1111,37 +1111,39 @@
          component
       -->
 
-    <!-- a spellchecker built from a field of the main index, and
-         written to disk
-      -->
+    <!-- a spellchecker built from a field of the main index -->
     <lst name="spellchecker">
       <str name="name">default</str>
       <str name="field">name</str>
-      <str name="spellcheckIndexDir">spellchecker</str>
-      <!-- uncomment this to require terms to occur in 1% of the documents in order to be included in the dictionary
+      <str name="classname">solr.DirectSolrSpellChecker</str>
+      <!-- the spellcheck distance measure used, the default is the internal levenshtein -->
+      <str name="distanceMeasure">internal</str>
+      <!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
+      <float name="accuracy">0.5</float>
+      <!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
+      <int name="maxEdits">2</int>
+      <!-- the minimum shared prefix when enumerating terms -->
+      <int name="minPrefix">1</int>
+      <!-- maximum number of inspections per result. -->
+      <int name="maxInspections">5</int>
+      <!-- minimum length of a query term to be considered for correction -->
+      <int name="minQueryLength">4</int>
+      <!-- maximum threshold of documents a query term can appear to be considered for correction -->
+      <float name="maxQueryFrequency">0.01</float>
+      <!-- uncomment this to require suggestions to occur in 1% of the documents
       	<float name="thresholdTokenFrequency">.01</float>
       -->
     </lst>
 
-    <!-- a spellchecker that uses no auxiliary on disk index -->
-    <!--
-       <lst name="spellchecker">
-         <str name="name">direct</str>
-         <str name="field">name</str>
-         <str name="classname">solr.DirectSolrSpellChecker</str>
-         <int name="minPrefix">1</int>
-       </lst>
-      -->
-
     <!-- a spellchecker that uses a different distance measure -->
     <!--
        <lst name="spellchecker">
          <str name="name">jarowinkler</str>
          <str name="field">spell</str>
+         <str name="classname">solr.DirectSolrSpellChecker</str>
          <str name="distanceMeasure">
            org.apache.lucene.search.spell.JaroWinklerDistance
          </str>
-         <str name="spellcheckIndexDir">spellcheckerJaro</str>
        </lst>
      -->
 
@@ -1156,9 +1158,8 @@
        <lst name="spellchecker">
          <str name="name">freq</str>
          <str name="field">lowerfilt</str>
-         <str name="spellcheckIndexDir">spellcheckerFreq</str>
+         <str name="classname">solr.DirectSolrSpellChecker</str>
          <str name="comparatorClass">freq</str>
-         <str name="buildOnCommit">true</str>
       -->
 
     <!-- A spellchecker that reads the list of words from a file -->