You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/08/08 13:55:03 UTC
svn commit: r1154935 - in /lucene/dev/trunk/solr:
core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java
example/solr/conf/solrconfig.xml
Author: rmuir
Date: Mon Aug 8 11:55:03 2011
New Revision: 1154935
URL: http://svn.apache.org/viewvc?rev=1154935&view=rev
Log:
SOLR-2688: switch solr 4.0 example to DirectSpellChecker
Modified:
lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java
lucene/dev/trunk/solr/example/solr/conf/solrconfig.xml
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java?rev=1154935&r1=1154934&r2=1154935&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java Mon Aug 8 11:55:03 2011
@@ -473,6 +473,7 @@ public class SpellCheckComponent extends
private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException {
Collection<Token> result = new ArrayList<Token>();
+ assert analyzer != null;
TokenStream ts = analyzer.reusableTokenStream("", new StringReader(q));
ts.reset();
// TODO: support custom attributes
@@ -589,6 +590,8 @@ public class SpellCheckComponent extends
if (initParams.getName(i).equals("spellchecker")) {
NamedList spellchecker = (NamedList) initParams.getVal(i);
String className = (String) spellchecker.get("classname");
+ // TODO: this is a little bit sneaky: warn if class isn't supplied
+ // so that it's mandatory in a future release?
if (className == null)
className = IndexBasedSpellChecker.class.getName();
SolrResourceLoader loader = core.getResourceLoader();
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java?rev=1154935&r1=1154934&r2=1154935&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java Mon Aug 8 11:55:03 2011
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spell.DirectSpellChecker;
import org.apache.lucene.search.spell.StringDistance;
@@ -30,6 +31,7 @@ import org.apache.lucene.search.spell.Su
import org.apache.solr.common.params.SpellingParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
+import org.apache.solr.schema.FieldType;
import org.apache.solr.search.SolrIndexSearcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -65,6 +67,7 @@ public class DirectSolrSpellChecker exte
public static final String SCORE_COMP = AbstractLuceneSpellChecker.SCORE_COMP;
public static final String FREQ_COMP = AbstractLuceneSpellChecker.FREQ_COMP;
public static final String FIELD = AbstractLuceneSpellChecker.FIELD;
+ public static final String FIELD_TYPE = AbstractLuceneSpellChecker.FIELD_TYPE;
public static final String STRING_DISTANCE = AbstractLuceneSpellChecker.STRING_DISTANCE;
public static final String ACCURACY = AbstractLuceneSpellChecker.ACCURACY;
public static final String THRESHOLD_TOKEN_FREQUENCY = IndexBasedSpellChecker.THRESHOLD_TOKEN_FREQUENCY;
@@ -91,6 +94,7 @@ public class DirectSolrSpellChecker exte
private DirectSpellChecker checker = new DirectSpellChecker();
private String field;
+ private String fieldTypeName;
@Override
public String init(NamedList config, SolrCore core) {
@@ -114,6 +118,19 @@ public class DirectSolrSpellChecker exte
sd = (StringDistance) core.getResourceLoader().newInstance(distClass);
field = (String) config.get(FIELD);
+ // setup analyzer for field
+ if (field != null && core.getSchema().getFieldTypeNoEx(field) != null) {
+ analyzer = core.getSchema().getFieldType(field).getQueryAnalyzer();
+ }
+ fieldTypeName = (String) config.get(FIELD_TYPE);
+ if (core.getSchema().getFieldTypes().containsKey(fieldTypeName)) {
+ FieldType fieldType = core.getSchema().getFieldTypes().get(fieldTypeName);
+ analyzer = fieldType.getQueryAnalyzer();
+ }
+ if (analyzer == null) {
+ LOG.info("Using WhitespaceAnalyzer for dictionary: " + name);
+ analyzer = new WhitespaceAnalyzer(core.getSolrConfig().luceneMatchVersion);
+ }
float minAccuracy = DEFAULT_ACCURACY;
Float accuracy = (Float) config.get(ACCURACY);
Modified: lucene/dev/trunk/solr/example/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/solr/conf/solrconfig.xml?rev=1154935&r1=1154934&r2=1154935&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/conf/solrconfig.xml (original)
+++ lucene/dev/trunk/solr/example/solr/conf/solrconfig.xml Mon Aug 8 11:55:03 2011
@@ -1111,37 +1111,39 @@
component
-->
- <!-- a spellchecker built from a field of the main index, and
- written to disk
- -->
+ <!-- a spellchecker built from a field of the main index -->
<lst name="spellchecker">
<str name="name">default</str>
<str name="field">name</str>
- <str name="spellcheckIndexDir">spellchecker</str>
- <!-- uncomment this to require terms to occur in 1% of the documents in order to be included in the dictionary
+ <str name="classname">solr.DirectSolrSpellChecker</str>
+ <!-- the spellcheck distance measure used; the default is the internal Levenshtein -->
+ <str name="distanceMeasure">internal</str>
+ <!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
+ <float name="accuracy">0.5</float>
+ <!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
+ <int name="maxEdits">2</int>
+ <!-- the minimum shared prefix when enumerating terms -->
+ <int name="minPrefix">1</int>
+ <!-- maximum number of inspections per result. -->
+ <int name="maxInspections">5</int>
+ <!-- minimum length of a query term to be considered for correction -->
+ <int name="minQueryLength">4</int>
+ <!-- maximum threshold of documents a query term can appear in and still be considered for correction -->
+ <float name="maxQueryFrequency">0.01</float>
+ <!-- uncomment this to require suggestions to occur in at least 1% of the documents
<float name="thresholdTokenFrequency">.01</float>
-->
</lst>
- <!-- a spellchecker that uses no auxiliary on disk index -->
- <!--
- <lst name="spellchecker">
- <str name="name">direct</str>
- <str name="field">name</str>
- <str name="classname">solr.DirectSolrSpellChecker</str>
- <int name="minPrefix">1</int>
- </lst>
- -->
-
<!-- a spellchecker that uses a different distance measure -->
<!--
<lst name="spellchecker">
<str name="name">jarowinkler</str>
<str name="field">spell</str>
+ <str name="classname">solr.DirectSolrSpellChecker</str>
<str name="distanceMeasure">
org.apache.lucene.search.spell.JaroWinklerDistance
</str>
- <str name="spellcheckIndexDir">spellcheckerJaro</str>
</lst>
-->
@@ -1156,9 +1158,8 @@
<lst name="spellchecker">
<str name="name">freq</str>
<str name="field">lowerfilt</str>
- <str name="spellcheckIndexDir">spellcheckerFreq</str>
+ <str name="classname">solr.DirectSolrSpellChecker</str>
<str name="comparatorClass">freq</str>
- <str name="buildOnCommit">true</str>
-->
<!-- A spellchecker that reads the list of words from a file -->