You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by Apache Wiki <wi...@apache.org> on 2011/09/27 16:53:55 UTC

[Solr Wiki] Update of "SpellCheckComponent" by DenizDogan

Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Solr Wiki" for change notification.

The "SpellCheckComponent" page has been changed by DenizDogan:
http://wiki.apache.org/solr/SpellCheckComponent?action=diff&rev1=53&rev2=54

Comment:
Fixing ugly indentation.

  
  {{{
  <lst name="spellcheck">
-  <lst name="suggestions">
+   <lst name="suggestions">
-   <lst name="delll">
+     <lst name="delll">
-         <int name="numFound">1</int>
+       <int name="numFound">1</int>
-         <int name="startOffset">0</int>
+       <int name="startOffset">0</int>
-         <int name="endOffset">5</int>
+       <int name="endOffset">5</int>
-         <arr name="suggestion">
+       <arr name="suggestion">
-          <str>dell</str>
+         <str>dell</str>
-         </arr>
+       </arr>
-   </lst>
+     </lst>
-   <lst name="ultrashar">
+     <lst name="ultrashar">
-         <int name="numFound">1</int>
+       <int name="numFound">1</int>
-         <int name="startOffset">6</int>
+       <int name="startOffset">6</int>
-         <int name="endOffset">15</int>
+       <int name="endOffset">15</int>
-         <arr name="suggestion">
+       <arr name="suggestion">
-          <str>ultrasharp</str>
+         <str>ultrasharp</str>
-         </arr>
+       </arr>
-   </lst>
+     </lst>
-   <str name="collation">dell ultrasharp</str>
+     <str name="collation">dell ultrasharp</str>
-  </lst>
+   </lst>
  </lst>
  }}}
  <<Anchor(configuration)>>
@@ -50, +50 @@

  {{{
  <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
  
-     <lst name="spellchecker">
+   <lst name="spellchecker">
-       <!--
+     <!--
-            Optional, but required when more than one spellchecker is configured.
+         Optional, but required when more than one spellchecker is configured.
-            Select non-default name with spellcheck.dictionary in request handler.
+         Select non-default name with spellcheck.dictionary in request handler.
-       -->
+     -->
-       <str name="name">default</str>
+     <str name="name">default</str>
-       <!-- The classname is optional, defaults to IndexBasedSpellChecker -->
+     <!-- The classname is optional, defaults to IndexBasedSpellChecker -->
-       <str name="classname">solr.IndexBasedSpellChecker</str>
+     <str name="classname">solr.IndexBasedSpellChecker</str>
-       <!--
+     <!--
-                Load tokens from the following field for spell checking,
+         Load tokens from the following field for spell checking,
-                the analyzer for the field's type as defined in schema.xml is used
+         the analyzer for the field's type as defined in schema.xml is used
-       -->
+     -->
-       <str name="field">spell</str>
+     <str name="field">spell</str>
-       <!-- Optional, by default use in-memory index (RAMDirectory) -->
+     <!-- Optional, by default use in-memory index (RAMDirectory) -->
-       <str name="spellcheckIndexDir">./spellchecker</str>
+     <str name="spellcheckIndexDir">./spellchecker</str>
-       <!-- Set the accuracy (float) to be used for the suggestions. Default is 0.5 -->
+     <!-- Set the accuracy (float) to be used for the suggestions. Default is 0.5 -->
-       <str name="accuracy">0.7</str>
+     <str name="accuracy">0.7</str>
-       <!-- Require terms to occur in 1/100th of 1% of documents in order to be included in the dictionary -->
+     <!-- Require terms to occur in 1/100th of 1% of documents in order to be included in the dictionary -->
-       <float name="thresholdTokenFrequency">.0001</float>
+     <float name="thresholdTokenFrequency">.0001</float>
-     </lst>
+   </lst>
-     <!-- Example of using different distance measure -->
+   <!-- Example of using different distance measure -->
-     <lst name="spellchecker">
+   <lst name="spellchecker">
-       <str name="name">jarowinkler</str>
+     <str name="name">jarowinkler</str>
-       <str name="field">lowerfilt</str>
+     <str name="field">lowerfilt</str>
-       <!-- Use a different Distance Measure -->
+     <!-- Use a different Distance Measure -->
-       <str name="distanceMeasure">org.apache.lucene.search.spell.JaroWinklerDistance</str>
+     <str name="distanceMeasure">org.apache.lucene.search.spell.JaroWinklerDistance</str>
-       <str name="spellcheckIndexDir">./spellchecker</str>
+     <str name="spellcheckIndexDir">./spellchecker</str>
  
-     </lst>
+   </lst>
  
-     <!-- This field type's analyzer is used by the QueryConverter to tokenize the value for "q" parameter -->
+   <!-- This field type's analyzer is used by the QueryConverter to tokenize the value for "q" parameter -->
-     <str name="queryAnalyzerFieldType">textSpell</str>
+   <str name="queryAnalyzerFieldType">textSpell</str>
  </searchComponent>
  <!--
-   The SpellingQueryConverter to convert raw (CommonParams.Q) queries into tokens.  Uses a simple regular expression
+     The SpellingQueryConverter to convert raw (CommonParams.Q) queries into tokens.  Uses a simple regular expression
-   to strip off field markup, boosts, ranges, etc. but it is not guaranteed to match an exact parse from the query parser.
+     to strip off field markup, boosts, ranges, etc. but it is not guaranteed to match an exact parse from the query parser.
  
-   Optional, defaults to solr.SpellingQueryConverter
+     Optional, defaults to solr.SpellingQueryConverter
  -->
  <queryConverter name="queryConverter" class="solr.SpellingQueryConverter"/>
  
  <!--  Add to a RequestHandler
- !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- NOTE:  YOU LIKELY DO NOT WANT A SEPARATE REQUEST HANDLER FOR THIS COMPONENT.  THIS IS DONE HERE SOLELY FOR
+      NOTE:  YOU LIKELY DO NOT WANT A SEPARATE REQUEST HANDLER FOR THIS COMPONENT.  THIS IS DONE HERE SOLELY FOR
- THE SIMPLICITY OF THE EXAMPLE.  YOU WILL LIKELY WANT TO BIND THE COMPONENT TO THE /select STANDARD REQUEST HANDLER.
+      THE SIMPLICITY OF THE EXAMPLE.  YOU WILL LIKELY WANT TO BIND THE COMPONENT TO THE /select STANDARD REQUEST HANDLER.
- !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  -->
  <requestHandler name="/spellCheckCompRH" class="solr.SearchHandler">
-     <lst name="defaults">
+   <lst name="defaults">
-       <!-- Optional, must match spell checker's name as defined above, defaults to "default" -->
+     <!-- Optional, must match spell checker's name as defined above, defaults to "default" -->
-       <str name="spellcheck.dictionary">default</str>
+     <str name="spellcheck.dictionary">default</str>
-       <!-- omp = Only More Popular -->
+     <!-- omp = Only More Popular -->
-       <str name="spellcheck.onlyMorePopular">false</str>
+     <str name="spellcheck.onlyMorePopular">false</str>
-       <!-- exr = Extended Results -->
+     <!-- exr = Extended Results -->
-       <str name="spellcheck.extendedResults">false</str>
+     <str name="spellcheck.extendedResults">false</str>
-       <!--  The number of suggestions to return -->
+     <!--  The number of suggestions to return -->
-       <str name="spellcheck.count">1</str>
+     <str name="spellcheck.count">1</str>
-     </lst>
+   </lst>
- <!--  Add to a RequestHandler
+   <!--  Add to a RequestHandler
- !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+        !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- REPEAT NOTE:  YOU LIKELY DO NOT WANT A SEPARATE REQUEST HANDLER FOR THIS COMPONENT.  THIS IS DONE HERE SOLELY FOR
+        REPEAT NOTE:  YOU LIKELY DO NOT WANT A SEPARATE REQUEST HANDLER FOR THIS COMPONENT.  THIS IS DONE HERE SOLELY FOR
- THE SIMPLICITY OF THE EXAMPLE.  YOU WILL LIKELY WANT TO BIND THE COMPONENT TO THE /select STANDARD REQUEST HANDLER.
+        THE SIMPLICITY OF THE EXAMPLE.  YOU WILL LIKELY WANT TO BIND THE COMPONENT TO THE /select STANDARD REQUEST HANDLER.
- !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+        !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- -->
+   -->
-     <arr name="last-components">
+   <arr name="last-components">
-       <str>spellcheck</str>
+     <str>spellcheck</str>
-     </arr>
+   </arr>
-   </requestHandler>
+ </requestHandler>
  }}}
  When adding {{{<str name="field">FieldName</str>}}}, be aware that all fieldType processing is done prior to dictionary creation.  It is best to avoid a heavily processed field (e.g., one with synonyms and stemming) to get more accurate results.  If the field has many word variations from processing, then the dictionary will be created with those in addition to more valid spell checking data.
  
@@ -142, +142 @@

  An example configuration might look like:
  
  {{{
-     <lst name="spellchecker">
+ <lst name="spellchecker">
-       <str name="name">freq</str>
+   <str name="name">freq</str>
-       <str name="field">lowerfilt</str>
+   <str name="field">lowerfilt</str>
-       <str name="spellcheckIndexDir">spellcheckerFreq</str>
+   <str name="spellcheckIndexDir">spellcheckerFreq</str>
-       <!-- comparatorClass can be one of:
+   <!-- comparatorClass can be one of:
-         1. score (default)
+        1. score (default)
-         2. freq (Frequency first, then score)
+        2. freq (Frequency first, then score)
-         3. A fully qualified class name
+        3. A fully qualified class name
-        -->
+   -->
-       <str name="comparatorClass">freq</str>
+   <str name="comparatorClass">freq</str>
-       <str name="buildOnCommit">true</str>
+   <str name="buildOnCommit">true</str>
-     </lst>
+ </lst>
  }}}
  <<Anchor(onCommit)>>
  
@@ -166, +166 @@

  For example:
  
  {{{
-     <lst name="spellchecker">
+ <lst name="spellchecker">
-       <str name="name">default</str>
+   <str name="name">default</str>
-       <str name="field">spell</str>
+   <str name="field">spell</str>
-       <str name="spellcheckIndexDir">./spellchecker1</str>
+   <str name="spellcheckIndexDir">./spellchecker1</str>
-       <str name="buildOnCommit">true</str>
+   <str name="buildOnCommit">true</str>
-     </lst>
+ </lst>
  }}}
  <<Anchor(onOptimize)>>
  
@@ -320, +320 @@

  }}}
  {{{
  <lst name="spellcheck">
-  <lst name="suggestions">
+   <lst name="suggestions">
-   <lst name="delll">
+     <lst name="delll">
-         <int name="numFound">1</int>
+       <int name="numFound">1</int>
-         <int name="startOffset">0</int>
+       <int name="startOffset">0</int>
-         <int name="endOffset">5</int>
+       <int name="endOffset">5</int>
-         <int name="origFreq">0</int>
+       <int name="origFreq">0</int>
-         <arr name="suggestion">
+       <arr name="suggestion">
-          <lst>
+         <lst>
  
            <str name="word">dell</str>
            <int name="freq">2</int>
-          </lst>
+         </lst>
-         </arr>
+       </arr>
-   </lst>
+     </lst>
-   <lst name="ultrashar">
+     <lst name="ultrashar">
-         <int name="numFound">1</int>
+       <int name="numFound">1</int>
  
-         <int name="startOffset">6</int>
+       <int name="startOffset">6</int>
-         <int name="endOffset">15</int>
+       <int name="endOffset">15</int>
-         <int name="origFreq">0</int>
+       <int name="origFreq">0</int>
-         <arr name="suggestion">
+       <arr name="suggestion">
-          <lst>
+         <lst>
            <str name="word">ultrasharp</str>
            <int name="freq">2</int>
  
-          </lst>
+         </lst>
-         </arr>
+       </arr>
-   </lst>
+     </lst>
-   <bool name="correctlySpelled">false</bool>
+     <bool name="correctlySpelled">false</bool>
-  </lst>
+   </lst>
  </lst>
  }}}
  == Collate Results ==
@@ -360, +360 @@

  }}}
  {{{
  <lst name="spellcheck">
-  <lst name="suggestions">
+   <lst name="suggestions">
-   <lst name="delll">
+     <lst name="delll">
-         <int name="numFound">1</int>
+       <int name="numFound">1</int>
-         <int name="startOffset">18</int>
+       <int name="startOffset">18</int>
-         <int name="endOffset">23</int>
+       <int name="endOffset">23</int>
-         <int name="origFreq">0</int>
+       <int name="origFreq">0</int>
-         <arr name="suggestion">
+       <arr name="suggestion">
-          <lst>
+         <lst>
            <str name="word">dell</str>
            <int name="freq">2</int>
-          </lst>
+         </lst>
-         </arr>
+       </arr>
-   </lst>
+     </lst>
-   <lst name="ultrashar">
+     <lst name="ultrashar">
-         <int name="numFound">1</int>
+       <int name="numFound">1</int>
-         <int name="startOffset">24</int>
+       <int name="startOffset">24</int>
-         <int name="endOffset">33</int>
+       <int name="endOffset">33</int>
-         <int name="origFreq">0</int>
+       <int name="origFreq">0</int>
-         <arr name="suggestion">
+       <arr name="suggestion">
-          <lst>
+         <lst>
            <str name="word">ultrasharp</str>
            <int name="freq">2</int>
-          </lst>
+         </lst>
-         </arr>
+       </arr>
-   </lst>
+     </lst>
-   <bool name="correctlySpelled">false</bool>
+     <bool name="correctlySpelled">false</bool>
-   <str name="collation">price:[80 TO 100] dell ultrasharp</str>
+     <str name="collation">price:[80 TO 100] dell ultrasharp</str>
-  </lst>
+   </lst>
  </lst>
  }}}
  = Implementing a new java SolrSpellChecker =