You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by ep...@apache.org on 2024/03/22 12:07:23 UTC

(solr) branch main updated: SOLR-12089: FileBasedSpellChecker docs have some missing params (#2356)

This is an automated email from the ASF dual-hosted git repository.

epugh pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/main by this push:
     new c2c02594323 SOLR-12089: FileBasedSpellChecker docs have some missing params (#2356)
c2c02594323 is described below

commit c2c025943238d9ad3aed7274cc18bc9b23942e29
Author: Andrey Bozhko <an...@gmail.com>
AuthorDate: Fri Mar 22 07:07:17 2024 -0500

    SOLR-12089: FileBasedSpellChecker docs have some missing params (#2356)
    
    * FileBasedSpellChecker and IndexBasedSpellChecker now accept the accuracy parameter as a float.
    * deprecate misspelled `breakSugestionTieBreaker` parameter in favor of `breakSuggestionTieBreaker`
      in WordBreakSolrSpellChecker.
    * Audit and update the Ref Guide for missing parameters.
    
    ---------
    
    Co-authored-by: Andrey Bozhko <ab...@apple.com>
    Co-authored-by: Eric Pugh <ep...@opensourceconnections.com>
    Co-authored-by: Christine Poerschke <cp...@apache.org>
---
 solr/CHANGES.txt                                   |   4 +
 .../handler/component/SpellCheckComponent.java     |   2 +-
 .../solr/spelling/AbstractLuceneSpellChecker.java  |   4 +-
 .../solr/spelling/WordBreakSolrSpellChecker.java   |  29 +++-
 .../solr/collection1/conf/solrconfig-minhash.xml   |   2 +-
 .../collection1/conf/solrconfig-plugcollector.xml  |   2 +-
 .../conf/solrconfig-spellcheckcomponent.xml        |   5 +-
 .../solr/collection1/conf/solrconfig.xml           |   2 +-
 .../conf/solrconfig.xml                            |   2 +-
 .../query-guide/pages/function-queries.adoc        |   2 +-
 .../modules/query-guide/pages/spell-checking.adoc  | 168 +++++++++++++++------
 .../solr/collection1/conf/solrconfig.xml           |   2 +-
 12 files changed, 161 insertions(+), 63 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 0cd8f12ae70..44f58030013 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -162,6 +162,10 @@ Other Changes
 
 * SOLR-17190: Replace org.apache.solr.util.LongSet with hppc LongHashSet (Michael Gibney)
 
+* SOLR-12089: Update FileBasedSpellChecker and IndexBasedSpellChecker to accept accuracy parameter
+  as float; deprecate `breakSugestionTieBreaker` parameter in favor of `breakSuggestionTieBreaker`
+  in WordBreakSolrSpellChecker (Andrey Bozhko via Eric Pugh)
+
 * SOLR-17201: Http2SolrClient and friends no longer marked as @lucene.experimental.
   Krb5HttpClientBuilder and PreemptiveBasicAuthClientBuilderFactory no longer deprecated (janhoy)
 
diff --git a/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java b/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
index 9a45aefea02..a9603de49b8 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
@@ -760,7 +760,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
   private boolean addSpellChecker(SolrCore core, boolean hasDefault, NamedList<?> spellchecker) {
     String className = (String) spellchecker.get("classname");
     if (className == null) className = (String) spellchecker.get("class");
-    // TODO: this is a little bit sneaky: warn if class isnt supplied
+    // TODO: this is a little bit sneaky: warn if class isn't supplied
     // so that it's mandatory in a future release?
     if (className == null) className = IndexBasedSpellChecker.class.getName();
     SolrResourceLoader loader = core.getResourceLoader();
diff --git a/solr/core/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java b/solr/core/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
index df413b01e06..a54ea8ab9e1 100644
--- a/solr/core/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
+++ b/solr/core/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
@@ -81,7 +81,6 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
   public String init(NamedList<?> config, SolrCore core) {
     super.init(config, core);
     indexDir = (String) config.get(INDEX_DIR);
-    String accuracy = (String) config.get(ACCURACY);
     // If indexDir is relative then create index inside core.getDataDir()
     if (indexDir != null) {
       if (!new File(indexDir).isAbsolute()) {
@@ -120,9 +119,10 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
     } catch (IOException e) {
       throw new RuntimeException(e);
     }
+    Object accuracy = config.get(ACCURACY);
     if (accuracy != null) {
       try {
-        this.accuracy = Float.parseFloat(accuracy);
+        this.accuracy = Float.parseFloat(accuracy.toString());
         spellChecker.setAccuracy(this.accuracy);
       } catch (NumberFormatException e) {
         throw new RuntimeException("Unparseable accuracy given for dictionary: " + name, e);
diff --git a/solr/core/src/java/org/apache/solr/spelling/WordBreakSolrSpellChecker.java b/solr/core/src/java/org/apache/solr/spelling/WordBreakSolrSpellChecker.java
index 90029320a37..db7bd7f5665 100644
--- a/solr/core/src/java/org/apache/solr/spelling/WordBreakSolrSpellChecker.java
+++ b/solr/core/src/java/org/apache/solr/spelling/WordBreakSolrSpellChecker.java
@@ -17,6 +17,7 @@
 package org.apache.solr.spelling;
 
 import java.io.IOException;
+import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Iterator;
@@ -32,6 +33,8 @@ import org.apache.lucene.search.spell.WordBreakSpellChecker.BreakSuggestionSortM
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.search.SolrIndexSearcher;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * A spellchecker that breaks and combines words.
@@ -46,6 +49,9 @@ import org.apache.solr.search.SolrIndexSearcher;
  * properly sets these flags.
  */
 public class WordBreakSolrSpellChecker extends SolrSpellChecker {
+
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
   /** Try to combine multiple words into one? [true|false] */
   public static final String PARAM_COMBINE_WORDS = "combineWords";
 
@@ -61,8 +67,16 @@ public class WordBreakSolrSpellChecker extends SolrSpellChecker {
   /** See {@link WordBreakSpellChecker#setMinBreakWordLength} */
   public static final String PARAM_MIN_BREAK_WORD_LENGTH = "minBreakLength";
 
+  /**
+   * See {@link BreakSuggestionTieBreaker} for options.
+   *
+   * @deprecated Only used for backwards compatibility. It will be removed in 10.x.
+   */
+  @Deprecated(since = "9.6")
+  private static final String PARAM_BREAK_SUGESTION_TIE_BREAKER = "breakSugestionTieBreaker";
+
   /** See {@link BreakSuggestionTieBreaker} for options. */
-  public static final String PARAM_BREAK_SUGGESTION_TIE_BREAKER = "breakSugestionTieBreaker";
+  public static final String PARAM_BREAK_SUGGESTION_TIE_BREAKER = "breakSuggestionTieBreaker";
 
   /** See {@link WordBreakSpellChecker#setMaxEvaluations} */
   public static final String PARAM_MAX_EVALUATIONS = "maxEvaluations";
@@ -70,7 +84,7 @@ public class WordBreakSolrSpellChecker extends SolrSpellChecker {
   /** See {@link WordBreakSpellChecker#setMinSuggestionFrequency} */
   public static final String PARAM_MIN_SUGGESTION_FREQUENCY = "minSuggestionFreq";
 
-  /** Specify a value on the "breakSugestionTieBreaker" parameter. The default is MAX_FREQ. */
+  /** Specify a value on the "breakSuggestionTieBreaker" parameter. The default is MAX_FREQ. */
   public enum BreakSuggestionTieBreaker {
     /** See {@link BreakSuggestionSortMethod#NUM_CHANGES_THEN_MAX_FREQUENCY} # */
     MAX_FREQ,
@@ -92,6 +106,17 @@ public class WordBreakSolrSpellChecker extends SolrSpellChecker {
     breakWords = boolParam(config, PARAM_BREAK_WORDS);
     wbsp = new WordBreakSpellChecker();
     String bstb = strParam(config, PARAM_BREAK_SUGGESTION_TIE_BREAKER);
+    if (bstb == null) {
+      bstb = strParam(config, PARAM_BREAK_SUGESTION_TIE_BREAKER);
+      if (bstb != null && log.isWarnEnabled()) {
+        log.warn(
+            "Parameter '"
+                + PARAM_BREAK_SUGESTION_TIE_BREAKER
+                + "' is deprecated and will be removed in Solr 10.x. Please use '"
+                + PARAM_BREAK_SUGGESTION_TIE_BREAKER
+                + "' instead."); // nowarn
+      }
+    }
     if (bstb != null) {
       bstb = bstb.toUpperCase(Locale.ROOT);
       if (bstb.equals(BreakSuggestionTieBreaker.SUM_FREQ.name())) {
diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-minhash.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-minhash.xml
index e23b30b62c1..9fa236dda0b 100644
--- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-minhash.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-minhash.xml
@@ -271,7 +271,7 @@
       <str name="name">freq</str>
       <str name="field">lowerfilt</str>
       <str name="spellcheckIndexDir">spellcheckerFreq</str>
-      <!-- comparatorClass be one of:
+      <!-- comparatorClass can be one of:
         1. score (default)
         2. freq (Frequency first, then score)
         3. A fully qualified class name
diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-plugcollector.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-plugcollector.xml
index 34636cd6cfd..845998ec2f4 100644
--- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-plugcollector.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-plugcollector.xml
@@ -259,7 +259,7 @@
       <str name="name">freq</str>
       <str name="field">lowerfilt</str>
       <str name="spellcheckIndexDir">spellcheckerFreq</str>
-      <!-- comparatorClass be one of:
+      <!-- comparatorClass can be one of:
         1. score (default)
         2. freq (Frequency first, then score)
         3. A fully qualified class name
diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-spellcheckcomponent.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-spellcheckcomponent.xml
index 7760eb261e3..0253d91b804 100644
--- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-spellcheckcomponent.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-spellcheckcomponent.xml
@@ -83,7 +83,7 @@
       <str name="field">lowerfilt</str>
       <str name="combineWords">true</str>
       <str name="breakWords">true</str>
-      <str name="breakSugestionTieBreaker">MAX_FREQ</str>
+      <str name="breakSuggestionTieBreaker">MAX_FREQ</str>
       <int name="maxChanges">10</int>
     </lst>
     <lst name="spellchecker">
@@ -122,13 +122,14 @@
       <str name="sourceLocation">spellings.txt</str>
       <str name="characterEncoding">UTF-8</str>
       <str name="spellcheckIndexDir">spellchecker3</str>
+      <float name="accuracy">0.5</float>
     </lst>
     <!-- Comparator -->
     <lst name="spellchecker">
       <str name="name">freq</str>
       <str name="field">lowerfilt</str>
       <str name="spellcheckIndexDir">spellcheckerFreq</str>
-      <!-- comparatorClass be one of:
+      <!-- comparatorClass can be one of:
         1. score (default)
         2. freq (Frequency first, then score)
         3. A fully qualified class name
diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml
index 9b66df4cc0e..82dca6384d8 100644
--- a/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml
@@ -278,7 +278,7 @@
       <str name="name">freq</str>
       <str name="field">lowerfilt</str>
       <str name="spellcheckIndexDir">spellcheckerFreq</str>
-      <!-- comparatorClass be one of:
+      <!-- comparatorClass can be one of:
         1. score (default)
         2. freq (Frequency first, then score)
         3. A fully qualified class name
diff --git a/solr/server/solr/configsets/sample_techproducts_configs/conf/solrconfig.xml b/solr/server/solr/configsets/sample_techproducts_configs/conf/solrconfig.xml
index 9b83b3e6db5..6b019488435 100644
--- a/solr/server/solr/configsets/sample_techproducts_configs/conf/solrconfig.xml
+++ b/solr/server/solr/configsets/sample_techproducts_configs/conf/solrconfig.xml
@@ -828,7 +828,7 @@
 
     <!-- a spellchecker that use an alternate comparator
 
-         comparatorClass be one of:
+         comparatorClass can be one of:
           1. score (default)
           2. freq (Frequency first, then score)
           3. A fully qualified class name
diff --git a/solr/solr-ref-guide/modules/query-guide/pages/function-queries.adoc b/solr/solr-ref-guide/modules/query-guide/pages/function-queries.adoc
index 48f9345f1cd..7c6f1a9d0ea 100644
--- a/solr/solr-ref-guide/modules/query-guide/pages/function-queries.adoc
+++ b/solr/solr-ref-guide/modules/query-guide/pages/function-queries.adoc
@@ -473,7 +473,7 @@ Uses the Lucene spell checker `StringDistance` interface and supports all of the
 Possible values for distance measure are:
 
 * jw: Jaro-Winkler
-* edit: Levenstein or Edit distance
+* edit: Levenshtein or Edit distance
 * ngram: The NGramDistance, if specified, can optionally pass in the ngram size too.
 Default is 2.
 * FQN: Fully Qualified class Name for an implementation of the StringDistance interface.
diff --git a/solr/solr-ref-guide/modules/query-guide/pages/spell-checking.adoc b/solr/solr-ref-guide/modules/query-guide/pages/spell-checking.adoc
index f6962ce8d4d..fea8e94c5bf 100644
--- a/solr/solr-ref-guide/modules/query-guide/pages/spell-checking.adoc
+++ b/solr/solr-ref-guide/modules/query-guide/pages/spell-checking.adoc
@@ -25,26 +25,34 @@ The basis for these suggestions can be terms in a field in Solr, externally crea
 === Define Spell Check in solrconfig.xml
 
 The first step is to specify the source of terms in `solrconfig.xml`.
-There are three approaches to spell checking in Solr, discussed below.
+There are a number of approaches to spell checking in Solr, discussed below.
 
 ==== IndexBasedSpellChecker
 
 The `IndexBasedSpellChecker` uses a Solr index as the basis for a parallel index used for spell checking.
 It requires defining a field as the basis for the index terms; a common practice is to copy terms from some fields (such as `title`, `body`, etc.) to another field created for spell checking.
-Here is a simple example of configuring `solrconfig.xml` with the `IndexBasedSpellChecker`:
+Here is an example of configuring `IndexBasedSpellChecker` in `solrconfig.xml`:
 
 [source,xml]
 ----
 <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
   <lst name="spellchecker">
     <str name="classname">solr.IndexBasedSpellChecker</str>
-    <str name="spellcheckIndexDir">./spellchecker</str>
+    <!-- required parameters -->
     <str name="field">content</str>
-    <str name="buildOnCommit">true</str>
-    <!-- optional elements with defaults
+    <!-- optional parameters for IndexBasedSpellChecker -->
+    <str name="sourceLocation">./folder/with/index/files</str>
+    <!-- optional parameters for all spellcheckers -->
+    <str name="spellcheckIndexDir">./spellcheckerDir</str>
+    <str name="name">default</str>
+    <str name="fieldType">content_ft</str>
+    <str name="queryAnalyzerFieldType">text_general</str>
     <str name="distanceMeasure">org.apache.lucene.search.spell.LevenshteinDistance</str>
-    <str name="accuracy">0.5</str>
-    -->
+    <str name="comparatorClass">score</str>
+    <float name="accuracy">0.5</float>
+    <float name="thresholdTokenFrequency">0.0</float>
+    <str name="buildOnCommit">true</str>
+    <str name="buildOnOptimize">false</str>
  </lst>
 </searchComponent>
 ----
@@ -53,47 +61,83 @@ The first element defines the `searchComponent` to use the `solr.SpellCheckCompo
 The `classname` is the specific implementation of the SpellCheckComponent, in this case `solr.IndexBasedSpellChecker`.
 Defining the `classname` is optional; if not defined, it will default to `IndexBasedSpellChecker`.
 
-The `spellcheckIndexDir` defines the location of the directory that holds the spellcheck index, while the `field` defines the source field (defined in the Schema) for spell check terms.
+The `spellcheckIndexDir` defines the location of the directory that holds the spellcheck index, while the `field` defines the source field (defined in the Schema) for spellcheck terms.
 When choosing a field for the spellcheck index, it's best to avoid a heavily processed field to get more accurate results.
 If the field has many word variations from processing synonyms and/or stemming, the dictionary will be created with those variations in addition to more valid spelling data.
 
-Finally, _buildOnCommit_ defines whether to build the spell check index at every commit (that is, every time new documents are added to the index).
-It is optional, and can be omitted if you would rather set it to `false`.
+By default, this spellchecker builds its dictionary from the Solr index.
+This can be changed by specifying `sourceLocation` - a folder with static Lucene index files to use instead of the Solr index.
+
+The spellchecker can be assigned a descriptive label, `name`, which can be helpful if the search component defines
+multiple spellcheckers. With that, a spellcheck query can identify a subset of spellcheckers that should be consulted
+(see xref:spell-check-parameters[] for more details).
+
+The query analyzer for the `field` is used to tokenize the spellcheck query.
+If there's a need to override that behavior, configure a `fieldType` and the spellchecker
+will use the query analyzer for that field type instead.
+
+`queryAnalyzerFieldType` is a field type from Solr's schema, and works similarly to the `fieldType` parameter.
+The key difference is that Solr uses `field` or `fieldType` when it tokenizes the spellcheck query
+supplied via `spellcheck.q`, and uses `queryAnalyzerFieldType` when the query is instead provided via the `q` parameter.
+
+The field type specified by this parameter should do minimal transformations.
+It's usually a best practice to avoid types that aggressively stem or NGram, for instance, since those types of analysis can throw off spell checking.
+
+Common configuration parameters like `distanceMeasure`, `comparatorClass`, `accuracy`, and `thresholdTokenFrequency`
+provide control over the returned spellcheck suggestions.
+
+If the `distanceMeasure` is not specified, Solr will use the Levenshtein metric, which is the default metric for other spellchecker implementations as well (except for `DirectSolrSpellChecker`).
+
+When `comparatorClass` is configured as "score", the suggestions with lower distance (i.e., higher similarity) scores are considered more relevant.
+The alternative value is "freq" - this prioritizes suggestions with higher document frequency.
+
+The `accuracy` setting defines the threshold for a valid suggestion, and the `thresholdTokenFrequency` setting allows
+skipping suggestions which have low document frequency in the index.
+
+Finally, `buildOnCommit` and `buildOnOptimize` define whether to build the spellcheck index at every commit (that is, every time new documents are added to the index)
+or at every optimize request.
+Both are optional, and can be omitted if you would rather set their values to `false`.
 
 ==== DirectSolrSpellChecker
 
 The `DirectSolrSpellChecker` uses terms from the Solr index without building a parallel index like the `IndexBasedSpellChecker`.
-This spell checker has the benefit of not having to be built regularly, meaning that the terms are always up-to-date with terms in the index.
-Here is how this might be configured in `solrconfig.xml`
+This spellchecker has the benefit of not having to be built regularly, meaning that the terms are always up-to-date with terms in the index.
+Here is how this might be configured in `solrconfig.xml`:
 
 [source,xml]
 ----
 <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
   <lst name="spellchecker">
-    <str name="name">default</str>
-    <str name="field">name</str>
     <str name="classname">solr.DirectSolrSpellChecker</str>
-    <str name="distanceMeasure">internal</str>
-    <float name="accuracy">0.5</float>
+    <!-- required parameters -->
+    <str name="field">name</str>
+    <!-- optional parameters for DirectSolrSpellChecker -->
     <int name="maxEdits">2</int>
     <int name="minPrefix">1</int>
     <int name="maxInspections">5</int>
     <int name="minQueryLength">4</int>
     <int name="maxQueryLength">40</int>
     <float name="maxQueryFrequency">0.01</float>
-    <float name="thresholdTokenFrequency">.01</float>
+    <!-- optional parameters for all spellcheckers -->
+    <str name="name">default</str>
+    <str name="fieldType">name_ft</str>
+    <str name="queryAnalyzerFieldType">text_general</str>
+    <str name="distanceMeasure">internal</str>
+    <str name="comparatorClass">score</str>
+    <float name="accuracy">0.5</float>
+    <float name="thresholdTokenFrequency">0.0</float>
   </lst>
 </searchComponent>
 ----
 
-When choosing a `field` to query for this spell checker, you want one which has relatively little analysis performed on it (particularly analysis such as stemming).
+When choosing a `field` to query for this spellchecker, you want one which has relatively little analysis performed on it (particularly analysis such as stemming).
 Note that you need to specify a field to use for the suggestions, so like the `IndexBasedSpellChecker`, you may want to copy data from fields like `title`, `body`, etc., to a field dedicated to providing spelling suggestions.
 
-Many of the parameters relate to how this spell checker should query the index for term suggestions.
-The `distanceMeasure` defines the metric to use during the spell check query.
-The value "internal" uses the default Levenshtein metric, which is the same metric used with the other spell checker implementations.
+Many of the parameters relate to how this spellchecker should query the index for term suggestions.
+The `distanceMeasure` defines the metric to use during the spellcheck query - the default value for this spellchecker is "internal",
+which corresponds to the Damerau-Levenshtein metric.
 
-Because this spell checker is querying the main index, you may want to limit how often it queries the index to be sure to avoid any performance conflicts with user queries.
+Because this spellchecker is querying the main index, you may want to limit how often it queries the index to be sure to avoid any performance conflicts with user queries.
 The `accuracy` setting defines the threshold for a valid suggestion, while `maxEdits` defines the number of changes to the term to allow.
 Since most spelling mistakes are only 1 letter off, setting this to 1 will reduce the number of possible suggestions (the default, however, is 2); the value can only be 1 or 2.
 `minPrefix` defines the minimum number of characters the terms should share.
@@ -101,10 +145,10 @@ Setting this to 1 means that the spelling suggestions will all start with the sa
 
 The `maxInspections` parameter defines the maximum number of possible matches to review before returning results; the default is 5.
 `minQueryLength` defines how many characters must be in the query before suggestions are provided; the default is 4.
-`maxQueryLength` enables the spell checker to skip over very long query terms, which can avoid expensive operations or exceptions.
+`maxQueryLength` enables the spellchecker to skip over very long query terms, which can avoid expensive operations or exceptions.
 There is no limit to term length by default.
 
-At first, spellchecker analyses incoming query words by looking up them in the index.
+At first, spellchecker analyses incoming query words by looking them up in the index.
 Only query words which are absent from the index, or too rare (equal to or below `maxQueryFrequency`) are considered as misspelled and used for finding suggestions.
 Words which are more frequent than `maxQueryFrequency` bypass spellchecker unchanged.
 After suggestions for every misspelled word are found they are filtered for enough frequency with `thresholdTokenFrequency` as boundary value.
@@ -126,19 +170,31 @@ In `solrconfig.xml`, you would define the searchComponent as so:
 <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
   <lst name="spellchecker">
     <str name="classname">solr.FileBasedSpellChecker</str>
-    <str name="name">file</str>
+    <!-- required parameters -->
     <str name="sourceLocation">spellings.txt</str>
+    <!-- optional parameters for FileBasedSpellChecker -->
+    <str name="fieldType">text_general</str>
     <str name="characterEncoding">UTF-8</str>
-    <str name="spellcheckIndexDir">./spellcheckerFile</str>
-    <!-- optional elements with defaults
+    <!-- optional parameters for all spellcheckers -->
+    <str name="spellcheckIndexDir">./spellcheckerDir</str>
+    <str name="name">file</str>
+    <str name="queryAnalyzerFieldType">text_general</str>
     <str name="distanceMeasure">org.apache.lucene.search.spell.LevenshteinDistance</str>
-    <str name="accuracy">0.5</str>
-    -->
+    <str name="comparatorClass">score</str>
+    <float name="accuracy">0.5</float>
+    <float name="thresholdTokenFrequency">0.0</float>
+    <bool name="buildOnCommit">false</bool>
+    <bool name="buildOnOptimize">false</bool>
  </lst>
 </searchComponent>
 ----
 
-The differences here are the use of the `sourceLocation` to define the location of the file of terms and the use of `characterEncoding` to define the encoding of the terms file.
+The configuration is very similar to the `IndexBasedSpellChecker`, and the differences here are the use of the `sourceLocation` to define the location of the file of terms, and the use of `characterEncoding` to define the encoding of the terms file.
+
+If the `fieldType` parameter is specified and matches a type from the Solr schema, Solr will build the spellcheck index
+by first tokenizing each line from the external file using the `fieldType` index analyzer, and then adding each token to the index.
+
+If not, Solr will treat each line from the external file as an individual token, and add them to the spellcheck index as is.
 
 [TIP]
 ====
@@ -160,20 +216,45 @@ Here is how it might be configured in `solrconfig.xml`:
 ----
 <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
   <lst name="spellchecker">
-    <str name="name">wordbreak</str>
     <str name="classname">solr.WordBreakSolrSpellChecker</str>
+    <!-- required parameters -->
     <str name="field">lowerfilt</str>
+    <!-- optional parameters for WordBreakSolrSpellChecker -->
     <str name="combineWords">true</str>
     <str name="breakWords">true</str>
-    <int name="maxChanges">10</int>
+    <str name="breakSuggestionTieBreaker">max_freq</str>
+    <int name="maxChanges">1</int>
+    <int name="maxCombinedLength">20</int>
+    <int name="minBreakLength">1</int>
+    <int name="maxEvaluations">1000</int>
+    <int name="minSuggestionFreq">1</int>
+    <!-- optional parameters for all spellcheckers -->
+    <str name="name">wordbreak</str>
+    <str name="fieldType">lowerfilt_ft</str>
+    <str name="queryAnalyzerFieldType">text_general</str>
   </lst>
 </searchComponent>
 ----
 
-Some of the parameters will be familiar from the discussion of the other spell checkers, such as `name`, `classname`, and `field`.
-New for this spell checker is `combineWords`, which defines whether words should be combined in a dictionary search (default is true); `breakWords`, which defines if words should be broken during a dictionary search (default is true); and `maxChanges`, an integer which defines how many times the spell checker should check collation possibilities against the index (default is 10).
+Some of the parameters should be familiar from the discussion of the other spellcheckers, such as `name`, `classname`, and `field`.
+New for this spellchecker is `combineWords`, which defines whether words should be combined in a dictionary search (default is true);
+and `breakWords`, which defines if words should be broken during a dictionary search (default is true).
+
+`maxChanges` is an integer which defines how many times the spellchecker should check collation possibilities against the index.
+
+`maxCombinedLength` allows skipping over the suggestions which are too long.
+Similarly, `minBreakLength` instructs the spellchecker to not break the word into parts that are too short.
+
+`maxEvaluations` defines the maximum number of word combinations to evaluate - a higher value might improve
+the result quality, while a lower value might improve performance.
+
+`minSuggestionFreq` sets the minimum frequency a term must have to be included as part of a suggestion.
 
-The spellchecker can be configured with a traditional checker (i.e., `DirectSolrSpellChecker`).
+Finally, the `breakSuggestionTieBreaker` setting ("max_freq" or "sum_freq") instructs Solr to
+sort the suggestions by the number of word breaks, and then by the maximum or by the sum of all the component terms'
+frequencies, respectively.
+
+The spellchecker can be configured together with a traditional checker (i.e., `DirectSolrSpellChecker`).
 The results are combined and collations can contain a mix of corrections from both spellcheckers.
 
 === Add It to a Request Handler
@@ -278,19 +359,6 @@ If this parameter isn't set, the value defaults to `1`.
 If the parameter is set but not assigned a number, the value defaults to `5`.
 If the parameter is set to a positive integer, that number becomes the maximum number of suggestions returned by the spellchecker.
 
-`spellcheck.queryAnalyzerFieldType`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: none
-|===
-+
-A field type from Solr's schema.
-The analyzer configured for the provided field type is used by the QueryConverter to tokenize the value for `q` parameter.
-+
-The field type specified by this parameter should do minimal transformations.
-It's usually a best practice to avoid types that aggressively stem or NGram, for instance, since those types of analysis can throw off spell checking.
-
 `spellcheck.onlyMorePopular`::
 +
 [%autowidth,frame=none]
@@ -423,7 +491,7 @@ For example, even if your regular search results allow for loose matching of one
 |===
 +
 This parameter causes Solr to use the dictionary named in the parameter's argument.
-This parameter can be used to invoke a specific spellchecker on a per request basis.
+This parameter can be used to invoke a specific spellchecker on a per-request basis.
 
 `spellcheck.accuracy`::
 +
@@ -530,4 +598,4 @@ For example:
 http://localhost:8983/solr/techproducts/spell?spellcheck=true&spellcheck.build=true&spellcheck.q=toyata&shards.qt=/spell&shards=solr-shard1:8983/solr/techproducts,solr-shard2:8983/solr/techproducts
 
 In case of a distributed request to the SpellCheckComponent, the shards are requested for at least five suggestions even if the `spellcheck.count` parameter value is less than five.
-Once the suggestions are collected, they are ranked by the configured distance measure (Levenstein Distance by default) and then by aggregate frequency.
+Once the suggestions are collected, they are ranked by the configured distance measure (Levenshtein distance by default) and then by aggregate frequency.
diff --git a/solr/test-framework/src/test-files/solr/collection1/conf/solrconfig.xml b/solr/test-framework/src/test-files/solr/collection1/conf/solrconfig.xml
index 9b66df4cc0e..82dca6384d8 100644
--- a/solr/test-framework/src/test-files/solr/collection1/conf/solrconfig.xml
+++ b/solr/test-framework/src/test-files/solr/collection1/conf/solrconfig.xml
@@ -278,7 +278,7 @@
       <str name="name">freq</str>
       <str name="field">lowerfilt</str>
       <str name="spellcheckIndexDir">spellcheckerFreq</str>
-      <!-- comparatorClass be one of:
+      <!-- comparatorClass can be one of:
         1. score (default)
         2. freq (Frequency first, then score)
         3. A fully qualified class name