You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2021/02/18 08:28:45 UTC

[lucene-solr] branch master updated: LUCENE-9782: Hunspell suggestions: split by space (but not dash) also before last char (#2387)

This is an automated email from the ASF dual-hosted git repository.

dweiss pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new 589eefc  LUCENE-9782: Hunspell suggestions: split by space (but not dash) also before last char (#2387)
589eefc is described below

commit 589eefc32b296c9693bd0d462092664ea6567163
Author: Peter Gromov <pe...@jetbrains.com>
AuthorDate: Thu Feb 18 09:28:29 2021 +0100

    LUCENE-9782: Hunspell suggestions: split by space (but not dash) also before last char (#2387)
---
 .../java/org/apache/lucene/analysis/hunspell/ModifyingSuggester.java  | 4 ++--
 .../common/src/test/org/apache/lucene/analysis/hunspell/sug.aff       | 1 +
 .../common/src/test/org/apache/lucene/analysis/hunspell/sug.sug       | 2 +-
 .../common/src/test/org/apache/lucene/analysis/hunspell/sug.wrong     | 1 +
 4 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ModifyingSuggester.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ModifyingSuggester.java
index 5017ff2..86b6fc5 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ModifyingSuggester.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ModifyingSuggester.java
@@ -302,12 +302,12 @@ class ModifyingSuggester {
   }
 
   private void trySplitting(String word) {
-    for (int i = 1; i < word.length() - 1; i++) {
+    for (int i = 1; i < word.length(); i++) {
       String w1 = word.substring(0, i);
       String w2 = word.substring(i);
       if (checkSimpleWord(w1) && checkSimpleWord(w2)) {
         result.add(w1 + " " + w2);
-        if (shouldSplitByDash()) {
+        if (w1.length() > 1 && w2.length() > 1 && shouldSplitByDash()) {
           result.add(w1 + "-" + w2);
         }
       }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.aff
index 8f150cd..a963c96 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.aff
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.aff
@@ -13,6 +13,7 @@
 
 # switch off ngram suggestion for testing
 MAXNGRAMSUGS 0
+TRY -
 REP 2
 REP alot a_lot
 REP inspite in_spite
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.sug b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.sug
index bea54b8..9df0cb5 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.sug
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.sug
@@ -11,5 +11,5 @@ which
 Gandhi
 McDonald
 permanent
-
+in, in a
 
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.wrong b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.wrong
index 0093de8..470d6f5 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.wrong
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.wrong
@@ -13,3 +13,4 @@ Mcdonald
 permqnent
 permanent-vacation
 permqnent-vacation
+ina
\ No newline at end of file