You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2021/02/23 12:00:16 UTC

[lucene-solr] branch master updated: LUCENE-9805: Hunspell: fix space + mixed case heuristics on suggestions (#2420)

This is an automated email from the ASF dual-hosted git repository.

rmuir pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new 381a5ca  LUCENE-9805: Hunspell: fix space + mixed case heuristics on suggestions (#2420)
381a5ca is described below

commit 381a5cacb001453235e008976f6dc312fb149d37
Author: Peter Gromov <pe...@jetbrains.com>
AuthorDate: Tue Feb 23 13:00:02 2021 +0100

    LUCENE-9805: Hunspell: fix space + mixed case heuristics on suggestions (#2420)
---
 .../org/apache/lucene/analysis/hunspell/ModifyingSuggester.java     | 6 +++---
 .../common/src/test/org/apache/lucene/analysis/hunspell/sug.dic     | 6 +++++-
 .../common/src/test/org/apache/lucene/analysis/hunspell/sug.sug     | 1 +
 .../common/src/test/org/apache/lucene/analysis/hunspell/sug.wrong   | 3 ++-
 4 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ModifyingSuggester.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ModifyingSuggester.java
index 86e34c7..286d1ee 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ModifyingSuggester.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ModifyingSuggester.java
@@ -70,18 +70,18 @@ class ModifyingSuggester {
       }
 
       List<String> adjusted =
-          result.stream().map(s -> capitalizeAfterSpace(low, s)).collect(Collectors.toList());
+          result.stream().map(s -> capitalizeAfterSpace(word, s)).collect(Collectors.toList());
       result.clear();
       result.addAll(adjusted);
     }
   }
 
   // aNew -> "a New" (instead of "a new")
-  private String capitalizeAfterSpace(String lowMisspelled, String candidate) {
+  private String capitalizeAfterSpace(String misspelled, String candidate) {
     int space = candidate.indexOf(' ');
     int tail = candidate.length() - space - 1;
     if (space > 0
-        && lowMisspelled.regionMatches(lowMisspelled.length() - tail, candidate, space + 1, tail)) {
+        && !misspelled.regionMatches(misspelled.length() - tail, candidate, space + 1, tail)) {
       return candidate.substring(0, space + 1)
           + Character.toUpperCase(candidate.charAt(space + 1))
           + candidate.substring(space + 2);
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.dic b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.dic
index 6725561..6ea06f1 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.dic
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.dic
@@ -15,4 +15,8 @@ spite
 inspire
 Saiph
 sahib
-ship
\ No newline at end of file
+ship
+ESP
+esp
+s
+S
\ No newline at end of file
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.sug b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.sug
index 4595756..e7a52c4 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.sug
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.sug
@@ -13,3 +13,4 @@ McDonald
 permanent
 in, in a
 Saiph, Ship, Sahib
+ESP, ESP s, Esp, Esp s
\ No newline at end of file
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.wrong b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.wrong
index d8875d8..356a08d 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.wrong
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.wrong
@@ -14,4 +14,5 @@ permqnent
 permanent-vacation
 permqnent-vacation
 ina
-Sahip
\ No newline at end of file
+Sahip
+ESPs
\ No newline at end of file