You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2021/02/23 12:00:16 UTC
[lucene-solr] branch master updated: LUCENE-9805: Hunspell: fix space + mixed case heuristics on suggestions (#2420)
This is an automated email from the ASF dual-hosted git repository.
rmuir pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new 381a5ca LUCENE-9805: Hunspell: fix space + mixed case heuristics on suggestions (#2420)
381a5ca is described below
commit 381a5cacb001453235e008976f6dc312fb149d37
Author: Peter Gromov <pe...@jetbrains.com>
AuthorDate: Tue Feb 23 13:00:02 2021 +0100
LUCENE-9805: Hunspell: fix space + mixed case heuristics on suggestions (#2420)
---
.../org/apache/lucene/analysis/hunspell/ModifyingSuggester.java | 6 +++---
.../common/src/test/org/apache/lucene/analysis/hunspell/sug.dic | 6 +++++-
.../common/src/test/org/apache/lucene/analysis/hunspell/sug.sug | 1 +
.../common/src/test/org/apache/lucene/analysis/hunspell/sug.wrong | 3 ++-
4 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ModifyingSuggester.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ModifyingSuggester.java
index 86e34c7..286d1ee 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ModifyingSuggester.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ModifyingSuggester.java
@@ -70,18 +70,18 @@ class ModifyingSuggester {
}
List<String> adjusted =
- result.stream().map(s -> capitalizeAfterSpace(low, s)).collect(Collectors.toList());
+ result.stream().map(s -> capitalizeAfterSpace(word, s)).collect(Collectors.toList());
result.clear();
result.addAll(adjusted);
}
}
// aNew -> "a New" (instead of "a new")
- private String capitalizeAfterSpace(String lowMisspelled, String candidate) {
+ private String capitalizeAfterSpace(String misspelled, String candidate) {
int space = candidate.indexOf(' ');
int tail = candidate.length() - space - 1;
if (space > 0
- && lowMisspelled.regionMatches(lowMisspelled.length() - tail, candidate, space + 1, tail)) {
+ && !misspelled.regionMatches(misspelled.length() - tail, candidate, space + 1, tail)) {
return candidate.substring(0, space + 1)
+ Character.toUpperCase(candidate.charAt(space + 1))
+ candidate.substring(space + 2);
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.dic b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.dic
index 6725561..6ea06f1 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.dic
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.dic
@@ -15,4 +15,8 @@ spite
inspire
Saiph
sahib
-ship
\ No newline at end of file
+ship
+ESP
+esp
+s
+S
\ No newline at end of file
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.sug b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.sug
index 4595756..e7a52c4 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.sug
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.sug
@@ -13,3 +13,4 @@ McDonald
permanent
in, in a
Saiph, Ship, Sahib
+ESP, ESP s, Esp, Esp s
\ No newline at end of file
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.wrong b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.wrong
index d8875d8..356a08d 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.wrong
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.wrong
@@ -14,4 +14,5 @@ permqnent
permanent-vacation
permqnent-vacation
ina
-Sahip
\ No newline at end of file
+Sahip
+ESPs
\ No newline at end of file