You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2021/01/29 10:47:06 UTC
[lucene-solr] branch master updated: LUCENE-9702: Hunspell: support alternate casing for short language codes (#2253)
This is an automated email from the ASF dual-hosted git repository.
dweiss pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new ff943ec LUCENE-9702: Hunspell: support alternate casing for short language codes (#2253)
ff943ec is described below
commit ff943ece8fced15d7d585469929f97405b45513e
Author: Peter Gromov <pe...@jetbrains.com>
AuthorDate: Fri Jan 29 11:46:45 2021 +0100
LUCENE-9702: Hunspell: support alternate casing for short language codes (#2253)
---
.../src/java/org/apache/lucene/analysis/hunspell/Dictionary.java | 4 +++-
.../org/apache/lucene/analysis/hunspell/SpellCheckerTest.java | 5 +++++
.../src/test/org/apache/lucene/analysis/hunspell/dotless_i.aff | 2 ++
.../src/test/org/apache/lucene/analysis/hunspell/dotless_i.dic | 4 ++++
.../src/test/org/apache/lucene/analysis/hunspell/dotless_i.good | 8 ++++++++
.../src/test/org/apache/lucene/analysis/hunspell/dotless_i.wrong | 6 ++++++
6 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
index f38ab59..4dc76ab 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
@@ -374,7 +374,9 @@ public class Dictionary {
fullStrip = true;
} else if ("LANG".equals(firstWord)) {
language = singleArgument(reader, line);
- alternateCasing = "tr_TR".equals(language) || "az_AZ".equals(language);
+ int underscore = language.indexOf("_");
+ String langCode = underscore < 0 ? language : language.substring(0, underscore);
+ alternateCasing = langCode.equals("tr") || langCode.equals("az");
} else if ("BREAK".equals(firstWord)) {
breaks = parseBreaks(reader, line);
} else if ("FORBIDDENWORD".equals(firstWord)) {
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/SpellCheckerTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/SpellCheckerTest.java
index a51a43b..4c5601b 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/SpellCheckerTest.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/SpellCheckerTest.java
@@ -57,6 +57,11 @@ public class SpellCheckerTest extends StemmerTestBase {
}
@Test
+ public void dotless_i() throws Exception {
+ doTest("dotless_i");
+ }
+
+ @Test
public void needAffixOnAffixes() throws Exception {
doTest("needaffix5");
}
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/dotless_i.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/dotless_i.aff
new file mode 100644
index 0000000..e9b3a60
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/dotless_i.aff
@@ -0,0 +1,2 @@
+SET UTF-8
+LANG tr
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/dotless_i.dic b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/dotless_i.dic
new file mode 100644
index 0000000..07452ba
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/dotless_i.dic
@@ -0,0 +1,4 @@
+3
+iç
+ışık
+Diyarbakır
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/dotless_i.good b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/dotless_i.good
new file mode 100644
index 0000000..50f046b
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/dotless_i.good
@@ -0,0 +1,8 @@
+Diyarbakır
+DİYARBAKIR
+iç
+İç
+ışık
+Işık
+İÇ
+IŞIK
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/dotless_i.wrong b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/dotless_i.wrong
new file mode 100644
index 0000000..ddd49d3
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/dotless_i.wrong
@@ -0,0 +1,6 @@
+Diyarbakir
+DIYARBAKIR
+Iç
+İşık
+IÇ
+İŞIK