You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by do...@apache.org on 2023/01/13 11:49:01 UTC
[lucene] 08/09: hunspell: support empty dictionaries, adapt to the hunspell/C++ repo changes (#11960)
This is an automated email from the ASF dual-hosted git repository.
donnerpeter pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git
commit 6332f4ffbad55b07360ba9bcd4c19cf7eca8891c
Author: Peter Gromov <pe...@jetbrains.com>
AuthorDate: Tue Nov 22 18:23:45 2022 +0100
hunspell: support empty dictionaries, adapt to the hunspell/C++ repo changes (#11960)
hunspell: support empty dictionaries, adapt to the hunspell/C++ repo changes
---
.../java/org/apache/lucene/analysis/hunspell/WordStorage.java | 9 ++++++---
.../analysis/hunspell/TestHunspellRepositoryTestCases.java | 1 -
.../org/apache/lucene/analysis/hunspell/TestSpellChecking.java | 4 ++++
.../src/test/org/apache/lucene/analysis/hunspell/empty.aff | 2 ++
.../src/test/org/apache/lucene/analysis/hunspell/empty.dic | 1 +
.../src/test/org/apache/lucene/analysis/hunspell/empty.good | 0
.../src/test/org/apache/lucene/analysis/hunspell/empty.wrong | 3 +++
7 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/WordStorage.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/WordStorage.java
index cc39c5f4185..83fdfdea47d 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/WordStorage.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/WordStorage.java
@@ -424,10 +424,13 @@ class WordStorage {
}
WordStorage build() throws IOException {
- assert !group.isEmpty() : "build() should be only called once";
- flushGroup();
+ if (hashTable.length > 0) {
+ assert !group.isEmpty() : "build() should be only called once";
+ flushGroup();
+ }
byte[] trimmedData = ArrayUtil.copyOfSubArray(wordData, 0, dataWriter.getPosition());
- return new WordStorage(maxEntryLength, hasCustomMorphData, hashTable, trimmedData);
+ int[] table = hashTable.length == 0 ? new int[1] : hashTable;
+ return new WordStorage(maxEntryLength, hasCustomMorphData, table, trimmedData);
}
}
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellRepositoryTestCases.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellRepositoryTestCases.java
index c7a67762a75..5c2ec424b93 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellRepositoryTestCases.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellRepositoryTestCases.java
@@ -42,7 +42,6 @@ public class TestHunspellRepositoryTestCases {
"hu", // Hungarian is hard: a lot of its rules are hardcoded in Hunspell code, not aff/dic
"morph", // we don't do morphological analysis yet
"opentaal_keepcase", // Hunspell bug: https://github.com/hunspell/hunspell/issues/712
- "forbiddenword", // needs https://github.com/hunspell/hunspell/pull/713 PR to be merged
"nepali", // not supported yet
"utf8_nonbmp", // code points not supported yet
"phone" // not supported yet, used only for suggestions in en_ZA
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestSpellChecking.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestSpellChecking.java
index ba8420b5806..d1b6bea03b3 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestSpellChecking.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestSpellChecking.java
@@ -37,6 +37,10 @@ import org.apache.lucene.util.IOUtils;
public class TestSpellChecking extends LuceneTestCase {
+ public void testEmpty() throws Exception {
+ doTest("empty");
+ }
+
public void testBase() throws Exception {
doTest("base");
}
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/empty.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/empty.aff
new file mode 100644
index 00000000000..5540fec275f
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/empty.aff
@@ -0,0 +1,2 @@
+AF 2000
+INVALID something
\ No newline at end of file
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/empty.dic b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/empty.dic
new file mode 100644
index 00000000000..2995a4d0e74
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/empty.dic
@@ -0,0 +1 @@
+dummy
\ No newline at end of file
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/empty.good b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/empty.good
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/empty.wrong b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/empty.wrong
new file mode 100644
index 00000000000..b2bd664f7d5
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/empty.wrong
@@ -0,0 +1,3 @@
+everything
+is
+wrong
\ No newline at end of file