You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by do...@apache.org on 2023/01/13 11:49:01 UTC

[lucene] 08/09: hunspell: support empty dictionaries, adapt to the hunspell/C++ repo changes (#11960)

This is an automated email from the ASF dual-hosted git repository.

donnerpeter pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git

commit 6332f4ffbad55b07360ba9bcd4c19cf7eca8891c
Author: Peter Gromov <pe...@jetbrains.com>
AuthorDate: Tue Nov 22 18:23:45 2022 +0100

    hunspell: support empty dictionaries, adapt to the hunspell/C++ repo changes (#11960)
    
    hunspell: support empty dictionaries, adapt to the hunspell/C++ repo changes
---
 .../java/org/apache/lucene/analysis/hunspell/WordStorage.java    | 9 ++++++---
 .../analysis/hunspell/TestHunspellRepositoryTestCases.java       | 1 -
 .../org/apache/lucene/analysis/hunspell/TestSpellChecking.java   | 4 ++++
 .../src/test/org/apache/lucene/analysis/hunspell/empty.aff       | 2 ++
 .../src/test/org/apache/lucene/analysis/hunspell/empty.dic       | 1 +
 .../src/test/org/apache/lucene/analysis/hunspell/empty.good      | 0
 .../src/test/org/apache/lucene/analysis/hunspell/empty.wrong     | 3 +++
 7 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/WordStorage.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/WordStorage.java
index cc39c5f4185..83fdfdea47d 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/WordStorage.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/WordStorage.java
@@ -424,10 +424,13 @@ class WordStorage {
     }
 
     WordStorage build() throws IOException {
-      assert !group.isEmpty() : "build() should be only called once";
-      flushGroup();
+      if (hashTable.length > 0) {
+        assert !group.isEmpty() : "build() should be only called once";
+        flushGroup();
+      }
       byte[] trimmedData = ArrayUtil.copyOfSubArray(wordData, 0, dataWriter.getPosition());
-      return new WordStorage(maxEntryLength, hasCustomMorphData, hashTable, trimmedData);
+      int[] table = hashTable.length == 0 ? new int[1] : hashTable;
+      return new WordStorage(maxEntryLength, hasCustomMorphData, table, trimmedData);
     }
   }
 
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellRepositoryTestCases.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellRepositoryTestCases.java
index c7a67762a75..5c2ec424b93 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellRepositoryTestCases.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellRepositoryTestCases.java
@@ -42,7 +42,6 @@ public class TestHunspellRepositoryTestCases {
           "hu", // Hungarian is hard: a lot of its rules are hardcoded in Hunspell code, not aff/dic
           "morph", // we don't do morphological analysis yet
           "opentaal_keepcase", // Hunspell bug: https://github.com/hunspell/hunspell/issues/712
-          "forbiddenword", // needs https://github.com/hunspell/hunspell/pull/713 PR to be merged
           "nepali", // not supported yet
           "utf8_nonbmp", // code points not supported yet
           "phone" // not supported yet, used only for suggestions in en_ZA
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestSpellChecking.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestSpellChecking.java
index ba8420b5806..d1b6bea03b3 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestSpellChecking.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestSpellChecking.java
@@ -37,6 +37,10 @@ import org.apache.lucene.util.IOUtils;
 
 public class TestSpellChecking extends LuceneTestCase {
 
+  public void testEmpty() throws Exception {
+    doTest("empty");
+  }
+
   public void testBase() throws Exception {
     doTest("base");
   }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/empty.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/empty.aff
new file mode 100644
index 00000000000..5540fec275f
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/empty.aff
@@ -0,0 +1,2 @@
+AF 2000
+INVALID something
\ No newline at end of file
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/empty.dic b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/empty.dic
new file mode 100644
index 00000000000..2995a4d0e74
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/empty.dic
@@ -0,0 +1 @@
+dummy
\ No newline at end of file
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/empty.good b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/empty.good
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/empty.wrong b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/empty.wrong
new file mode 100644
index 00000000000..b2bd664f7d5
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/empty.wrong
@@ -0,0 +1,3 @@
+everything
+is
+wrong
\ No newline at end of file