You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2021/02/02 09:46:43 UTC

[lucene-solr] branch master updated: LUCENE-9707: Hunspell: check Lucene's implementation against Hunspell's test data (#2267)

This is an automated email from the ASF dual-hosted git repository.

dweiss pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new b48d5be  LUCENE-9707: Hunspell: check Lucene's implementation against Hunspell's test data (#2267)
b48d5be is described below

commit b48d5beb34957e83e99ced60d57d4839b474f018
Author: Peter Gromov <pe...@jetbrains.com>
AuthorDate: Tue Feb 2 10:46:14 2021 +0100

    LUCENE-9707: Hunspell: check Lucene's implementation against Hunspell's test data (#2267)
---
 gradle/testing/randomization/policies/tests.policy |  3 +
 .../lucene/analysis/hunspell/SpellCheckerTest.java | 90 ++++++++++------------
 .../hunspell/TestHunspellRepositoryTestCases.java  | 70 +++++++++++++++++
 3 files changed, 112 insertions(+), 51 deletions(-)

diff --git a/gradle/testing/randomization/policies/tests.policy b/gradle/testing/randomization/policies/tests.policy
index 607d5a6..151f02e 100644
--- a/gradle/testing/randomization/policies/tests.policy
+++ b/gradle/testing/randomization/policies/tests.policy
@@ -90,6 +90,9 @@ grant {
 
   // allows LuceneTestCase#runWithRestrictedPermissions to execute with lower (or no) permission
   permission java.security.SecurityPermission "createAccessControlContext";
+
+  // Some Hunspell tests may read from external files specified in system properties
+  permission java.io.FilePermission "${hunspell.repo.path}${/}-", "read";
 };
 
 // Permissions to support ant build
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/SpellCheckerTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/SpellCheckerTest.java
index 123ee18..cb56816 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/SpellCheckerTest.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/SpellCheckerTest.java
@@ -16,35 +16,31 @@
  */
 package org.apache.lucene.analysis.hunspell;
 
+import java.io.IOException;
 import java.io.InputStream;
-import java.net.URL;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.text.ParseException;
 import java.util.List;
-import java.util.Objects;
 import java.util.stream.Collectors;
 import org.apache.lucene.store.ByteBuffersDirectory;
 import org.apache.lucene.util.IOUtils;
-import org.junit.Test;
 
 public class SpellCheckerTest extends StemmerTestBase {
-  @Test
-  public void base() throws Exception {
+
+  public void testBase() throws Exception {
     doTest("base");
   }
 
-  @Test
-  public void baseUtf() throws Exception {
+  public void testBaseUtf() throws Exception {
     doTest("base_utf");
   }
 
-  @Test
-  public void keepcase() throws Exception {
+  public void testKeepcase() throws Exception {
     doTest("keepcase");
   }
 
-  @Test
-  public void allcaps() throws Exception {
+  public void testAllcaps() throws Exception {
     doTest("allcaps");
   }
 
@@ -52,63 +48,51 @@ public class SpellCheckerTest extends StemmerTestBase {
     doTest("rep");
   }
 
-  @Test
-  public void forceUCase() throws Exception {
+  public void testForceUCase() throws Exception {
     doTest("forceucase");
   }
 
-  @Test
-  public void checkSharpS() throws Exception {
+  public void testCheckSharpS() throws Exception {
     doTest("checksharps");
   }
 
-  @Test
-  public void IJ() throws Exception {
+  public void testIJ() throws Exception {
     doTest("IJ");
   }
 
-  @Test
-  public void i53643_numbersWithSeparators() throws Exception {
+  public void testI53643_numbersWithSeparators() throws Exception {
     doTest("i53643");
   }
 
-  @Test
-  public void dotless_i() throws Exception {
+  public void testDotless_i() throws Exception {
     doTest("dotless_i");
   }
 
-  @Test
-  public void needAffixOnAffixes() throws Exception {
+  public void testNeedAffixOnAffixes() throws Exception {
     doTest("needaffix5");
   }
 
-  @Test
-  public void compoundFlag() throws Exception {
+  public void testCompoundFlag() throws Exception {
     doTest("compoundflag");
   }
 
-  @Test
-  public void checkCompoundCase() throws Exception {
+  public void testCheckCompoundCase() throws Exception {
     doTest("checkcompoundcase");
   }
 
-  @Test
-  public void checkCompoundDup() throws Exception {
+  public void testCheckCompoundDup() throws Exception {
     doTest("checkcompounddup");
   }
 
-  @Test
-  public void checkCompoundTriple() throws Exception {
+  public void testCheckCompoundTriple() throws Exception {
     doTest("checkcompoundtriple");
   }
 
-  @Test
-  public void simplifiedTriple() throws Exception {
+  public void testSimplifiedTriple() throws Exception {
     doTest("simplifiedtriple");
   }
 
-  @Test
-  public void compoundForbid() throws Exception {
+  public void testCompoundForbid() throws Exception {
     doTest("compoundforbid");
   }
 
@@ -161,10 +145,14 @@ public class SpellCheckerTest extends StemmerTestBase {
   }
 
   protected void doTest(String name) throws Exception {
-    InputStream affixStream =
-        Objects.requireNonNull(getClass().getResourceAsStream(name + ".aff"), name);
-    InputStream dictStream =
-        Objects.requireNonNull(getClass().getResourceAsStream(name + ".dic"), name);
+    checkSpellCheckerExpectations(
+        Path.of(getClass().getResource(name + ".aff").toURI()).getParent().resolve(name), true);
+  }
+
+  static void checkSpellCheckerExpectations(Path basePath, boolean checkSuggestions)
+      throws IOException, ParseException {
+    InputStream affixStream = Files.newInputStream(Path.of(basePath.toString() + ".aff"));
+    InputStream dictStream = Files.newInputStream(Path.of(basePath.toString() + ".dic"));
 
     SpellChecker speller;
     try {
@@ -176,30 +164,30 @@ public class SpellCheckerTest extends StemmerTestBase {
       IOUtils.closeWhileHandlingException(dictStream);
     }
 
-    URL good = StemmerTestBase.class.getResource(name + ".good");
-    if (good != null) {
-      for (String word : Files.readAllLines(Path.of(good.toURI()))) {
-        assertTrue("Unexpectedly considered misspelled: " + word, speller.spell(word));
+    Path good = Path.of(basePath + ".good");
+    if (Files.exists(good)) {
+      for (String word : Files.readAllLines(good)) {
+        assertTrue("Unexpectedly considered misspelled: " + word, speller.spell(word.trim()));
       }
     }
 
-    URL wrong = StemmerTestBase.class.getResource(name + ".wrong");
-    URL sug = StemmerTestBase.class.getResource(name + ".sug");
-    if (wrong != null) {
-      List<String> wrongWords = Files.readAllLines(Path.of(wrong.toURI()));
+    Path wrong = Path.of(basePath + ".wrong");
+    Path sug = Path.of(basePath + ".sug");
+    if (Files.exists(wrong)) {
+      List<String> wrongWords = Files.readAllLines(wrong);
       for (String word : wrongWords) {
-        assertFalse("Unexpectedly considered correct: " + word, speller.spell(word));
+        assertFalse("Unexpectedly considered correct: " + word, speller.spell(word.trim()));
       }
-      if (sug != null) {
+      if (Files.exists(sug) && checkSuggestions) {
         String suggestions =
             wrongWords.stream()
                 .map(s -> String.join(", ", speller.suggest(s)))
                 .filter(s -> !s.isEmpty())
                 .collect(Collectors.joining("\n"));
-        assertEquals(Files.readString(Path.of(sug.toURI())).trim(), suggestions);
+        assertEquals(Files.readString(sug).trim(), suggestions);
       }
     } else {
-      assertNull(".sug file without .wrong file!", sug);
+      assertFalse(".sug file without .wrong file!", Files.exists(sug));
     }
   }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellRepositoryTestCases.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellRepositoryTestCases.java
new file mode 100644
index 0000000..048dc04
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellRepositoryTestCases.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.hunspell;
+
+import java.io.IOException;
+import java.nio.file.DirectoryStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.text.ParseException;
+import java.util.Collection;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.stream.Collectors;
+import org.junit.AssumptionViolatedException;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+/**
+ * Same as {@link SpellCheckerTest}, but checks all of Hunspell's test data. The checked-out
+ * Hunspell repository path should be set via the {@code -Dhunspell.repo.path=...} system property.
+ */
+@RunWith(Parameterized.class)
+public class TestHunspellRepositoryTestCases {
+  private final Path pathPrefix;
+
+  public TestHunspellRepositoryTestCases(String testName, Path pathPrefix) {
+    this.pathPrefix = pathPrefix;
+  }
+
+  @Parameterized.Parameters(name = "{0}")
+  public static Collection<Object[]> data() throws IOException {
+    String hunspellRepo = System.getProperty("hunspell.repo.path");
+    if (hunspellRepo == null) {
+      throw new AssumptionViolatedException("hunspell.repo.path property not specified.");
+    }
+
+    Set<String> names = new TreeSet<>();
+    Path tests = Path.of(hunspellRepo).resolve("tests");
+    try (DirectoryStream<Path> files = Files.newDirectoryStream(tests)) {
+      for (Path file : files) {
+        String name = file.getFileName().toString();
+        if (name.endsWith(".aff")) {
+          names.add(name.substring(0, name.length() - 4));
+        }
+      }
+    }
+
+    return names.stream().map(s -> new Object[] {s, tests.resolve(s)}).collect(Collectors.toList());
+  }
+
+  @Test
+  public void test() throws IOException, ParseException {
+    SpellCheckerTest.checkSpellCheckerExpectations(pathPrefix, false);
+  }
+}