You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2021/02/02 09:46:43 UTC
[lucene-solr] branch master updated: LUCENE-9707: Hunspell: check
Lucene's implementation against Hunspell's test data (#2267)
This is an automated email from the ASF dual-hosted git repository.
dweiss pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new b48d5be LUCENE-9707: Hunspell: check Lucene's implementation against Hunspell's test data (#2267)
b48d5be is described below
commit b48d5beb34957e83e99ced60d57d4839b474f018
Author: Peter Gromov <pe...@jetbrains.com>
AuthorDate: Tue Feb 2 10:46:14 2021 +0100
LUCENE-9707: Hunspell: check Lucene's implementation against Hunspell's test data (#2267)
---
gradle/testing/randomization/policies/tests.policy | 3 +
.../lucene/analysis/hunspell/SpellCheckerTest.java | 90 ++++++++++------------
.../hunspell/TestHunspellRepositoryTestCases.java | 70 +++++++++++++++++
3 files changed, 112 insertions(+), 51 deletions(-)
diff --git a/gradle/testing/randomization/policies/tests.policy b/gradle/testing/randomization/policies/tests.policy
index 607d5a6..151f02e 100644
--- a/gradle/testing/randomization/policies/tests.policy
+++ b/gradle/testing/randomization/policies/tests.policy
@@ -90,6 +90,9 @@ grant {
// allows LuceneTestCase#runWithRestrictedPermissions to execute with lower (or no) permission
permission java.security.SecurityPermission "createAccessControlContext";
+
+ // Some Hunspell tests may read from external files specified in system properties
+ permission java.io.FilePermission "${hunspell.repo.path}${/}-", "read";
};
// Permissions to support ant build
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/SpellCheckerTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/SpellCheckerTest.java
index 123ee18..cb56816 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/SpellCheckerTest.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/SpellCheckerTest.java
@@ -16,35 +16,31 @@
*/
package org.apache.lucene.analysis.hunspell;
+import java.io.IOException;
import java.io.InputStream;
-import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
+import java.text.ParseException;
import java.util.List;
-import java.util.Objects;
import java.util.stream.Collectors;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.util.IOUtils;
-import org.junit.Test;
public class SpellCheckerTest extends StemmerTestBase {
- @Test
- public void base() throws Exception {
+
+ public void testBase() throws Exception {
doTest("base");
}
- @Test
- public void baseUtf() throws Exception {
+ public void testBaseUtf() throws Exception {
doTest("base_utf");
}
- @Test
- public void keepcase() throws Exception {
+ public void testKeepcase() throws Exception {
doTest("keepcase");
}
- @Test
- public void allcaps() throws Exception {
+ public void testAllcaps() throws Exception {
doTest("allcaps");
}
@@ -52,63 +48,51 @@ public class SpellCheckerTest extends StemmerTestBase {
doTest("rep");
}
- @Test
- public void forceUCase() throws Exception {
+ public void testForceUCase() throws Exception {
doTest("forceucase");
}
- @Test
- public void checkSharpS() throws Exception {
+ public void testCheckSharpS() throws Exception {
doTest("checksharps");
}
- @Test
- public void IJ() throws Exception {
+ public void testIJ() throws Exception {
doTest("IJ");
}
- @Test
- public void i53643_numbersWithSeparators() throws Exception {
+ public void testI53643_numbersWithSeparators() throws Exception {
doTest("i53643");
}
- @Test
- public void dotless_i() throws Exception {
+ public void testDotless_i() throws Exception {
doTest("dotless_i");
}
- @Test
- public void needAffixOnAffixes() throws Exception {
+ public void testNeedAffixOnAffixes() throws Exception {
doTest("needaffix5");
}
- @Test
- public void compoundFlag() throws Exception {
+ public void testCompoundFlag() throws Exception {
doTest("compoundflag");
}
- @Test
- public void checkCompoundCase() throws Exception {
+ public void testCheckCompoundCase() throws Exception {
doTest("checkcompoundcase");
}
- @Test
- public void checkCompoundDup() throws Exception {
+ public void testCheckCompoundDup() throws Exception {
doTest("checkcompounddup");
}
- @Test
- public void checkCompoundTriple() throws Exception {
+ public void testCheckCompoundTriple() throws Exception {
doTest("checkcompoundtriple");
}
- @Test
- public void simplifiedTriple() throws Exception {
+ public void testSimplifiedTriple() throws Exception {
doTest("simplifiedtriple");
}
- @Test
- public void compoundForbid() throws Exception {
+ public void testCompoundForbid() throws Exception {
doTest("compoundforbid");
}
@@ -161,10 +145,14 @@ public class SpellCheckerTest extends StemmerTestBase {
}
protected void doTest(String name) throws Exception {
- InputStream affixStream =
- Objects.requireNonNull(getClass().getResourceAsStream(name + ".aff"), name);
- InputStream dictStream =
- Objects.requireNonNull(getClass().getResourceAsStream(name + ".dic"), name);
+ checkSpellCheckerExpectations(
+ Path.of(getClass().getResource(name + ".aff").toURI()).getParent().resolve(name), true);
+ }
+
+ static void checkSpellCheckerExpectations(Path basePath, boolean checkSuggestions)
+ throws IOException, ParseException {
+ InputStream affixStream = Files.newInputStream(Path.of(basePath.toString() + ".aff"));
+ InputStream dictStream = Files.newInputStream(Path.of(basePath.toString() + ".dic"));
SpellChecker speller;
try {
@@ -176,30 +164,30 @@ public class SpellCheckerTest extends StemmerTestBase {
IOUtils.closeWhileHandlingException(dictStream);
}
- URL good = StemmerTestBase.class.getResource(name + ".good");
- if (good != null) {
- for (String word : Files.readAllLines(Path.of(good.toURI()))) {
- assertTrue("Unexpectedly considered misspelled: " + word, speller.spell(word));
+ Path good = Path.of(basePath + ".good");
+ if (Files.exists(good)) {
+ for (String word : Files.readAllLines(good)) {
+ assertTrue("Unexpectedly considered misspelled: " + word, speller.spell(word.trim()));
}
}
- URL wrong = StemmerTestBase.class.getResource(name + ".wrong");
- URL sug = StemmerTestBase.class.getResource(name + ".sug");
- if (wrong != null) {
- List<String> wrongWords = Files.readAllLines(Path.of(wrong.toURI()));
+ Path wrong = Path.of(basePath + ".wrong");
+ Path sug = Path.of(basePath + ".sug");
+ if (Files.exists(wrong)) {
+ List<String> wrongWords = Files.readAllLines(wrong);
for (String word : wrongWords) {
- assertFalse("Unexpectedly considered correct: " + word, speller.spell(word));
+ assertFalse("Unexpectedly considered correct: " + word, speller.spell(word.trim()));
}
- if (sug != null) {
+ if (Files.exists(sug) && checkSuggestions) {
String suggestions =
wrongWords.stream()
.map(s -> String.join(", ", speller.suggest(s)))
.filter(s -> !s.isEmpty())
.collect(Collectors.joining("\n"));
- assertEquals(Files.readString(Path.of(sug.toURI())).trim(), suggestions);
+ assertEquals(Files.readString(sug).trim(), suggestions);
}
} else {
- assertNull(".sug file without .wrong file!", sug);
+ assertFalse(".sug file without .wrong file!", Files.exists(sug));
}
}
}
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellRepositoryTestCases.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellRepositoryTestCases.java
new file mode 100644
index 0000000..048dc04
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellRepositoryTestCases.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.hunspell;
+
+import java.io.IOException;
+import java.nio.file.DirectoryStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.text.ParseException;
+import java.util.Collection;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.stream.Collectors;
+import org.junit.AssumptionViolatedException;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+/**
+ * Same as {@link SpellCheckerTest}, but runs against all of Hunspell's test data. The path to a
+ * checked-out Hunspell repository should be set with {@code -Dhunspell.repo.path=...}.
+ */
+@RunWith(Parameterized.class)
+public class TestHunspellRepositoryTestCases {
+ private final Path pathPrefix;
+
+ public TestHunspellRepositoryTestCases(String testName, Path pathPrefix) {
+ this.pathPrefix = pathPrefix;
+ }
+
+ @Parameterized.Parameters(name = "{0}")
+ public static Collection<Object[]> data() throws IOException {
+ String hunspellRepo = System.getProperty("hunspell.repo.path");
+ if (hunspellRepo == null) {
+ throw new AssumptionViolatedException("hunspell.repo.path property not specified.");
+ }
+
+ Set<String> names = new TreeSet<>();
+ Path tests = Path.of(hunspellRepo).resolve("tests");
+ try (DirectoryStream<Path> files = Files.newDirectoryStream(tests)) {
+ for (Path file : files) {
+ String name = file.getFileName().toString();
+ if (name.endsWith(".aff")) {
+ names.add(name.substring(0, name.length() - 4));
+ }
+ }
+ }
+
+ return names.stream().map(s -> new Object[] {s, tests.resolve(s)}).collect(Collectors.toList());
+ }
+
+ @Test
+ public void test() throws IOException, ParseException {
+ SpellCheckerTest.checkSpellCheckerExpectations(pathPrefix, false);
+ }
+}