You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by rz...@apache.org on 2022/12/22 09:45:39 UTC
[opennlp] branch main updated: OPENNLP-1419 Enhance Tests and JavaDoc in opennlp.morfologik package
This is an automated email from the ASF dual-hosted git repository.
rzo1 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/main by this push:
new 70a7e071 OPENNLP-1419 Enhance Tests and JavaDoc in opennlp.morfologik package
70a7e071 is described below
commit 70a7e071fc98bcced74817774bb87637574d7d68
Author: Martin Wiesner <ma...@hs-heilbronn.de>
AuthorDate: Mon Dec 19 22:35:05 2022 +0100
OPENNLP-1419 Enhance Tests and JavaDoc in opennlp.morfologik package
- renames some unit test classes to match the class names under test
- introduces common base test class `AbstractMorfologikTest` to reduce dependencies among test classes
- extracts (test) resource loading to separate methods in `AbstractMorfologikTest`
- adds further assertions in `MorfologikPOSTaggerFactoryTest#testPOSTaggerWithCustomFactory()`
- adds missing JavaDoc to impl classes
- improves existing documentation for clarity
- removes superfluous text
- adds 'Override' annotation where useful and applicable
- fixes several typos, e.g. in `XMLDictionaryToTableParams`
---
...ilder.java => MorfologikDictionaryBuilder.java} | 39 ++++++++-------
.../main/java/opennlp/morfologik/cmdline/CLI.java | 2 +-
.../builder/MorfologikDictionaryBuilderTool.java | 9 ++--
.../builder/XMLDictionaryToTableParams.java | 2 +-
.../cmdline/builder/XMLDictionaryToTableTool.java | 4 +-
.../lemmatizer/MorfologikLemmatizer.java | 4 ++
.../tagdict/MorfologikPOSTaggerFactory.java | 28 ++++++++---
.../tagdict/MorfologikTagDictionary.java | 24 ++++------
.../opennlp/morfologik/util/MorfologikUtil.java | 9 ++--
.../opennlp/morfologik/AbstractMorfologikTest.java | 56 ++++++++++++++++++++++
...t.java => MorfologikDictionaryBuilderTest.java} | 34 ++++---------
.../lemmatizer/MorfologikLemmatizerTest.java | 11 +++--
...st.java => MorfologikPOSTaggerFactoryTest.java} | 21 ++++----
.../tagdict/MorfologikTagDictionaryTest.java | 9 ++--
14 files changed, 163 insertions(+), 89 deletions(-)
diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionayBuilder.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionaryBuilder.java
similarity index 66%
rename from opennlp-morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionayBuilder.java
rename to opennlp-morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionaryBuilder.java
index 52cc64ee..8fdf4bdb 100644
--- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionayBuilder.java
+++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionaryBuilder.java
@@ -27,24 +27,28 @@ import morfologik.stemming.EncoderType;
import morfologik.tools.DictCompile;
/**
- * Utility class to build Morfologik dictionaries from a tab separated values
- * file. The first column is the word, the second its lemma and the third a POS
- * tag. If there is no lemma information leave the second column empty.
+ * Utility class to build Morfologik dictionaries from a tab separated
+ * values file.
+ * <p>
+ * The first column is the word, the second its lemma and the third a POS
+ * tag (base,inflected,tag). If there is no lemma information leave the
+ * second column empty.
*/
-public class MorfologikDictionayBuilder {
+public class MorfologikDictionaryBuilder {
/**
* Helper to compile a morphological dictionary automaton.
*
- * @param input The input file (base,inflected,tag). An associated metadata
- * (*.info) file must exist.
- * @param overwrite Overwrite the output file if it exists.
- * @param validate Validate input to make sure it makes sense.
- * @param acceptBom Accept leading BOM bytes (UTF-8).
- * @param acceptCr Accept CR bytes in input sequences (\r).
- * @param ignoreEmpty Ignore empty lines in the input.
- * @return the dictionary path
- * @throws Exception
+ * @param input The {@link Path input file} (base,inflected,tag).
+ * An associated metadata ({@code *.info}) file must exist.
+ * @param overwrite Whether to overwrite the output file if it exists, or not.
+ * @param validate Whether to validate input to make sure it makes sense.
+ * @param acceptBom Whether to accept leading BOM bytes (UTF-8), or not.
+ * @param acceptCr Whether to accept CR bytes in input sequences ({@code \r}), or not.
+ * @param ignoreEmpty Whether to ignore empty lines in the input, or not.
+ *
+ * @return The resulting dictionary {@link Path}.
+ * @throws Exception Thrown if errors occurred during dictionary compilation.
*/
public Path build(Path input, boolean overwrite, boolean validate,
boolean acceptBom, boolean acceptCr, boolean ignoreEmpty)
@@ -65,10 +69,11 @@ public class MorfologikDictionayBuilder {
* Helper to compile a morphological dictionary automaton using default
* parameters.
*
- * @param input The input file (base,inflected,tag). An associated metadata
- * (*.info) file must exist.
- * @return the dictionary path
- * @throws Exception
+ * @param input The {@link Path input file} (base,inflected,tag).
+ * An associated metadata ({@code *.info}) file must exist.
+ *
+ * @return The resulting dictionary {@link Path}.
+ * @throws Exception Thrown if errors occurred during dictionary compilation.
*/
public Path build(Path input) throws Exception {
return build(input, true, true, false, false, false);
diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java
index bd929886..f99c71f3 100644
--- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java
+++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java
@@ -55,7 +55,7 @@ public final class CLI {
}
/**
- * @return a set which contains all tool names
+ * @return A set which contains all tool names.
*/
public static Set<String> getToolNames() {
return toolLookupMap.keySet();
diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderTool.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderTool.java
index f916edb4..b7118c15 100644
--- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderTool.java
+++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderTool.java
@@ -22,7 +22,7 @@ import java.nio.file.Path;
import morfologik.stemming.DictionaryMetadata;
-import opennlp.morfologik.builder.MorfologikDictionayBuilder;
+import opennlp.morfologik.builder.MorfologikDictionaryBuilder;
import opennlp.tools.cmdline.BasicCmdLineTool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.TerminateToolException;
@@ -32,14 +32,17 @@ public class MorfologikDictionaryBuilderTool extends BasicCmdLineTool {
interface Params extends MorfologikDictionaryBuilderParams {
}
+ @Override
public String getShortDescription() {
return "builds a binary POS Dictionary using Morfologik";
}
+ @Override
public String getHelp() {
return getBasicHelp(Params.class);
}
+ @Override
public void run(String[] args) {
Params params = validateAndParseParams(args, Params.class);
@@ -49,14 +52,14 @@ public class MorfologikDictionaryBuilderTool extends BasicCmdLineTool {
Path metadataPath = DictionaryMetadata.getExpectedMetadataLocation(dictInFile.toPath());
CmdLineUtil.checkInputFile("dictionary metadata (.info) input file", metadataPath.toFile());
- MorfologikDictionayBuilder builder = new MorfologikDictionayBuilder();
+ MorfologikDictionaryBuilder builder = new MorfologikDictionaryBuilder();
try {
builder.build(dictInFile.toPath(), params.getOverwrite(),
params.getValidate(), params.getAcceptBOM(), params.getAcceptCR(),
params.getIgnoreEmpty());
} catch (Exception e) {
throw new TerminateToolException(-1,
- "Error while creating Morfologik POS Dictionay: " + e.getMessage(), e);
+ "Error while creating Morfologik POS Dictionary: " + e.getMessage(), e);
}
}
diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableParams.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableParams.java
index 529544d6..01773ccb 100644
--- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableParams.java
+++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableParams.java
@@ -35,7 +35,7 @@ interface XMLDictionaryToTableParams extends EncodingParameter {
+ "(.info will be also created).")
File getOutputFile();
- @ParameterDescription(valueName = "char", description = "Columm separator "
+ @ParameterDescription(valueName = "char", description = "Column separator "
+ "(must be a single character)")
@OptionalParameter(defaultValue = ",")
String getSeparator();
diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableTool.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableTool.java
index 23c43b6e..1516d61d 100644
--- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableTool.java
+++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableTool.java
@@ -36,16 +36,18 @@ import opennlp.tools.postag.POSDictionary;
public class XMLDictionaryToTableTool extends BasicCmdLineTool {
- private String SEPARATOR;
+ private static String SEPARATOR;
public String getShortDescription() {
return "reads an OpenNLP XML tag dictionary and outputs it in a tabular file";
}
+ @Override
public String getHelp() {
return getBasicHelp(Params.class);
}
+ @Override
public void run(String[] args) {
Params params = validateAndParseParams(args, Params.class);
diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java
index a9ce6b26..6a3b8db1 100644
--- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java
+++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java
@@ -32,6 +32,10 @@ import morfologik.stemming.WordData;
import opennlp.tools.lemmatizer.Lemmatizer;
+/**
+ * A {@link Lemmatizer} implementation based on Morfologik binary
+ * dictionaries.
+ */
public class MorfologikLemmatizer implements Lemmatizer {
private final Dictionary dictionary;
diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java
index a49a7823..85ad6e44 100644
--- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java
+++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java
@@ -33,6 +33,10 @@ import opennlp.tools.postag.TagDictionary;
import opennlp.tools.util.model.ArtifactSerializer;
import opennlp.tools.util.model.ByteArraySerializer;
+/**
+ * The factory provides a Morfologik specific {@link POSTaggerFactory} implementation
+ * and initializes related resources.
+ */
public class MorfologikPOSTaggerFactory extends POSTaggerFactory {
private static final String MORFOLOGIK_POSDICT_SUF = "morfologik_dict";
@@ -51,6 +55,16 @@ public class MorfologikPOSTaggerFactory extends POSTaggerFactory {
public MorfologikPOSTaggerFactory() {
}
+ /**
+ * Initializes a {@link TagDictionary} from a {@link File dictionary file}.
+ *
+ * @param dictionary The {@link File} used as input for creating the dictionary.
+ *
+ * @return A valid {@link TagDictionary} ready for use.
+ * @throws FileNotFoundException Thrown if {@code dictionary} or related metadata
+ * could not be read in.
+ * @throws IOException Thrown if IO errors occurred.
+ */
public TagDictionary createTagDictionary(File dictionary) throws IOException {
if (!dictionary.canRead()) {
@@ -69,6 +83,11 @@ public class MorfologikPOSTaggerFactory extends POSTaggerFactory {
return createMorfologikDictionary(dictData, dictInfo);
}
+ /**
+ * @return The {@link TagDictionary} used.
+ * @throws RuntimeException Thrown if errors occurred loading or reading
+ * Morfologik dictionary files.
+ */
@Override
public TagDictionary getTagDictionary() {
if (this.dict == null) {
@@ -76,10 +95,8 @@ public class MorfologikPOSTaggerFactory extends POSTaggerFactory {
if (artifactProvider != null) {
Object obj = artifactProvider.getArtifact(MORFOLOGIK_POSDICT);
if (obj != null) {
- byte[] data = artifactProvider
- .getArtifact(MORFOLOGIK_POSDICT);
- byte[] info = artifactProvider
- .getArtifact(MORFOLOGIK_DICT_INFO);
+ byte[] data = artifactProvider.getArtifact(MORFOLOGIK_POSDICT);
+ byte[] info = artifactProvider.getArtifact(MORFOLOGIK_DICT_INFO);
try {
this.dict = createMorfologikDictionary(data, info);
@@ -137,8 +154,7 @@ public class MorfologikPOSTaggerFactory extends POSTaggerFactory {
private TagDictionary createMorfologikDictionary(byte[] data, byte[] info)
throws IOException {
morfologik.stemming.Dictionary dict = morfologik.stemming.Dictionary
- .read(new ByteArrayInputStream(data), new ByteArrayInputStream(
- info));
+ .read(new ByteArrayInputStream(data), new ByteArrayInputStream(info));
return new MorfologikTagDictionary(dict);
}
}
diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikTagDictionary.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikTagDictionary.java
index 0d03eb8d..4df18e49 100644
--- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikTagDictionary.java
+++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikTagDictionary.java
@@ -37,27 +37,23 @@ public class MorfologikTagDictionary implements TagDictionary {
private final boolean isCaseSensitive;
/**
- * Creates a case sensitive {@link MorfologikTagDictionary}
+ * Initializes a case-sensitive {@link MorfologikTagDictionary}.
*
- * @param dict
- * a Morfologik FSA dictionary
- * @throws IllegalArgumentException
- * if FSA's root node cannot be acquired (dictionary is empty).
+ * @param dict A Morfologik FSA {@link Dictionary}.
+ * @throws IllegalArgumentException Thrown if FSA's root node cannot be acquired
+ * (dictionary is empty).
*/
- public MorfologikTagDictionary(Dictionary dict)
- throws IllegalArgumentException {
+ public MorfologikTagDictionary(Dictionary dict) throws IllegalArgumentException {
this(dict, true);
}
/**
- * Creates MorfologikLemmatizer
+ * Initializes a {@link MorfologikTagDictionary}.
*
- * @param dict
- * a Morfologik FSA dictionary
- * @param caseSensitive
- * if true it performs case-sensitive lookup
- * @throws IllegalArgumentException
- * if FSA's root node cannot be acquired (dictionary is empty).
+ * @param dict A Morfologik FSA {@link Dictionary}.
+ * @param caseSensitive If {@code true} it performs case-sensitive lookup.
+ * @throws IllegalArgumentException Thrown if FSA's root node cannot be acquired
+ * (dictionary is empty).
*/
public MorfologikTagDictionary(Dictionary dict, boolean caseSensitive)
throws IllegalArgumentException {
diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/util/MorfologikUtil.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/util/MorfologikUtil.java
index f0e35cdc..f3d67955 100644
--- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/util/MorfologikUtil.java
+++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/util/MorfologikUtil.java
@@ -21,16 +21,17 @@ import java.io.File;
import morfologik.stemming.DictionaryMetadata;
+/**
+ * A utility class that helps in finding (related) Morfologik files.
+ */
public class MorfologikUtil {
public static File getExpectedPropertiesFile(File dictFile) {
- return DictionaryMetadata.getExpectedMetadataLocation(dictFile.toPath())
- .toFile();
+ return DictionaryMetadata.getExpectedMetadataLocation(dictFile.toPath()).toFile();
}
public static File getExpectedPropertiesFile(String dictFile) {
- File f = new File(dictFile);
- return getExpectedPropertiesFile(f);
+ return getExpectedPropertiesFile(new File(dictFile));
}
}
diff --git a/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/AbstractMorfologikTest.java b/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/AbstractMorfologikTest.java
new file mode 100644
index 00000000..ce3fe063
--- /dev/null
+++ b/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/AbstractMorfologikTest.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.morfologik;
+
+import java.io.File;
+import java.io.InputStream;
+import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+
+import morfologik.stemming.DictionaryMetadata;
+
+import opennlp.morfologik.builder.MorfologikDictionaryBuilder;
+
+public abstract class AbstractMorfologikTest {
+
+ protected static Path createMorfologikDictionary() throws Exception {
+ Path tabFilePath = File.createTempFile(AbstractMorfologikTest.class.getName(), ".txt").toPath();
+ tabFilePath.toFile().deleteOnExit();
+ Path infoFilePath = DictionaryMetadata.getExpectedMetadataLocation(tabFilePath);
+ infoFilePath.toFile().deleteOnExit();
+
+ Files.copy(getResourceStream("/dictionaryWithLemma.txt"), tabFilePath,
+ StandardCopyOption.REPLACE_EXISTING);
+ Files.copy(getResourceStream("/dictionaryWithLemma.info"), infoFilePath,
+ StandardCopyOption.REPLACE_EXISTING);
+
+ MorfologikDictionaryBuilder builder = new MorfologikDictionaryBuilder();
+
+ return builder.build(tabFilePath);
+ }
+
+ private static InputStream getResourceStream(String name) {
+ return AbstractMorfologikTest.class.getResourceAsStream(name);
+ }
+
+ protected static URL getResource(String name) {
+ return AbstractMorfologikTest.class.getResource(name);
+ }
+}
diff --git a/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/builder/POSDictionayBuilderTest.java b/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/builder/MorfologikDictionaryBuilderTest.java
similarity index 66%
rename from opennlp-morfologik-addon/src/test/java/opennlp/morfologik/builder/POSDictionayBuilderTest.java
rename to opennlp-morfologik-addon/src/test/java/opennlp/morfologik/builder/MorfologikDictionaryBuilderTest.java
index 46b6da24..6de83f73 100644
--- a/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/builder/POSDictionayBuilderTest.java
+++ b/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/builder/MorfologikDictionaryBuilderTest.java
@@ -17,50 +17,32 @@
package opennlp.morfologik.builder;
-import java.io.File;
-import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
-import java.nio.file.StandardCopyOption;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
-import morfologik.stemming.DictionaryMetadata;
-
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
+import opennlp.morfologik.AbstractMorfologikTest;
import opennlp.morfologik.lemmatizer.MorfologikLemmatizer;
-public class POSDictionayBuilderTest {
-
- public static Path createMorfologikDictionary() throws Exception {
- Path tabFilePath = File.createTempFile(
- POSDictionayBuilderTest.class.getName(), ".txt").toPath();
- tabFilePath.toFile().deleteOnExit();
- Path infoFilePath = DictionaryMetadata.getExpectedMetadataLocation(tabFilePath);
- infoFilePath.toFile().deleteOnExit();
-
- Files.copy(POSDictionayBuilderTest.class.getResourceAsStream(
- "/dictionaryWithLemma.txt"), tabFilePath, StandardCopyOption.REPLACE_EXISTING);
- Files.copy(POSDictionayBuilderTest.class.getResourceAsStream(
- "/dictionaryWithLemma.info"), infoFilePath, StandardCopyOption.REPLACE_EXISTING);
-
- MorfologikDictionayBuilder builder = new MorfologikDictionayBuilder();
-
- return builder.build(tabFilePath);
- }
+/**
+ * Tests for the {@link MorfologikDictionaryBuilder} class.
+ */
+public class MorfologikDictionaryBuilderTest extends AbstractMorfologikTest {
@Test
- public void testMultithread() throws Exception {
+ public void testMultithreading() throws Exception {
// Part 1: compile a FSA lemma dictionary
// we need the tabular dictionary. It is mandatory to have info
// file with same name, but .info extension
// this will build a binary dictionary located in compiledLemmaDictionary
- Path compiledLemmaDictionary = new MorfologikDictionayBuilder().build(
- Paths.get(POSDictionayBuilderTest.class.getResource("/dictionaryWithLemma.txt").getPath()));
+ Path compiledLemmaDictionary = new MorfologikDictionaryBuilder().build(
+ Paths.get(getResource("/dictionaryWithLemma.txt").getPath()));
// Part 2: load a MorfologikLemmatizer and use it
MorfologikLemmatizer lemmatizer = new MorfologikLemmatizer(compiledLemmaDictionary);
diff --git a/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java b/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java
index cbf1c6fd..161aaa0d 100644
--- a/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java
+++ b/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java
@@ -24,10 +24,13 @@ import java.util.List;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
-import opennlp.morfologik.builder.POSDictionayBuilderTest;
+import opennlp.morfologik.AbstractMorfologikTest;
import opennlp.tools.lemmatizer.Lemmatizer;
-public class MorfologikLemmatizerTest {
+/**
+ * Tests for the {@link MorfologikLemmatizer} class.
+ */
+public class MorfologikLemmatizerTest extends AbstractMorfologikTest {
@Test
public void testLemmatizeInsensitive() throws Exception {
@@ -42,7 +45,7 @@ public class MorfologikLemmatizerTest {
Assertions.assertEquals("casar", lemmas[0]);
Assertions.assertEquals("casa", lemmas[1]);
- // lookup is case insensitive. There is no entry casa - prop
+ // lookup is case-insensitive. There is no entry casa - prop
Assertions.assertNull(lemmas[2]);
}
@@ -61,7 +64,7 @@ public class MorfologikLemmatizerTest {
private MorfologikLemmatizer createDictionary(boolean caseSensitive)
throws Exception {
- Path output = POSDictionayBuilderTest.createMorfologikDictionary();
+ Path output = createMorfologikDictionary();
output.toFile().deleteOnExit();
return new MorfologikLemmatizer(output);
}
diff --git a/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java b/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactoryTest.java
similarity index 83%
rename from opennlp-morfologik-addon/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java
rename to opennlp-morfologik-addon/src/test/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactoryTest.java
index a9ccba7c..3186b428 100644
--- a/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java
+++ b/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactoryTest.java
@@ -27,7 +27,7 @@ import java.nio.file.Path;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
-import opennlp.morfologik.builder.POSDictionayBuilderTest;
+import opennlp.morfologik.AbstractMorfologikTest;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSSample;
@@ -42,14 +42,13 @@ import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.model.ModelType;
/**
- * Tests for the {@link POSTaggerFactory} class.
+ * Tests for the {@link MorfologikPOSTaggerFactory} class.
*/
-public class POSTaggerFactoryTest {
+public class MorfologikPOSTaggerFactoryTest extends AbstractMorfologikTest {
- private static ObjectStream<POSSample> createSampleStream()
- throws IOException {
+ private static ObjectStream<POSSample> createSampleStream() throws IOException {
MarkableFileInputStreamFactory sampleDataIn = new MarkableFileInputStreamFactory(
- new File(POSTaggerFactory.class.getResource("/AnnotatedSentences.txt").getFile()));
+ new File(getResource("/AnnotatedSentences.txt").getFile()));
ObjectStream<String> lineStream = null;
try {
@@ -70,26 +69,30 @@ public class POSTaggerFactoryTest {
@Test
public void testPOSTaggerWithCustomFactory() throws Exception {
- Path dictionary = POSDictionayBuilderTest.createMorfologikDictionary();
+ Path dictionary = createMorfologikDictionary();
dictionary.toFile().deleteOnExit();
POSTaggerFactory inFactory = new MorfologikPOSTaggerFactory();
TagDictionary inDict = inFactory.createTagDictionary(dictionary.toFile());
inFactory.setTagDictionary(inDict);
POSModel posModel = trainPOSModel(ModelType.MAXENT, inFactory);
+ Assertions.assertNotNull(posModel);
POSTaggerFactory factory = posModel.getFactory();
+ Assertions.assertNotNull(factory);
Assertions.assertTrue(factory.getTagDictionary() instanceof MorfologikTagDictionary);
- factory = null;
-
ByteArrayOutputStream out = new ByteArrayOutputStream();
posModel.serialize(out);
ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
POSModel fromSerialized = new POSModel(in);
+ Assertions.assertNotNull(fromSerialized);
+ // check for equality
+ Assertions.assertEquals(posModel, fromSerialized);
factory = fromSerialized.getFactory();
+ Assertions.assertNotNull(factory);
Assertions.assertTrue(factory.getTagDictionary() instanceof MorfologikTagDictionary);
Assertions.assertEquals(2, factory.getTagDictionary().getTags("casa").length);
diff --git a/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/tagdict/MorfologikTagDictionaryTest.java b/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/tagdict/MorfologikTagDictionaryTest.java
index b3a0cbde..48d7ef5e 100644
--- a/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/tagdict/MorfologikTagDictionaryTest.java
+++ b/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/tagdict/MorfologikTagDictionaryTest.java
@@ -26,10 +26,13 @@ import morfologik.stemming.Dictionary;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
-import opennlp.morfologik.builder.POSDictionayBuilderTest;
+import opennlp.morfologik.AbstractMorfologikTest;
import opennlp.tools.postag.TagDictionary;
-public class MorfologikTagDictionaryTest {
+/**
+ * Tests for the {@link MorfologikTagDictionary} class.
+ */
+public class MorfologikTagDictionaryTest extends AbstractMorfologikTest {
@Test
public void testNoLemma() throws Exception {
@@ -83,7 +86,7 @@ public class MorfologikTagDictionaryTest {
private MorfologikTagDictionary createDictionary(boolean caseSensitive,
List<String> constant) throws Exception {
- Path output = POSDictionayBuilderTest.createMorfologikDictionary();
+ Path output = createMorfologikDictionary();
output.toFile().deleteOnExit();
Dictionary dic = Dictionary.read(output);
return new MorfologikTagDictionary(dic, caseSensitive);