You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ma...@apache.org on 2022/12/19 08:26:50 UTC
[opennlp] branch master updated: OPENNLP-1412 Provide equals and hashCode for ParserModel and TokenizerModel (#458)
This is an automated email from the ASF dual-hosted git repository.
mawiesne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/master by this push:
new 04048f56 OPENNLP-1412 Provide equals and hashCode for ParserModel and TokenizerModel (#458)
04048f56 is described below
commit 04048f5607a3e8c82e0aeef1dd67916d2326f43e
Author: Martin Wiesner <ma...@users.noreply.github.com>
AuthorDate: Mon Dec 19 09:26:44 2022 +0100
OPENNLP-1412 Provide equals and hashCode for ParserModel and TokenizerModel (#458)
- adds specific `equals` and `hashCode` implementations for `TokenizerModel` and `ParserModel`.
- introduces new test dependency `junit-jupiter-params` (test-scoped) for opennlp-tools for more flexible test setups.
- improves `TokenizerModelTest`, `treeinsert.ParserTest` and `chunking.ParserTest` by adding further assertions and fixes TODOs.
- adds `AbstractParserModelTest` to avoid code duplication, simplifying existing test cases.
- adds another `Parse` example to demonstrate how easy it is to check different examples without duplicating code.
- removes commented-out code from both `ParserTest` variants (chunking/treeinsert) by providing actual assertions instead!
---
opennlp-tools/pom.xml | 6 ++
.../java/opennlp/tools/parser/ParserModel.java | 23 +++++
.../opennlp/tools/tokenize/TokenizerModel.java | 23 +++++
.../tools/parser/AbstractParserModelTest.java | 115 +++++++++++++++++++++
.../opennlp/tools/parser/chunking/ParserTest.java | 52 ++++------
.../tools/parser/treeinsert/ParserTest.java | 51 ++++-----
.../opennlp/tools/tokenize/TokenizerModelTest.java | 20 ++--
pom.xml | 7 ++
8 files changed, 223 insertions(+), 74 deletions(-)
diff --git a/opennlp-tools/pom.xml b/opennlp-tools/pom.xml
index d45ded45..d8e1b5a1 100644
--- a/opennlp-tools/pom.xml
+++ b/opennlp-tools/pom.xml
@@ -62,6 +62,12 @@
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.junit.jupiter</groupId>
+ <artifactId>junit-jupiter-params</artifactId>
+ <scope>test</scope>
+ </dependency>
+
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java
index 465457f8..8b892261 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java
@@ -367,4 +367,27 @@ public class ParserModel extends BaseModel {
throw new InvalidFormatException("Missing the head rules!");
}
}
+
+ @Override
+ public int hashCode() {
+   // Hash exactly the artifacts equals(Object) compares, so equal models share one hash code.
+   return Objects.hash(artifactMap.get(MANIFEST_ENTRY), artifactMap.get(BUILD_MODEL_ENTRY_NAME));
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+   // Identity short-cut.
+   if (this == obj) {
+     return true;
+   }
+   // Anything that is not a ParserModel (including null) is never equal.
+   if (!(obj instanceof ParserModel)) {
+     return false;
+   }
+   Map<String, Object> otherArtifacts = ((ParserModel) obj).artifactMap;
+   // Equality is decided by the manifest and the build model artifact only.
+   AbstractModel otherBuildModel = (AbstractModel) otherArtifacts.get(BUILD_MODEL_ENTRY_NAME);
+   return artifactMap.get(MANIFEST_ENTRY).equals(otherArtifacts.get(MANIFEST_ENTRY))
+       && artifactMap.get(BUILD_MODEL_ENTRY_NAME).equals(otherBuildModel);
+ }
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java
index b2d5003f..201107aa 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java
@@ -24,6 +24,7 @@ import java.io.InputStream;
import java.net.URL;
import java.nio.file.Path;
import java.util.Map;
+import java.util.Objects;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.ml.model.AbstractModel;
@@ -162,4 +163,26 @@ public final class TokenizerModel extends BaseModel {
public boolean useAlphaNumericOptimization() {
return getFactory() != null && getFactory().isUseAlphaNumericOptmization();
}
+
+ @Override
+ public int hashCode() { // hashes the same two artifacts that equals(Object) compares
+ return Objects.hash(artifactMap.get(MANIFEST_ENTRY), artifactMap.get(TOKENIZER_MODEL_ENTRY));
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+   // Identity short-cut.
+   if (this == obj) {
+     return true;
+   }
+   // Anything that is not a TokenizerModel (including null) is never equal.
+   if (!(obj instanceof TokenizerModel)) {
+     return false;
+   }
+   Map<String, Object> otherArtifacts = ((TokenizerModel) obj).artifactMap;
+   // Equality is decided by the manifest and the tokenizer model artifact only.
+   AbstractModel otherTokenModel = (AbstractModel) otherArtifacts.get(TOKENIZER_MODEL_ENTRY);
+   return artifactMap.get(MANIFEST_ENTRY).equals(otherArtifacts.get(MANIFEST_ENTRY))
+       && artifactMap.get(TOKENIZER_MODEL_ENTRY).equals(otherTokenModel);
+ }
}
diff --git a/opennlp-tools/src/test/java/opennlp/tools/parser/AbstractParserModelTest.java b/opennlp-tools/src/test/java/opennlp/tools/parser/AbstractParserModelTest.java
new file mode 100644
index 00000000..9df819a9
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/parser/AbstractParserModelTest.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.parser;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.stream.Stream;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import opennlp.tools.util.Span;
+
+/**
+ * Common test class for {@link ParserModel}-driven test cases.
+ */
+ public abstract class AbstractParserModelTest {
+
+   /**
+    * @return A valid {@link ParserModel} supplied by the concrete test class; must not be {@code null}.
+    */
+   protected abstract ParserModel getModel();
+
+   /**
+    * Verifies that a {@link ParserModel} restored from its serialized form equals the original.
+    * <p>
+    * Tests {@link ParserModel#equals(Object)}.
+    */
+   @Test
+   void testModelSerializationAndEquality() throws IOException {
+     Assertions.assertNotNull(getModel());
+     Assertions.assertFalse(getModel().isLoadedFromSerialized());
+
+     // Test serializing and de-serializing model
+     ByteArrayOutputStream outArray = new ByteArrayOutputStream();
+     getModel().serialize(outArray);
+     outArray.close();
+
+     // TEST: de-serialization and equality
+     ParserModel outputModel = new ParserModel(new ByteArrayInputStream(outArray.toByteArray()));
+     Assertions.assertNotNull(outputModel);
+     Assertions.assertTrue(outputModel.isLoadedFromSerialized());
+     Assertions.assertEquals(getModel(), outputModel);
+   }
+
+   /**
+    * Verifies that parsing with a {@link ParserModel} yields a complete parse of the reference text.
+    */
+   @ParameterizedTest(name = "Parse example {index}.")
+   @MethodSource("provideParsePairs")
+   void testParsing(String input, String reference) {
+     // prepare
+     Assertions.assertNotNull(getModel());
+     Parse p = Parse.parseParse(input);
+     Assertions.assertNotNull(p);
+     Assertions.assertTrue(p.complete());
+     Assertions.assertEquals(reference, p.getText());
+     opennlp.tools.parser.Parser parser = ParserFactory.create(getModel());
+     Assertions.assertNotNull(parser);
+
+     // TEST: parsing
+     Parse parsedViaParser = parser.parse(p);
+     Assertions.assertNotNull(parsedViaParser);
+     Assertions.assertTrue(parsedViaParser.complete());
+     Assertions.assertEquals(reference, parsedViaParser.getText()); // check the result, not the input
+     Span s = parsedViaParser.getSpan();
+     Assertions.assertNotNull(s);
+   }
+
+   /*
+    * Supplies (bracketed parse, expected plain text) argument pairs for testParsing.
+    */
+   private static Stream<Arguments> provideParsePairs() {
+     return Stream.of(
+         // Example 1: with eos character
+         Arguments.of("(TOP "
+             + "(S (S (NP-SBJ (PRP She) )(VP (VBD was) "
+             + "(ADVP (RB just) )(NP-PRD (NP (DT another) (NN freighter) )"
+             + "(PP (IN from) (NP (DT the) (NNPS States) )))))(, ,) "
+             + "(CC and) "
+             + "(S (NP-SBJ (PRP she) )(VP (VBD seemed) "
+             + "(ADJP-PRD (ADJP (RB as) (JJ commonplace) )(PP (IN as) (NP (PRP$ her) "
+             + "(NN name) )))))(. .) ))",
+             "She was just another freighter from the States , " +
+             "and she seemed as commonplace as her name . "),
+         // Example 2: without eos character
+         Arguments.of("(S "
+             + "(PP (IN On) (NP (NNP June) (CD 16))) "
+             + "(NP (PRP he))"
+             + "(VP (VBD was) (VP (VBN born) "
+             + "(PP in (NP Germany)))))",
+             "On June 16 he was born Germany ")
+     );
+   }
+
+ }
diff --git a/opennlp-tools/src/test/java/opennlp/tools/parser/chunking/ParserTest.java b/opennlp-tools/src/test/java/opennlp/tools/parser/chunking/ParserTest.java
index e82410e9..37ae93e0 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/parser/chunking/ParserTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/parser/chunking/ParserTest.java
@@ -17,52 +17,40 @@
package opennlp.tools.parser.chunking;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
+import java.io.IOException;
-import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import opennlp.tools.parser.AbstractParserModelTest;
import opennlp.tools.parser.HeadRules;
import opennlp.tools.parser.Parse;
-import opennlp.tools.parser.ParserFactory;
import opennlp.tools.parser.ParserModel;
import opennlp.tools.parser.ParserTestUtil;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.TrainingParameters;
/**
- * Tests for the {@link Parser} class.
+ * Tests for the {@link opennlp.tools.parser.chunking.Parser} class.
*/
-public class ParserTest {
+public class ParserTest extends AbstractParserModelTest {
- /**
- * Verify that training and tagging does not cause
- * runtime problems.
- */
- @Test
- void testChunkingParserTraining() throws Exception {
+ /* Trained dynamically before test */
+ private static ParserModel model;
+ @Override
+ protected ParserModel getModel() {
+ return model;
+ }
+
+ @BeforeAll
+ public static void setupEnvironment() throws IOException {
ObjectStream<Parse> parseSamples = ParserTestUtil.openTestTrainingData();
HeadRules headRules = ParserTestUtil.createTestHeadRules();
-
- ParserModel model = Parser.train("eng", parseSamples, headRules,
- TrainingParameters.defaultParams());
-
- opennlp.tools.parser.Parser parser = ParserFactory.create(model);
-
- // TODO:
- // Tests parsing to make sure the code does not has
- // a bug which fails always with a runtime exception
- // parser.parse(Parse.parseParse("She was just another freighter from the " +
- // "States and she seemed as commonplace as her name ."));
-
- // Test serializing and de-serializing model
- ByteArrayOutputStream outArray = new ByteArrayOutputStream();
- model.serialize(outArray);
- outArray.close();
-
- ParserModel outputModel = new ParserModel(new ByteArrayInputStream(outArray.toByteArray()));
-
- // TODO: compare both models
+ // Training an English lang 'opennlp.tools.parser.chunking.Parser'
+ model = Parser.train("eng", parseSamples, headRules, TrainingParameters.defaultParams());
+ Assertions.assertNotNull(model);
+ Assertions.assertFalse(model.isLoadedFromSerialized());
}
+
}
diff --git a/opennlp-tools/src/test/java/opennlp/tools/parser/treeinsert/ParserTest.java b/opennlp-tools/src/test/java/opennlp/tools/parser/treeinsert/ParserTest.java
index f851f620..ff5c16c4 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/parser/treeinsert/ParserTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/parser/treeinsert/ParserTest.java
@@ -17,49 +17,40 @@
package opennlp.tools.parser.treeinsert;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
+import java.io.IOException;
-import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import opennlp.tools.parser.AbstractParserModelTest;
import opennlp.tools.parser.HeadRules;
import opennlp.tools.parser.Parse;
-import opennlp.tools.parser.ParserFactory;
import opennlp.tools.parser.ParserModel;
import opennlp.tools.parser.ParserTestUtil;
import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
/**
- * Tests for the {@link Parser} class.
+ * Tests for the {@link opennlp.tools.parser.treeinsert.Parser} class.
*/
-public class ParserTest {
+public class ParserTest extends AbstractParserModelTest {
- /**
- * Verify that training and tagging does not cause
- * runtime problems.
- */
- @Test
- void testTreeInsertParserTraining() throws Exception {
+ /* Trained dynamically before test */
+ private static ParserModel model;
+ @Override
+ protected ParserModel getModel() {
+ return model;
+ }
+
+ @BeforeAll
+ public static void setupEnvironment() throws IOException {
ObjectStream<Parse> parseSamples = ParserTestUtil.openTestTrainingData();
HeadRules headRules = ParserTestUtil.createTestHeadRules();
-
- ParserModel model = Parser.train("eng", parseSamples, headRules, 100, 0);
-
- opennlp.tools.parser.Parser parser = ParserFactory.create(model);
-
- // Tests parsing to make sure the code does not has
- // a bug which fails always with a runtime exception
- parser.parse(Parse.parseParse("She was just another freighter from the " +
- "States and she seemed as commonplace as her name ."));
-
- // Test serializing and de-serializing model
- ByteArrayOutputStream outArray = new ByteArrayOutputStream();
- model.serialize(outArray);
- outArray.close();
-
- new ParserModel(new ByteArrayInputStream(outArray.toByteArray()));
-
- // TODO: compare both models
+ // Training an English lang 'opennlp.tools.parser.treeinsert.Parser'
+ model = Parser.train("eng", parseSamples, headRules, TrainingParameters.defaultParams());
+ Assertions.assertNotNull(model);
+ Assertions.assertFalse(model.isLoadedFromSerialized());
}
+
}
diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerModelTest.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerModelTest.java
index a0a715eb..584657fa 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerModelTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerModelTest.java
@@ -21,6 +21,7 @@ import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
+import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
/**
@@ -29,24 +30,19 @@ import org.junit.jupiter.api.Test;
public class TokenizerModelTest {
@Test
- void testSentenceModel() throws IOException {
+ void testTokenizerModelSerialization() throws IOException {
TokenizerModel model = TokenizerTestUtil.createSimpleMaxentTokenModel();
+ Assertions.assertFalse(model.isLoadedFromSerialized());
ByteArrayOutputStream arrayOut = new ByteArrayOutputStream();
model.serialize(arrayOut);
arrayOut.close();
- model = new TokenizerModel(new ByteArrayInputStream(arrayOut.toByteArray()));
- // TODO: check that both maxent models are equal
-
- // Also test serialization after building model from an inputstream
- arrayOut = new ByteArrayOutputStream();
- model.serialize(arrayOut);
- arrayOut.close();
-
- new TokenizerModel(new ByteArrayInputStream(arrayOut.toByteArray()));
-
- // TODO: check that both maxent models are equal
+ TokenizerModel modelRestored = new TokenizerModel(new ByteArrayInputStream(arrayOut.toByteArray()));
+ Assertions.assertNotNull(modelRestored);
+ Assertions.assertTrue(modelRestored.isLoadedFromSerialized());
+ Assertions.assertEquals(model, modelRestored);
+
}
}
diff --git a/pom.xml b/pom.xml
index 5c60c87f..84fb8287 100644
--- a/pom.xml
+++ b/pom.xml
@@ -108,6 +108,13 @@
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.junit.jupiter</groupId>
+ <artifactId>junit-jupiter-params</artifactId>
+ <version>${junit.version}</version>
+ <scope>test</scope>
+ </dependency>
+
<dependency>
<artifactId>opennlp-tools</artifactId>
<groupId>${project.groupId}</groupId>