You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ma...@apache.org on 2023/03/04 12:10:40 UTC
[opennlp] 01/01: OPENNLP-1333 Write unit test for parser top "k" parses
This is an automated email from the ASF dual-hosted git repository.
mawiesne pushed a commit to branch OPENNLP-1333_Write_unit_test_for_parser_top_k_parses
in repository https://gitbox.apache.org/repos/asf/opennlp.git
commit d2f58caed08793f2ea6aff4c7fc3a020a8f32898
Author: Martin Wiesner <ma...@hs-heilbronn.de>
AuthorDate: Sat Mar 4 13:10:27 2023 +0100
OPENNLP-1333 Write unit test for parser top "k" parses
- provides new test cases for `k = 1, 2, 3` for both Parser implementations
- uses test data from https://github.com/apache/opennlp/pull/392
- adds `toStringPennTreebank` in `Parse` to obtain a uniform string representation for verification or comparison
---
.../src/main/java/opennlp/tools/parser/Parse.java | 9 ++++
.../tools/parser/AbstractParserModelTest.java | 58 ++++++++++++++++++++++
2 files changed, 67 insertions(+)
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java b/opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java
index 2e5d873c..33880baf 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java
@@ -449,6 +449,15 @@ public class Parse implements Cloneable, Comparable<Parse> {
return text.substring(span.getStart(), span.getEnd());
}
+ /**
+  * Builds a Penn Treebank-style textual form of this {@link Parse}.
+  * <p>
+  * The text is whatever {@code show(...)} writes into a freshly created buffer;
+  * this method only collects and returns it.
+  *
+  * @return The Penn Treebank-style formatted {@link String} for this parse.
+  */
+ public String toStringPennTreebank() {
+   // NOTE(review): a StringBuffer is used because that is what is handed to show(...);
+   // presumably show's signature requires it - confirm before switching to StringBuilder.
+   final StringBuffer pennTreebank = new StringBuffer();
+   show(pennTreebank);
+   return pennTreebank.toString();
+ }
+
/**
* Represents this {@link Parse} in a human-readable way.
*/
diff --git a/opennlp-tools/src/test/java/opennlp/tools/parser/AbstractParserModelTest.java b/opennlp-tools/src/test/java/opennlp/tools/parser/AbstractParserModelTest.java
index 9df819a9..346e4eb7 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/parser/AbstractParserModelTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/parser/AbstractParserModelTest.java
@@ -20,6 +20,9 @@ package opennlp.tools.parser;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
import java.util.stream.Stream;
import org.junit.jupiter.api.Assertions;
@@ -27,7 +30,9 @@ import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
+import org.junit.jupiter.params.provider.ValueSource;
+import opennlp.tools.tokenize.WhitespaceTokenizer;
import opennlp.tools.util.Span;
/**
@@ -86,6 +91,59 @@ public abstract class AbstractParserModelTest {
Assertions.assertNotNull(s);
}
+ /*
+  * Verifies changes in OPENNLP-1330 and addresses follow-up OPENNLP-1333
+  * See: https://issues.apache.org/jira/projects/OPENNLP/issues/OPENNLP-1333
+  *
+  * Uses test data from PR 392 (https://github.com/apache/opennlp/pull/392).
+  *
+  * For each requested k the test checks that exactly k parses are returned, that
+  * their probabilities are strictly decreasing (starting below the reference value 0),
+  * and that the first parse matches the reference string for the model's parser type.
+  */
+ @ParameterizedTest
+ @ValueSource(ints = {1, 2, 3})
+ void testParsingTopParses(int k) {
+   // fixtures
+   final String sent = "Eric is testing.";
+   final String refParseTopChunking =
+       "(TOP (S (NP (NNP Eric)) (VP (VBZ is) (NP (DT testing.)))))";
+   final String refParseTopTreeInsert =
+       "(TOP (S (NP (NNP Eric)) (VP (VBZ is) (NN testing.))))";
+
+   // prepare: re-join the whitespace tokens with single blanks, so that the
+   // token spans computed below line up with character offsets in 'text'
+   List<String> tokens = Arrays.asList(WhitespaceTokenizer.INSTANCE.tokenize(sent));
+   String text = String.join(" ", tokens);
+
+   Parse sentP = new Parse(text, new Span(0, text.length()),
+       AbstractBottomUpParser.INC_NODE, 0, 0);
+   int start = 0;
+   int i = 0;
+   for (Iterator<String> ti = tokens.iterator(); ti.hasNext(); i++) {
+     String tok = ti.next();
+     sentP.insert(new Parse(text, new Span(start, start + tok.length()),
+         AbstractBottomUpParser.TOK_NODE, 0, i));
+     // skip the token itself plus the single separating blank
+     start += tok.length() + 1;
+   }
+
+   opennlp.tools.parser.Parser parser = ParserFactory.create(getModel());
+   Assertions.assertNotNull(parser);
+
+   // TEST: parsing
+   Parse[] parses = parser.parse(sentP, k);
+   Assertions.assertNotNull(parses);
+   Assertions.assertEquals(k, parses.length);
+   double previousProb = 0; // initial ref value
+   for (int j = 0; j < parses.length; j++) {
+     final double currentProb = parses[j].getProb();
+     Assertions.assertTrue(currentProb < previousProb);
+     // Carry the value forward; without this update every parse would only be
+     // compared against the initial 0 and the ordering would never be verified.
+     previousProb = currentProb;
+     String asPennTreebankStyle = parses[j].toStringPennTreebank();
+     if (j == 0) {
+       // only the top-ranked parse has a reference string to compare against
+       if (ParserType.CHUNKING.equals(getModel().getParserType())) {
+         Assertions.assertEquals(refParseTopChunking, asPennTreebankStyle);
+       } else if (ParserType.TREEINSERT.equals(getModel().getParserType())) {
+         Assertions.assertEquals(refParseTopTreeInsert, asPennTreebankStyle);
+       }
+     }
+   }
+ }
+
/*
* Produces a stream of <parse|text> pairs for parameterized unit tests.
*/