You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jz...@apache.org on 2022/12/29 15:32:35 UTC
[opennlp] branch main updated: OPENNLP-1426 Improve tests in opennlp.tools.formats.brat by adding assertions (#471)
This is an automated email from the ASF dual-hosted git repository.
jzemerick pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/main by this push:
new 1fcb5a1e OPENNLP-1426 Improve tests in opennlp.tools.formats.brat by adding assertions (#471)
1fcb5a1e is described below
commit 1fcb5a1e4573fb7f8af4d96745806c3057769a56
Author: Martin Wiesner <ma...@users.noreply.github.com>
AuthorDate: Thu Dec 29 16:32:29 2022 +0100
OPENNLP-1426 Improve tests in opennlp.tools.formats.brat by adding assertions (#471)
- resolves TODOs in `BratAnnotationStreamTest` by providing necessary assertions to show correctness
- removes inter-test dependencies
- removes useless TODO in `MockDataIndexer`
---
.../tools/formats/brat/AbstractBratTest.java | 16 +++
.../formats/brat/BratAnnotationStreamTest.java | 132 ++++++++++++++++-----
.../tools/formats/brat/BratDocumentParserTest.java | 21 ++--
.../tools/formats/brat/BratDocumentTest.java | 14 +--
.../formats/brat/BratNameSampleStreamTest.java | 32 ++---
.../opennlp/tools/ml/maxent/MockDataIndexer.java | 1 -
6 files changed, 152 insertions(+), 64 deletions(-)
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/AbstractBratTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/AbstractBratTest.java
index 53237934..5e20cb21 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/AbstractBratTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/AbstractBratTest.java
@@ -19,6 +19,8 @@ package opennlp.tools.formats.brat;
import java.io.File;
import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
@@ -27,12 +29,26 @@ import opennlp.tools.formats.AbstractFormatTest;
public abstract class AbstractBratTest extends AbstractFormatTest {
+ protected static final String BRAT_TYPE_PERSON = "Person";
+ protected static final String BRAT_TYPE_LOCATION = "Location";
+ protected static final String BRAT_TYPE_ORGANIZATION = "Organization";
+ protected static final String BRAT_TYPE_DATE = "Date";
+ protected static final String BRAT_TYPE_RELATED = "Related";
+ protected static final String BRAT_TYPE_ANNOTATION = "#AnnotationNote";
+
+ protected final Map<String, String> typeToClassMap = new HashMap<>();
+
protected File directory;
@BeforeEach
public void setup() throws IOException {
directory = getBratDir();
Assertions.assertNotNull(directory);
+
+ typeToClassMap.put(BRAT_TYPE_PERSON, AnnotationConfiguration.ENTITY_TYPE);
+ typeToClassMap.put(BRAT_TYPE_LOCATION, AnnotationConfiguration.ENTITY_TYPE);
+ typeToClassMap.put(BRAT_TYPE_ORGANIZATION, AnnotationConfiguration.ENTITY_TYPE);
+ typeToClassMap.put(BRAT_TYPE_DATE, AnnotationConfiguration.ENTITY_TYPE);
}
private String getDirectoryAsString() {
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratAnnotationStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratAnnotationStreamTest.java
index d7887418..922239ea 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratAnnotationStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratAnnotationStreamTest.java
@@ -17,62 +17,132 @@
package opennlp.tools.formats.brat;
-import java.util.HashMap;
-import java.util.Map;
+import java.io.IOException;
+import java.util.LinkedHashSet;
+import java.util.Set;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
-import opennlp.tools.formats.AbstractFormatTest;
import opennlp.tools.util.ObjectStream;
-public class BratAnnotationStreamTest extends AbstractFormatTest {
+import static org.junit.jupiter.api.Assertions.fail;
- private ObjectStream<BratAnnotation> creatBratAnnotationStream(
- AnnotationConfiguration conf, String file) {
- return new BratAnnotationStream(conf, "testing", getResourceStream(file));
- }
+public class BratAnnotationStreamTest extends AbstractBratTest {
- static void addEntityTypes(Map<String, String> typeToClassMap) {
- typeToClassMap.put("Person", AnnotationConfiguration.ENTITY_TYPE);
- typeToClassMap.put("Location", AnnotationConfiguration.ENTITY_TYPE);
- typeToClassMap.put("Organization", AnnotationConfiguration.ENTITY_TYPE);
- typeToClassMap.put("Date", AnnotationConfiguration.ENTITY_TYPE);
+ /* Expectations */
+ private static final String[] VOA_PERSONS = new String[]{
+ "Obama", "Barack Obama", "Lee Myung - bak"};
+ private static final String[] VOA_LOCATIONS = new String[]{
+ "South Korea", "North Korea", "China", "South Korean", "United States", "Pyongyang"};
+ private static final String[] VOA_DATES = new String[]{
+ "Wednesday", "Wednesday evening", "Thursday"};
+
+ @BeforeEach
+ public void setup() throws IOException {
+ super.setup();
}
@Test
- void testParsingEntities() throws Exception {
- Map<String, String> typeToClassMap = new HashMap<>();
- addEntityTypes(typeToClassMap);
-
+ void testParsingEntities() throws IOException {
AnnotationConfiguration annConfig = new AnnotationConfiguration(typeToClassMap);
-
ObjectStream<BratAnnotation> annStream = creatBratAnnotationStream(annConfig,
"brat/voa-with-entities.ann");
+ Assertions.assertNotNull(annStream);
- // TODO: Test if we get the entities ... we expect!
-
- BratAnnotation ann;
- while ((ann = annStream.read()) != null) {
- System.out.println(ann);
- }
+ validateBratAnnotationStream(annStream, 5, 10, 3, 0, 2, 0);
}
@Test
- void testParsingRelations() throws Exception {
- Map<String, String> typeToClassMap = new HashMap<>();
- addEntityTypes(typeToClassMap);
- typeToClassMap.put("Related", AnnotationConfiguration.RELATION_TYPE);
+ void testParsingRelations() throws IOException {
+ // add relation type for this config
+ typeToClassMap.put(BRAT_TYPE_RELATED, AnnotationConfiguration.RELATION_TYPE);
AnnotationConfiguration annConfig = new AnnotationConfiguration(typeToClassMap);
-
ObjectStream<BratAnnotation> annStream = creatBratAnnotationStream(annConfig,
"brat/voa-with-relations.ann");
+ Assertions.assertNotNull(annStream);
+
+ validateBratAnnotationStream(annStream, 5, 10, 3, 0, 0, 7);
+ }
+
+ private ObjectStream<BratAnnotation> creatBratAnnotationStream(AnnotationConfiguration conf,
+ String file) {
+ return new BratAnnotationStream(conf, "testing", getResourceStream(file));
+ }
- // TODO: Test if we get the entities ... we expect!
+ private void validateBratAnnotationStream(ObjectStream<BratAnnotation> annStream, int expectPersons,
+ int expectLocations, int expectDates, int expectOrganizations,
+ int expectAnnotations, int expectRelations) throws IOException {
+ int dates = 0;
+ int persons = 0;
+ int relations = 0;
+ int locations = 0;
+ int annotations = 0;
+ int organizations = 0;
+ Set<String> annotatedDates = new LinkedHashSet<>();
+ Set<String> annotatedPersons = new LinkedHashSet<>();
+ Set<String> annotatedLocations = new LinkedHashSet<>();
BratAnnotation ann;
while ((ann = annStream.read()) != null) {
- System.out.println(ann);
+ Assertions.assertNotNull(ann);
+ String type = ann.getType();
+ Assertions.assertNotNull(type);
+
+ String coveredText = null;
+ RelationAnnotation rAnnotation = null;
+ AnnotatorNoteAnnotation aAnnotation = null;
+ if (ann instanceof SpanAnnotation) {
+ SpanAnnotation sAnnotation = (SpanAnnotation) ann;
+ coveredText = sAnnotation.getCoveredText();
+ Assertions.assertNotNull(coveredText);
+ } else if (ann instanceof RelationAnnotation) {
+ rAnnotation = (RelationAnnotation) ann;
+ } else if (ann instanceof AnnotatorNoteAnnotation) {
+ aAnnotation = (AnnotatorNoteAnnotation) ann;
+ } else {
+ fail("Found object of invalid class for '" + type + "' type!");
+ }
+ switch (type) {
+ case BRAT_TYPE_PERSON: {
+ persons++;
+ annotatedPersons.add(coveredText);
+ break;
+ } case BRAT_TYPE_LOCATION: {
+ locations++;
+ annotatedLocations.add(coveredText);
+ break;
+ } case BRAT_TYPE_DATE: {
+ dates++;
+ annotatedDates.add(coveredText);
+ break;
+ } case BRAT_TYPE_ORGANIZATION: {
+ organizations++;
+ break;
+ } case BRAT_TYPE_RELATED: {
+ relations++;
+ Assertions.assertNotNull(rAnnotation);
+ break;
+ } case BRAT_TYPE_ANNOTATION: {
+ annotations++;
+ Assertions.assertNotNull(aAnnotation);
+ break;
+ } default: {
+ fail("Found an unsupported BRAT type!");
+ }
+ }
}
+ Assertions.assertEquals(expectDates, dates);
+ Assertions.assertEquals(expectPersons, persons);
+ Assertions.assertEquals(expectLocations, locations);
+ Assertions.assertEquals(expectAnnotations, annotations);
+ Assertions.assertEquals(expectOrganizations, organizations);
+ Assertions.assertEquals(expectRelations, relations);
+
+ Assertions.assertArrayEquals(VOA_DATES, annotatedDates.toArray());
+ Assertions.assertArrayEquals(VOA_PERSONS, annotatedPersons.toArray());
+ Assertions.assertArrayEquals(VOA_LOCATIONS, annotatedLocations.toArray());
}
}
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentParserTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentParserTest.java
index 3e58f021..6ae2ba2a 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentParserTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentParserTest.java
@@ -19,40 +19,41 @@ package opennlp.tools.formats.brat;
import java.io.IOException;
import java.io.InputStream;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
-import opennlp.tools.formats.AbstractFormatTest;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.sentdetect.NewlineSentenceDetector;
import opennlp.tools.tokenize.WhitespaceTokenizer;
-public class BratDocumentParserTest extends AbstractFormatTest {
+public class BratDocumentParserTest extends AbstractBratTest {
+ @BeforeEach
+ public void setup() throws IOException {
+ super.setup();
+ }
+
@Test
void testParse() throws IOException {
-
- Map<String, String> typeToClassMap = new HashMap<>();
- BratAnnotationStreamTest.addEntityTypes(typeToClassMap);
AnnotationConfiguration config = new AnnotationConfiguration(typeToClassMap);
InputStream txtIn = getResourceStream("brat/opennlp-1193.txt");
InputStream annIn = getResourceStream("brat/opennlp-1193.ann");
BratDocument doc = BratDocument.parseDocument(config, "opennlp-1193", txtIn, annIn);
-
+ Assertions.assertNotNull(doc);
+
BratDocumentParser parser = new BratDocumentParser(new NewlineSentenceDetector(),
WhitespaceTokenizer.INSTANCE);
List<NameSample> names = parser.parse(doc);
-
Assertions.assertEquals(3, names.size());
NameSample sample1 = names.get(0);
+ Assertions.assertNotNull(sample1);
Assertions.assertEquals(1, sample1.getNames().length);
Assertions.assertEquals(0, sample1.getNames()[0].getStart());
@@ -60,11 +61,13 @@ public class BratDocumentParserTest extends AbstractFormatTest {
NameSample sample2 = names.get(1);
+ Assertions.assertNotNull(sample2);
Assertions.assertEquals(1, sample2.getNames().length);
Assertions.assertEquals(0, sample2.getNames()[0].getStart());
Assertions.assertEquals(1, sample2.getNames()[0].getEnd());
NameSample sample3 = names.get(2);
+ Assertions.assertNotNull(sample3);
Assertions.assertEquals(3, sample3.getNames().length);
Assertions.assertEquals(0, sample3.getNames()[0].getStart());
Assertions.assertEquals(1, sample3.getNames()[0].getEnd());
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentTest.java
index eb0e371a..047335e5 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentTest.java
@@ -19,21 +19,21 @@ package opennlp.tools.formats.brat;
import java.io.IOException;
import java.io.InputStream;
-import java.util.HashMap;
-import java.util.Map;
import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
-import opennlp.tools.formats.AbstractFormatTest;
+public class BratDocumentTest extends AbstractBratTest {
-public class BratDocumentTest extends AbstractFormatTest {
+ @BeforeEach
+ public void setup() throws IOException {
+ super.setup();
+ }
@Test
void testDocumentWithEntitiesParsing() throws IOException {
- Map<String, String> typeToClassMap = new HashMap<>();
- BratAnnotationStreamTest.addEntityTypes(typeToClassMap);
AnnotationConfiguration config = new AnnotationConfiguration(typeToClassMap);
InputStream txtIn = getResourceStream("brat/voa-with-entities.txt");
@@ -67,8 +67,6 @@ public class BratDocumentTest extends AbstractFormatTest {
*/
@Test
void testSpanWithMultiFragments() throws IOException {
- Map<String, String> typeToClassMap = new HashMap<>();
- BratAnnotationStreamTest.addEntityTypes(typeToClassMap);
AnnotationConfiguration config = new AnnotationConfiguration(typeToClassMap);
InputStream txtIn = getResourceStream("brat/opennlp-1193.txt");
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratNameSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratNameSampleStreamTest.java
index 68c82423..99693f45 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratNameSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratNameSampleStreamTest.java
@@ -20,11 +20,10 @@ package opennlp.tools.formats.brat;
import java.io.FileFilter;
import java.io.IOException;
import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
import java.util.Set;
import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import opennlp.tools.namefind.NameSample;
@@ -34,19 +33,9 @@ import opennlp.tools.util.ObjectStream;
public class BratNameSampleStreamTest extends AbstractBratTest {
- private BratNameSampleStream createNameSampleWith(String nameContainsFilter,
- Set<String> nameTypes) throws IOException {
- Map<String, String> typeToClassMap = new HashMap<>();
- BratAnnotationStreamTest.addEntityTypes(typeToClassMap);
- AnnotationConfiguration config = new AnnotationConfiguration(typeToClassMap);
-
- FileFilter fileFilter = pathname -> pathname.getName().contains(nameContainsFilter);
-
- ObjectStream<BratDocument> bratDocumentStream =
- new BratDocumentStream(config, directory, false, fileFilter);
-
- return new BratNameSampleStream(new NewlineSentenceDetector(),
- WhitespaceTokenizer.INSTANCE, bratDocumentStream, nameTypes);
+ @BeforeEach
+ public void setup() throws IOException {
+ super.setup();
}
@Test
@@ -72,6 +61,7 @@ public class BratNameSampleStreamTest extends AbstractBratTest {
NameSample sample = stream.read();
while (sample != null) {
sample = stream.read();
+ Assertions.assertNotNull(sample);
}
});
@@ -98,4 +88,16 @@ public class BratNameSampleStreamTest extends AbstractBratTest {
Assertions.assertEquals(8, count);
}
+
+ private BratNameSampleStream createNameSampleWith(String nameContainsFilter,
+ Set<String> nameTypes) throws IOException {
+ AnnotationConfiguration config = new AnnotationConfiguration(typeToClassMap);
+ FileFilter fileFilter = pathname -> pathname.getName().contains(nameContainsFilter);
+
+ ObjectStream<BratDocument> bratDocumentStream =
+ new BratDocumentStream(config, directory, false, fileFilter);
+
+ return new BratNameSampleStream(new NewlineSentenceDetector(),
+ WhitespaceTokenizer.INSTANCE, bratDocumentStream, nameTypes);
+ }
}
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/MockDataIndexer.java b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/MockDataIndexer.java
index 881159e8..fbf0f1a6 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/MockDataIndexer.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/MockDataIndexer.java
@@ -53,7 +53,6 @@ public class MockDataIndexer implements DataIndexer {
@Override
public String[] getOutcomeLabels() {
- // TODO Auto-generated method stub
return new String[0];
}