You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jz...@apache.org on 2022/12/29 15:32:35 UTC

[opennlp] branch main updated: OPENNLP-1426 Improve tests in opennlp.tools.formats.brat by adding assertions (#471)

This is an automated email from the ASF dual-hosted git repository.

jzemerick pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp.git


The following commit(s) were added to refs/heads/main by this push:
     new 1fcb5a1e OPENNLP-1426 Improve tests in opennlp.tools.formats.brat by adding assertions (#471)
1fcb5a1e is described below

commit 1fcb5a1e4573fb7f8af4d96745806c3057769a56
Author: Martin Wiesner <ma...@users.noreply.github.com>
AuthorDate: Thu Dec 29 16:32:29 2022 +0100

    OPENNLP-1426 Improve tests in opennlp.tools.formats.brat by adding assertions (#471)
    
    - resolves TODOs in `BratAnnotationStreamTest` by providing necessary assertions to show correctness
    - removes inter-test dependencies
    - removes useless TODO in `MockDataIndexer`
---
 .../tools/formats/brat/AbstractBratTest.java       |  16 +++
 .../formats/brat/BratAnnotationStreamTest.java     | 132 ++++++++++++++++-----
 .../tools/formats/brat/BratDocumentParserTest.java |  21 ++--
 .../tools/formats/brat/BratDocumentTest.java       |  14 +--
 .../formats/brat/BratNameSampleStreamTest.java     |  32 ++---
 .../opennlp/tools/ml/maxent/MockDataIndexer.java   |   1 -
 6 files changed, 152 insertions(+), 64 deletions(-)

diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/AbstractBratTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/AbstractBratTest.java
index 53237934..5e20cb21 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/AbstractBratTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/AbstractBratTest.java
@@ -19,6 +19,8 @@ package opennlp.tools.formats.brat;
 
 import java.io.File;
 import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
 
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.BeforeEach;
@@ -27,12 +29,26 @@ import opennlp.tools.formats.AbstractFormatTest;
 
 public abstract class AbstractBratTest extends AbstractFormatTest {
 
+  protected static final String BRAT_TYPE_PERSON = "Person";
+  protected static final String BRAT_TYPE_LOCATION = "Location";
+  protected static final String BRAT_TYPE_ORGANIZATION = "Organization";
+  protected static final String BRAT_TYPE_DATE = "Date";
+  protected static final String BRAT_TYPE_RELATED = "Related";
+  protected static final String BRAT_TYPE_ANNOTATION = "#AnnotationNote";
+
+  protected final Map<String, String> typeToClassMap = new HashMap<>();
+
   protected File directory;
 
   @BeforeEach
   public void setup() throws IOException {
     directory = getBratDir();
     Assertions.assertNotNull(directory);
+
+    typeToClassMap.put(BRAT_TYPE_PERSON, AnnotationConfiguration.ENTITY_TYPE);
+    typeToClassMap.put(BRAT_TYPE_LOCATION, AnnotationConfiguration.ENTITY_TYPE);
+    typeToClassMap.put(BRAT_TYPE_ORGANIZATION, AnnotationConfiguration.ENTITY_TYPE);
+    typeToClassMap.put(BRAT_TYPE_DATE, AnnotationConfiguration.ENTITY_TYPE);
   }
 
   private String getDirectoryAsString() {
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratAnnotationStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratAnnotationStreamTest.java
index d7887418..922239ea 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratAnnotationStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratAnnotationStreamTest.java
@@ -17,62 +17,132 @@
 
 package opennlp.tools.formats.brat;
 
-import java.util.HashMap;
-import java.util.Map;
+import java.io.IOException;
+import java.util.LinkedHashSet;
+import java.util.Set;
 
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 
-import opennlp.tools.formats.AbstractFormatTest;
 import opennlp.tools.util.ObjectStream;
 
-public class BratAnnotationStreamTest extends AbstractFormatTest {
+import static org.junit.jupiter.api.Assertions.fail;
 
-  private ObjectStream<BratAnnotation> creatBratAnnotationStream(
-      AnnotationConfiguration conf, String file) {
-    return new BratAnnotationStream(conf, "testing", getResourceStream(file));
-  }
+public class BratAnnotationStreamTest extends AbstractBratTest {
 
-  static void addEntityTypes(Map<String, String> typeToClassMap) {
-    typeToClassMap.put("Person", AnnotationConfiguration.ENTITY_TYPE);
-    typeToClassMap.put("Location", AnnotationConfiguration.ENTITY_TYPE);
-    typeToClassMap.put("Organization", AnnotationConfiguration.ENTITY_TYPE);
-    typeToClassMap.put("Date", AnnotationConfiguration.ENTITY_TYPE);
+  /* Expectations */
+  private static final String[] VOA_PERSONS = new String[]{
+      "Obama", "Barack Obama", "Lee Myung - bak"};
+  private static final String[] VOA_LOCATIONS = new String[]{
+      "South Korea", "North Korea", "China", "South Korean", "United States", "Pyongyang"};
+  private static final String[] VOA_DATES = new String[]{
+      "Wednesday", "Wednesday evening", "Thursday"};
+
+  @BeforeEach
+  public void setup() throws IOException {
+    super.setup();
   }
 
   @Test
-  void testParsingEntities() throws Exception {
-    Map<String, String> typeToClassMap = new HashMap<>();
-    addEntityTypes(typeToClassMap);
-
+  void testParsingEntities() throws IOException {
     AnnotationConfiguration annConfig = new AnnotationConfiguration(typeToClassMap);
-
     ObjectStream<BratAnnotation> annStream = creatBratAnnotationStream(annConfig,
         "brat/voa-with-entities.ann");
+    Assertions.assertNotNull(annStream);
 
-    // TODO: Test if we get the entities ... we expect!
-
-    BratAnnotation ann;
-    while ((ann = annStream.read()) != null) {
-      System.out.println(ann);
-    }
+    validateBratAnnotationStream(annStream, 5, 10, 3, 0, 2, 0);
   }
 
   @Test
-  void testParsingRelations() throws Exception {
-    Map<String, String> typeToClassMap = new HashMap<>();
-    addEntityTypes(typeToClassMap);
-    typeToClassMap.put("Related", AnnotationConfiguration.RELATION_TYPE);
+  void testParsingRelations() throws IOException {
+    // add relation type for this config
+    typeToClassMap.put(BRAT_TYPE_RELATED, AnnotationConfiguration.RELATION_TYPE);
 
     AnnotationConfiguration annConfig = new AnnotationConfiguration(typeToClassMap);
-
     ObjectStream<BratAnnotation> annStream = creatBratAnnotationStream(annConfig,
         "brat/voa-with-relations.ann");
+    Assertions.assertNotNull(annStream);
+
+    validateBratAnnotationStream(annStream, 5, 10, 3, 0, 0, 7);
+  }
+
+  private ObjectStream<BratAnnotation> creatBratAnnotationStream(AnnotationConfiguration conf,
+                                                                 String file) {
+    return new BratAnnotationStream(conf, "testing", getResourceStream(file));
+  }
 
-    // TODO: Test if we get the entities ... we expect!
+  private void validateBratAnnotationStream(ObjectStream<BratAnnotation> annStream, int expectPersons,
+                                            int expectLocations, int expectDates, int expectOrganizations,
+                                            int expectAnnotations, int expectRelations) throws IOException {
+    int dates = 0;
+    int persons = 0;
+    int relations = 0;
+    int locations = 0;
+    int annotations = 0;
+    int organizations = 0;
+    Set<String> annotatedDates = new LinkedHashSet<>();
+    Set<String> annotatedPersons = new LinkedHashSet<>();
+    Set<String> annotatedLocations = new LinkedHashSet<>();
 
     BratAnnotation ann;
     while ((ann = annStream.read()) != null) {
-      System.out.println(ann);
+      Assertions.assertNotNull(ann);
+      String type = ann.getType();
+      Assertions.assertNotNull(type);
+
+      String coveredText = null;
+      RelationAnnotation rAnnotation = null;
+      AnnotatorNoteAnnotation aAnnotation = null;
+      if (ann instanceof SpanAnnotation) {
+        SpanAnnotation sAnnotation = (SpanAnnotation) ann;
+        coveredText = sAnnotation.getCoveredText();
+        Assertions.assertNotNull(coveredText);
+      } else if (ann instanceof RelationAnnotation) {
+        rAnnotation = (RelationAnnotation) ann;
+      } else if (ann instanceof AnnotatorNoteAnnotation) {
+        aAnnotation = (AnnotatorNoteAnnotation) ann;
+      } else {
+        fail("Found object of invalid class for '" + type + "' type!");
+      }
+      switch (type) {
+        case BRAT_TYPE_PERSON: {
+          persons++;
+          annotatedPersons.add(coveredText);
+          break;
+        } case BRAT_TYPE_LOCATION: {
+          locations++;
+          annotatedLocations.add(coveredText);
+          break;
+        } case BRAT_TYPE_DATE: {
+          dates++;
+          annotatedDates.add(coveredText);
+          break;
+        } case BRAT_TYPE_ORGANIZATION: {
+          organizations++;
+          break;
+        } case BRAT_TYPE_RELATED: {
+          relations++;
+          Assertions.assertNotNull(rAnnotation);
+          break;
+        } case BRAT_TYPE_ANNOTATION: {
+          annotations++;
+          Assertions.assertNotNull(aAnnotation);
+          break;
+        } default: {
+          fail("Found an unsupported BRAT type!");
+        }
+      }
     }
+    Assertions.assertEquals(expectDates, dates);
+    Assertions.assertEquals(expectPersons, persons);
+    Assertions.assertEquals(expectLocations, locations);
+    Assertions.assertEquals(expectAnnotations, annotations);
+    Assertions.assertEquals(expectOrganizations, organizations);
+    Assertions.assertEquals(expectRelations, relations);
+
+    Assertions.assertArrayEquals(VOA_DATES, annotatedDates.toArray());
+    Assertions.assertArrayEquals(VOA_PERSONS, annotatedPersons.toArray());
+    Assertions.assertArrayEquals(VOA_LOCATIONS, annotatedLocations.toArray());
   }
 }
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentParserTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentParserTest.java
index 3e58f021..6ae2ba2a 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentParserTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentParserTest.java
@@ -19,40 +19,41 @@ package opennlp.tools.formats.brat;
 
 import java.io.IOException;
 import java.io.InputStream;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 
 import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 
-import opennlp.tools.formats.AbstractFormatTest;
 import opennlp.tools.namefind.NameSample;
 import opennlp.tools.sentdetect.NewlineSentenceDetector;
 import opennlp.tools.tokenize.WhitespaceTokenizer;
 
-public class BratDocumentParserTest extends AbstractFormatTest {
+public class BratDocumentParserTest extends AbstractBratTest {
 
+  @BeforeEach
+  public void setup() throws IOException {
+    super.setup();
+  }
+  
   @Test
   void testParse() throws IOException {
-
-    Map<String, String> typeToClassMap = new HashMap<>();
-    BratAnnotationStreamTest.addEntityTypes(typeToClassMap);
     AnnotationConfiguration config = new AnnotationConfiguration(typeToClassMap);
 
     InputStream txtIn = getResourceStream("brat/opennlp-1193.txt");
     InputStream annIn = getResourceStream("brat/opennlp-1193.ann");
 
     BratDocument doc = BratDocument.parseDocument(config, "opennlp-1193", txtIn, annIn);
-
+    Assertions.assertNotNull(doc);
+    
     BratDocumentParser parser = new BratDocumentParser(new NewlineSentenceDetector(),
         WhitespaceTokenizer.INSTANCE);
 
     List<NameSample> names = parser.parse(doc);
-
     Assertions.assertEquals(3, names.size());
 
     NameSample sample1 = names.get(0);
+    Assertions.assertNotNull(sample1);
 
     Assertions.assertEquals(1, sample1.getNames().length);
     Assertions.assertEquals(0, sample1.getNames()[0].getStart());
@@ -60,11 +61,13 @@ public class BratDocumentParserTest extends AbstractFormatTest {
 
 
     NameSample sample2 = names.get(1);
+    Assertions.assertNotNull(sample2);
     Assertions.assertEquals(1, sample2.getNames().length);
     Assertions.assertEquals(0, sample2.getNames()[0].getStart());
     Assertions.assertEquals(1, sample2.getNames()[0].getEnd());
 
     NameSample sample3 = names.get(2);
+    Assertions.assertNotNull(sample3);
     Assertions.assertEquals(3, sample3.getNames().length);
     Assertions.assertEquals(0, sample3.getNames()[0].getStart());
     Assertions.assertEquals(1, sample3.getNames()[0].getEnd());
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentTest.java
index eb0e371a..047335e5 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentTest.java
@@ -19,21 +19,21 @@ package opennlp.tools.formats.brat;
 
 import java.io.IOException;
 import java.io.InputStream;
-import java.util.HashMap;
-import java.util.Map;
 
 import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 
-import opennlp.tools.formats.AbstractFormatTest;
+public class BratDocumentTest extends AbstractBratTest {
 
-public class BratDocumentTest extends AbstractFormatTest {
+  @BeforeEach
+  public void setup() throws IOException {
+    super.setup();
+  }
 
   @Test
   void testDocumentWithEntitiesParsing() throws IOException {
 
-    Map<String, String> typeToClassMap = new HashMap<>();
-    BratAnnotationStreamTest.addEntityTypes(typeToClassMap);
     AnnotationConfiguration config = new AnnotationConfiguration(typeToClassMap);
 
     InputStream txtIn = getResourceStream("brat/voa-with-entities.txt");
@@ -67,8 +67,6 @@ public class BratDocumentTest extends AbstractFormatTest {
    */
   @Test
   void testSpanWithMultiFragments() throws IOException {
-    Map<String, String> typeToClassMap = new HashMap<>();
-    BratAnnotationStreamTest.addEntityTypes(typeToClassMap);
     AnnotationConfiguration config = new AnnotationConfiguration(typeToClassMap);
 
     InputStream txtIn = getResourceStream("brat/opennlp-1193.txt");
diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratNameSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratNameSampleStreamTest.java
index 68c82423..99693f45 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratNameSampleStreamTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratNameSampleStreamTest.java
@@ -20,11 +20,10 @@ package opennlp.tools.formats.brat;
 import java.io.FileFilter;
 import java.io.IOException;
 import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
 import java.util.Set;
 
 import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 
 import opennlp.tools.namefind.NameSample;
@@ -34,19 +33,9 @@ import opennlp.tools.util.ObjectStream;
 
 public class BratNameSampleStreamTest extends AbstractBratTest {
 
-  private BratNameSampleStream createNameSampleWith(String nameContainsFilter,
-                                                    Set<String> nameTypes) throws IOException {
-    Map<String, String> typeToClassMap = new HashMap<>();
-    BratAnnotationStreamTest.addEntityTypes(typeToClassMap);
-    AnnotationConfiguration config = new AnnotationConfiguration(typeToClassMap);
-
-    FileFilter fileFilter = pathname -> pathname.getName().contains(nameContainsFilter);
-
-    ObjectStream<BratDocument> bratDocumentStream =
-            new BratDocumentStream(config, directory, false, fileFilter);
-
-    return new BratNameSampleStream(new NewlineSentenceDetector(),
-        WhitespaceTokenizer.INSTANCE, bratDocumentStream, nameTypes);
+  @BeforeEach
+  public void setup() throws IOException {
+    super.setup();
   }
 
   @Test
@@ -72,6 +61,7 @@ public class BratNameSampleStreamTest extends AbstractBratTest {
       NameSample sample = stream.read();
       while (sample != null) {
         sample = stream.read();
+        Assertions.assertNotNull(sample);
       }
     });
 
@@ -98,4 +88,16 @@ public class BratNameSampleStreamTest extends AbstractBratTest {
 
     Assertions.assertEquals(8, count);
   }
+
+  private BratNameSampleStream createNameSampleWith(String nameContainsFilter,
+                                                    Set<String> nameTypes) throws IOException {
+    AnnotationConfiguration config = new AnnotationConfiguration(typeToClassMap);
+    FileFilter fileFilter = pathname -> pathname.getName().contains(nameContainsFilter);
+
+    ObjectStream<BratDocument> bratDocumentStream =
+            new BratDocumentStream(config, directory, false, fileFilter);
+
+    return new BratNameSampleStream(new NewlineSentenceDetector(),
+            WhitespaceTokenizer.INSTANCE, bratDocumentStream, nameTypes);
+  }
 }
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/MockDataIndexer.java b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/MockDataIndexer.java
index 881159e8..fbf0f1a6 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/MockDataIndexer.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/MockDataIndexer.java
@@ -53,7 +53,6 @@ public class MockDataIndexer implements DataIndexer {
 
   @Override
   public String[] getOutcomeLabels() {
-    // TODO Auto-generated method stub
     return new String[0];
   }