You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2013/05/17 14:15:27 UTC
svn commit: r1483761 - in /opennlp/trunk/opennlp-tools/src/test:
java/opennlp/tools/formats/brat/ resources/opennlp/tools/formats/brat/
Author: joern
Date: Fri May 17 12:15:27 2013
New Revision: 1483761
URL: http://svn.apache.org/r1483761
Log:
OPENNLP-560 Initial check in of brat format parsing code for the name finder.
Added:
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/brat/
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratAnnotationStreamTest.java (with props)
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentTest.java (with props)
opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/formats/brat/
opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-entities.ann
opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-entities.txt (with props)
opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-relations.ann
opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-relations.txt (with props)
Added: opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratAnnotationStreamTest.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratAnnotationStreamTest.java?rev=1483761&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratAnnotationStreamTest.java (added)
+++ opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratAnnotationStreamTest.java Fri May 17 12:15:27 2013
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats.brat;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.Map;
+
+import opennlp.tools.util.ObjectStream;
+
+import org.junit.Test;
+
+public class BratAnnotationStreamTest {
+
+ private ObjectStream<BratAnnotation> creatBratAnnotationStream(
+ AnnotationConfiguration conf, String file) {
+
+ InputStream in = BratAnnotationStreamTest.class.getResourceAsStream(
+ file);
+
+ return new BratAnnotationStream(conf, "testing", in);
+ }
+
+
+ static void addEntityTypes(Map<String, String> typeToClassMap) {
+ typeToClassMap.put("Person", AnnotationConfiguration.ENTITY_TYPE);
+ typeToClassMap.put("Location", AnnotationConfiguration.ENTITY_TYPE);
+ typeToClassMap.put("Organization", AnnotationConfiguration.ENTITY_TYPE);
+ typeToClassMap.put("Date", AnnotationConfiguration.ENTITY_TYPE);
+ }
+
+ @Test
+ public void testParsingEntities() throws Exception {
+
+ Map<String, String> typeToClassMap = new HashMap<String, String>();
+ addEntityTypes(typeToClassMap);
+
+ AnnotationConfiguration annConfig = new AnnotationConfiguration(typeToClassMap);
+
+ ObjectStream<BratAnnotation> annStream = creatBratAnnotationStream(annConfig,
+ "/opennlp/tools/formats/brat/voa-with-entities.ann");
+
+ // TODO: Test if we get the entities ... we expect!
+
+ BratAnnotation ann;
+ while ((ann = annStream.read()) != null) {
+ System.out.println(ann);
+ }
+ }
+
+ @Test
+ public void testParsingRelations() throws Exception {
+
+ Map<String, String> typeToClassMap = new HashMap<String, String>();
+ addEntityTypes(typeToClassMap);
+ typeToClassMap.put("Related", AnnotationConfiguration.RELATION_TYPE);
+
+ AnnotationConfiguration annConfig = new AnnotationConfiguration(typeToClassMap);
+
+ ObjectStream<BratAnnotation> annStream = creatBratAnnotationStream(annConfig,
+ "/opennlp/tools/formats/brat/voa-with-relations.ann");
+
+ // TODO: Test if we get the entities ... we expect!
+
+ BratAnnotation ann;
+ while ((ann = annStream.read()) != null) {
+ System.out.println(ann);
+ }
+ }
+}
Propchange: opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratAnnotationStreamTest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentTest.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentTest.java?rev=1483761&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentTest.java (added)
+++ opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentTest.java Fri May 17 12:15:27 2013
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats.brat;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.junit.Test;
+
+/**
+ * Parses the brat sample document together with its entity annotations via
+ * {@link BratDocument#parseDocument} and checks id, text and annotation count.
+ */
+public class BratDocumentTest {
+
+  /**
+   * Opens a classpath resource and fails the test with a readable message
+   * if it cannot be found.
+   *
+   * @param path absolute classpath location of the resource
+   * @return the opened stream, never {@code null}
+   */
+  private static InputStream openResource(String path) {
+    InputStream in = BratDocumentTest.class.getResourceAsStream(path);
+
+    // getResourceAsStream reports a missing resource by returning null
+    assertTrue("Test resource not found: " + path, in != null);
+
+    return in;
+  }
+
+  @Test
+  public void testDocumentWithEntitiesParsing() throws IOException {
+
+    Map<String, String> typeToClassMap = new HashMap<String, String>();
+    BratAnnotationStreamTest.addEntityTypes(typeToClassMap);
+    AnnotationConfiguration config = new AnnotationConfiguration(typeToClassMap);
+
+    InputStream txtIn = openResource(
+        "/opennlp/tools/formats/brat/voa-with-entities.txt");
+
+    try {
+      InputStream annIn = openResource(
+          "/opennlp/tools/formats/brat/voa-with-entities.ann");
+
+      try {
+        BratDocument doc = BratDocument.parseDocument(config, "voa-with-entities", txtIn, annIn);
+
+        assertEquals("voa-with-entities", doc.getId());
+        assertTrue(doc.getText().startsWith(" U . S . President "));
+        assertTrue(doc.getText().endsWith("multinational process . \n"));
+
+        // one annotation per line of voa-with-entities.ann (T1 .. T18)
+        assertEquals(18, doc.getAnnotations().size());
+      }
+      finally {
+        // Closing is harmless if parseDocument already closed the stream:
+        // closing an already closed stream has no effect.
+        annIn.close();
+      }
+    }
+    finally {
+      txtIn.close();
+    }
+  }
+}
Propchange: opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/brat/BratDocumentTest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-entities.ann
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-entities.ann?rev=1483761&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-entities.ann (added)
+++ opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-entities.ann Fri May 17 12:15:27 2013
@@ -0,0 +1,18 @@
+T1 Person 281 286 Obama
+T2 Person 21 33 Barack Obama
+T3 Location 51 62 South Korea
+T4 Location 151 162 North Korea
+T5 Location 231 236 China
+T6 Location 243 254 South Korea
+T7 Location 322 333 North Korea
+T8 Date 257 266 Wednesday
+T9 Location 386 397 North Korea
+T10 Person 586 591 Obama
+T11 Date 843 860 Wednesday evening
+T12 Location 889 901 South Korean
+T13 Person 913 928 Lee Myung - bak
+T14 Date 931 939 Thursday
+T15 Location 978 989 South Korea
+T16 Location 1000 1013 United States
+T17 Person 1121 1126 Obama
+T18 Location 1168 1177 Pyongyang
Added: opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-entities.txt
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-entities.txt?rev=1483761&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-entities.txt (added)
+++ opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-entities.txt Fri May 17 12:15:27 2013
@@ -0,0 +1,8 @@
+ U . S . President Barack Obama has arrived in South Korea , where he is expected to show solidarity with the country ' s president in demanding North Korea move toward ending its nuclear weapons programs .
+As he departed China for South Korea Wednesday , President Obama took another opportunity to urge North Korea to reach an agreement on its nuclear weapons .
+" North Korea has a choice .
+It can continue down the path of confrontation and provocation that has led to less security , less prosperity and more isolation from the global community , " President Obama said .
+" Or it can choose to become a full member of the international community , which will give a better life to its people by living up to international obligations and foregoing nuclear weapons . "
+The president landed at a U . S . air base Wednesday evening , and is to hold talks with South Korean President Lee Myung - bak Thursday here in the South Korean capital .
+ South Korea and the United States are trying to coax the North back to six - nation talks aimed at ending its nuclear weapons .
+President Obama has indicated he will send an envoy to Pyongyang before the end of the year for one - on - one discussions , but only in the context of restarting the multinational process .
Propchange: opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-entities.txt
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-relations.ann
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-relations.ann?rev=1483761&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-relations.ann (added)
+++ opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-relations.ann Fri May 17 12:15:27 2013
@@ -0,0 +1,25 @@
+T1 Person 281 286 Obama
+T2 Person 21 33 Barack Obama
+T3 Location 51 62 South Korea
+T4 Location 151 162 North Korea
+T5 Location 231 236 China
+T6 Location 243 254 South Korea
+T7 Location 322 333 North Korea
+T8 Date 257 266 Wednesday
+T9 Location 386 397 North Korea
+T10 Person 586 591 Obama
+T11 Date 843 860 Wednesday evening
+T12 Location 889 901 South Korean
+T13 Person 913 928 Lee Myung - bak
+T14 Date 931 939 Thursday
+T15 Location 978 989 South Korea
+T16 Location 1000 1013 United States
+T17 Person 1121 1126 Obama
+T18 Location 1168 1177 Pyongyang
+R1 Related Arg1:T2 Arg2:T3
+R2 Related Arg1:T1 Arg2:T7
+R3 Related Arg1:T13 Arg2:T12
+R4 Related Arg1:T17 Arg2:T18
+R5 Related Arg1:T2 Arg2:T4
+R6 Related Arg1:T2 Arg2:T5
+R7 Related Arg1:T2 Arg2:T6
Added: opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-relations.txt
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-relations.txt?rev=1483761&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-relations.txt (added)
+++ opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-relations.txt Fri May 17 12:15:27 2013
@@ -0,0 +1,9 @@
+ U . S . President Barack Obama has arrived in South Korea , where he is expected to show solidarity with the country ' s president in demanding North Korea move toward ending its nuclear weapons programs .
+As he departed China for South Korea Wednesday , President Obama took another opportunity to urge North Korea to reach an agreement on its nuclear weapons .
+" North Korea has a choice .
+It can continue down the path of confrontation and provocation that has led to less security , less prosperity and more isolation from the global community , " President Obama said .
+" Or it can choose to become a full member of the international community , which will give a better life to its people by living up to international obligations and foregoing nuclear weapons . "
+The president landed at a U . S . air base Wednesday evening , and is to hold talks with South Korean President Lee Myung - bak Thursday here in the South Korean capital .
+ South Korea and the United States are trying to coax the North back to six - nation talks aimed at ending its nuclear weapons .
+President Obama has indicated he will send an envoy to Pyongyang before the end of the year for one - on - one discussions , but only in the context of restarting the multinational process .
+
Propchange: opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/formats/brat/voa-with-relations.txt
------------------------------------------------------------------------------
svn:mime-type = text/plain