You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2013/09/16 16:52:45 UTC
svn commit: r1523687 - in
/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat:
AnnotationConfiguration.java BratNameSampleStreamFactory.java
Author: joern
Date: Mon Sep 16 14:52:45 2013
New Revision: 1523687
URL: http://svn.apache.org/r1523687
Log:
OPENNLP-560 Added parser for the annotation.conf file and removed hard-coded test configuration
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/AnnotationConfiguration.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStreamFactory.java
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/AnnotationConfiguration.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/AnnotationConfiguration.java?rev=1523687&r1=1523686&r2=1523687&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/AnnotationConfiguration.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/AnnotationConfiguration.java Mon Sep 16 14:52:45 2013
@@ -17,6 +17,11 @@
package opennlp.tools.formats.brat;
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
@@ -39,5 +44,36 @@ public class AnnotationConfiguration {
return typeToClassMap.get(type);
}
- // TODO: Add a parser for the brat configuration file!
+
+ public static AnnotationConfiguration parse(InputStream in) throws IOException {
+ Map<String, String> typeToClassMap = new HashMap<String, String>();
+
+ BufferedReader reader = new BufferedReader(new InputStreamReader(in));
+
+ // Note: This only supports the entities and relations sections
+ String line = null;
+ String sectionType = null;
+
+ while ((line = reader.readLine())!= null) {
+ line = line.trim();
+
+ if (line.isEmpty()) {
+ continue;
+ } else if (line.startsWith("#")) {
+ continue;
+ } else if (line.startsWith("[") && line.endsWith("]")) {
+ sectionType = line.substring(line.indexOf('[') + 1, line.indexOf(']'));
+ }
+ else {
+ if ("entities".equals(sectionType)) {
+ typeToClassMap.put(line, AnnotationConfiguration.ENTITY_TYPE);
+ }
+ else if ("relations".equals(sectionType)) {
+ typeToClassMap.put(line.substring(0, line.indexOf(' ')), AnnotationConfiguration.RELATION_TYPE);
+ }
+ }
+ }
+
+ return new AnnotationConfiguration(typeToClassMap);
+ }
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStreamFactory.java?rev=1523687&r1=1523686&r2=1523687&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStreamFactory.java Mon Sep 16 14:52:45 2013
@@ -18,17 +18,16 @@
package opennlp.tools.formats.brat;
import java.io.File;
+import java.io.FileInputStream;
import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
+import java.io.InputStream;
import opennlp.tools.cmdline.ArgumentParser;
-import opennlp.tools.cmdline.StreamFactoryRegistry;
import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
-import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.formats.AbstractSampleStreamFactory;
-import opennlp.tools.formats.muc.Muc6NameSampleStreamFactory;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.sentdetect.NewlineSentenceDetector;
import opennlp.tools.sentdetect.SentenceDetector;
@@ -47,6 +46,9 @@ public class BratNameSampleStreamFactory
@ParameterDescription(valueName = "bratDataDir", description = "location of brat data dir")
File getBratDataDir();
+ @ParameterDescription(valueName = "annConfFile")
+ File getAnnotationConfig();
+
@ParameterDescription(valueName = "modelFile")
@OptionalParameter
File getSentenceDetectorModel();
@@ -54,7 +56,7 @@ public class BratNameSampleStreamFactory
@ParameterDescription(valueName = "modelFile")
@OptionalParameter
File getTokenizerModel();
-
+
@ParameterDescription(valueName = "name")
@OptionalParameter
String getRuleBasedTokenizer();
@@ -62,6 +64,7 @@ public class BratNameSampleStreamFactory
@ParameterDescription(valueName = "value")
@OptionalParameter(defaultValue = "false")
Boolean getRecursive();
+
}
protected BratNameSampleStreamFactory() {
@@ -92,21 +95,23 @@ public class BratNameSampleStreamFactory
throw new TerminateToolException(-1, "Either use rule based or statistical tokenizer!");
}
- // TODO: This need to be loaded from the real file ...
- Map<String, String> typeToClassMap = new HashMap<String, String>();
-
- typeToClassMap.put("bumblebee_annotations_Person", "Entity");
- typeToClassMap.put("bumblebee_annotations_Organization", "Entity");
- typeToClassMap.put("bumblebee_annotations_DateMention", "Entity");
- typeToClassMap.put("bumblebee_annotations_Location", "Entity");
- typeToClassMap.put("bumblebee_annotations_CRN", "Entity");
- typeToClassMap.put("bumblebee_annotations_Money", "Entity");
- typeToClassMap.put("bumblebee_annotations_LocatedAt", AnnotationConfiguration.RELATION_TYPE);
- typeToClassMap.put("bumblebee_annotations_BornIn", AnnotationConfiguration.RELATION_TYPE);
- typeToClassMap.put("bumblebee_annotations_BornOn", AnnotationConfiguration.RELATION_TYPE);
- typeToClassMap.put("bumblebee_annotations_MemberOf", AnnotationConfiguration.RELATION_TYPE);
-
- AnnotationConfiguration annConfig = new AnnotationConfiguration(typeToClassMap);
+ // TODO: Provide the file name to the annotation.conf file and implement the parser ...
+ AnnotationConfiguration annConfig;
+ InputStream annConfIn = null;
+ try {
+ annConfIn = new FileInputStream(params.getAnnotationConfig());
+ annConfig = AnnotationConfiguration.parse(annConfIn);
+ }
+ catch (IOException e) {
+ throw new TerminateToolException(1, "Failed to parse annotation.conf file!");
+ }
+ finally {
+ if (annConfIn != null) {
+ try {
+ annConfIn.close();
+ } catch (IOException e) {}
+ }
+ }
// TODO: Add an optional parameter to search recursive
// TODO: How to handle the error here ? terminate the tool? not nice if used by API!