You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2013/09/16 16:52:45 UTC

svn commit: r1523687 - in /opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat: AnnotationConfiguration.java BratNameSampleStreamFactory.java

Author: joern
Date: Mon Sep 16 14:52:45 2013
New Revision: 1523687

URL: http://svn.apache.org/r1523687
Log:
OPENNLP-560 Added parser for the annotation.conf file and removed hard coded test configuration

Modified:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/AnnotationConfiguration.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStreamFactory.java

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/AnnotationConfiguration.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/AnnotationConfiguration.java?rev=1523687&r1=1523686&r2=1523687&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/AnnotationConfiguration.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/AnnotationConfiguration.java Mon Sep 16 14:52:45 2013
@@ -17,6 +17,11 @@
 
 package opennlp.tools.formats.brat;
 
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
@@ -39,5 +44,36 @@ public class AnnotationConfiguration {
     return typeToClassMap.get(type);
   }
   
-  // TODO: Add a parser for the brat configuration file!
+  
+  /**
+   * Parses a brat annotation.conf stream into an {@link AnnotationConfiguration}.
+   * Only the [entities] and [relations] sections are read; others are skipped.
+   *
+   * @param in stream with the configuration; it is not closed by this method
+   * @return a configuration built from the collected type-to-class map
+   * @throws IOException if reading from the stream fails
+   */
+  public static AnnotationConfiguration parse(InputStream in) throws IOException {
+    Map<String, String> typeToClassMap = new HashMap<String, String>();
+    // Decode explicitly as UTF-8 rather than relying on the platform default charset.
+    BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
+    String sectionType = null;
+    String line;
+    while ((line = reader.readLine()) != null) {
+      line = line.trim();
+      if (line.isEmpty() || line.startsWith("#")) {
+        continue; // blank line or comment
+      }
+      if (line.startsWith("[") && line.endsWith("]")) {
+        sectionType = line.substring(1, line.indexOf(']'));
+      } else if ("entities".equals(sectionType)) {
+        typeToClassMap.put(line, AnnotationConfiguration.ENTITY_TYPE);
+      } else if ("relations".equals(sectionType)) {
+        // The type name is the first token; the argument list after it may be
+        // separated by a tab, or be absent, so do not rely on a space being there.
+        typeToClassMap.put(line.split("\\s+")[0], AnnotationConfiguration.RELATION_TYPE);
+      }
+    }
+    return new AnnotationConfiguration(typeToClassMap);
+  }
 }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStreamFactory.java?rev=1523687&r1=1523686&r2=1523687&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStreamFactory.java Mon Sep 16 14:52:45 2013
@@ -18,17 +18,16 @@
 package opennlp.tools.formats.brat;
 
 import java.io.File;
+import java.io.FileInputStream;
 import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
+import java.io.InputStream;
 
 import opennlp.tools.cmdline.ArgumentParser;
-import opennlp.tools.cmdline.StreamFactoryRegistry;
 import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
-import opennlp.tools.cmdline.TerminateToolException;
 import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.cmdline.TerminateToolException;
 import opennlp.tools.formats.AbstractSampleStreamFactory;
-import opennlp.tools.formats.muc.Muc6NameSampleStreamFactory;
 import opennlp.tools.namefind.NameSample;
 import opennlp.tools.sentdetect.NewlineSentenceDetector;
 import opennlp.tools.sentdetect.SentenceDetector;
@@ -47,6 +46,9 @@ public class BratNameSampleStreamFactory
     @ParameterDescription(valueName = "bratDataDir", description = "location of brat data dir")
     File getBratDataDir();
 
+    @ParameterDescription(valueName = "annConfFile")
+    File getAnnotationConfig();
+    
     @ParameterDescription(valueName = "modelFile")
     @OptionalParameter
     File getSentenceDetectorModel();
@@ -54,7 +56,7 @@ public class BratNameSampleStreamFactory
     @ParameterDescription(valueName = "modelFile")
     @OptionalParameter
     File getTokenizerModel();
-
+    
     @ParameterDescription(valueName = "name")
     @OptionalParameter
     String getRuleBasedTokenizer();
@@ -62,6 +64,7 @@ public class BratNameSampleStreamFactory
     @ParameterDescription(valueName = "value")
     @OptionalParameter(defaultValue = "false")
     Boolean getRecursive();
+    
   }
   
   protected BratNameSampleStreamFactory() {
@@ -92,21 +95,23 @@ public class BratNameSampleStreamFactory
       throw new TerminateToolException(-1, "Either use rule based or statistical tokenizer!");
     }
     
-    // TODO: This need to be loaded from the real file ...
-    Map<String, String> typeToClassMap = new HashMap<String, String>();
-    
-    typeToClassMap.put("bumblebee_annotations_Person", "Entity");
-    typeToClassMap.put("bumblebee_annotations_Organization", "Entity");
-    typeToClassMap.put("bumblebee_annotations_DateMention", "Entity");
-    typeToClassMap.put("bumblebee_annotations_Location", "Entity");
-    typeToClassMap.put("bumblebee_annotations_CRN", "Entity");
-    typeToClassMap.put("bumblebee_annotations_Money", "Entity");
-    typeToClassMap.put("bumblebee_annotations_LocatedAt", AnnotationConfiguration.RELATION_TYPE);
-    typeToClassMap.put("bumblebee_annotations_BornIn", AnnotationConfiguration.RELATION_TYPE);
-    typeToClassMap.put("bumblebee_annotations_BornOn", AnnotationConfiguration.RELATION_TYPE);
-    typeToClassMap.put("bumblebee_annotations_MemberOf", AnnotationConfiguration.RELATION_TYPE);
-    
-    AnnotationConfiguration annConfig = new AnnotationConfiguration(typeToClassMap);
+    // Load the annotation configuration from the file returned by params.getAnnotationConfig()
+    AnnotationConfiguration annConfig;
+    InputStream annConfIn = null;
+    try {
+      annConfIn = new FileInputStream(params.getAnnotationConfig());
+      annConfig = AnnotationConfiguration.parse(annConfIn);
+    }
+    catch (IOException e) {
+      throw new TerminateToolException(1, "Failed to parse annotation.conf file!");
+    }
+    finally {
+      if (annConfIn != null) {
+        try {
+          annConfIn.close();
+        } catch (IOException e) {}
+      }
+    }
     
     // TODO: Add an optional parameter to search recursive
     // TODO: How to handle the error here ? terminate the tool? not nice if used by API!