You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2012/03/28 15:11:42 UTC

svn commit: r1306302 - in /opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref: CorefSample.java CorefSampleDataStream.java mention/DefaultParse.java

Author: joern
Date: Wed Mar 28 13:11:41 2012
New Revision: 1306302

URL: http://svn.apache.org/viewvc?rev=1306302&view=rev
Log:
OPENNLP-56 Added coreference training format support

Added:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefSample.java   (with props)
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefSampleDataStream.java   (with props)
Modified:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/mention/DefaultParse.java

Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefSample.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefSample.java?rev=1306302&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefSample.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefSample.java Wed Mar 28 13:11:41 2012
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.coref;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.coref.mention.DefaultParse;
+import opennlp.tools.parser.Parse;
+
+public class CorefSample {
+  // Holds the parses of one coref sample: one per line of the sample text, in input order.
+  private List<Parse> parses;
+  // Private constructor: within this class, instances are only created by parse(String).
+   private CorefSample(List<Parse> parses) {
+     this.parses = parses;
+   }
+  /** Returns a new list wrapping each stored parse in a DefaultParse, with sentence numbers starting at 0. */
+  public List<opennlp.tools.coref.mention.Parse> getParses() {
+    
+    List<opennlp.tools.coref.mention.Parse> corefParses =
+        new ArrayList<opennlp.tools.coref.mention.Parse>();
+    // The position of a parse in the stored list is passed on as its sentence number.
+    int sentNumber = 0;
+    for (Parse parse : parses) {
+      corefParses.add(new DefaultParse(parse, sentNumber++));
+    }
+    
+    return corefParses;
+  }
+  /** Creates a sample by passing every line of the given text to Parse.parseParse. */
+  public static CorefSample parse(String corefSampleString) {
+    
+    List<Parse> parses = new ArrayList<Parse>();
+    // Lines are separated by either LF or CRLF.
+    for (String line : corefSampleString.split("\\r?\\n")) {
+      parses.add(Parse.parseParse(line));
+    }
+    
+    return new CorefSample(parses);
+  }
+}

Propchange: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefSample.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefSampleDataStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefSampleDataStream.java?rev=1306302&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefSampleDataStream.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefSampleDataStream.java Wed Mar 28 13:11:41 2012
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.coref;
+
+import java.io.IOException;
+
+import opennlp.tools.util.FilterObjectStream;
+import opennlp.tools.util.ObjectStream;
+
+public class CorefSampleDataStream extends FilterObjectStream<String, CorefSample> {
+  /** Creates a stream that turns each string read from the given stream into a CorefSample. */
+  public CorefSampleDataStream(ObjectStream<String> in) {
+    super(in);
+  }
+  /** Returns the next string parsed via CorefSample.parse, or null once the source returns null. */
+  public CorefSample read() throws IOException {
+    // NOTE(review): samples is not declared here; presumably the wrapped stream kept by FilterObjectStream.
+    String document = samples.read();
+    
+    if (document != null) {
+      return CorefSample.parse(document);
+    }
+    else {
+      return null;
+    }
+  }
+}

Propchange: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefSampleDataStream.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/mention/DefaultParse.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/mention/DefaultParse.java?rev=1306302&r1=1306301&r2=1306302&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/mention/DefaultParse.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/mention/DefaultParse.java Wed Mar 28 13:11:41 2012
@@ -39,7 +39,7 @@ public class DefaultParse extends Abstra
   private Parse parse;
   private int sentenceNumber;
   private static Set<String> entitySet = new HashSet<String>(Arrays.asList(NAME_TYPES));
-
+  
   /**
    * Initializes the current instance.
    *
@@ -49,6 +49,8 @@ public class DefaultParse extends Abstra
   public DefaultParse(Parse parse, int sentenceNumber) {
     this.parse = parse;
     this.sentenceNumber = sentenceNumber;
+    
+    // Should we just maintain a parse id map !?
   }
 
   public int getSentenceNumber() {
@@ -106,6 +108,9 @@ public class DefaultParse extends Abstra
     if (entitySet.contains(parse.getType())) {
       return null;
     }
+    else if (parse.getType().contains("#")) {
+      return parse.getType().substring(0, parse.getType().indexOf('#'));
+    }
     else {
       return parse.getType();
     }
@@ -153,6 +158,11 @@ public class DefaultParse extends Abstra
   }
 
   public boolean isNamedEntity() {
+    
+    // TODO: We should use a special tag here, from which
+    // the type can be extracted. Then it depends only on
+    // the training data and not on the values inside NAME_TYPES.
+    
     if (entitySet.contains(parse.getType())) {
       return true;
     }
@@ -162,7 +172,7 @@ public class DefaultParse extends Abstra
   }
 
   public boolean isNounPhrase() {
-    return parse.getType().equals("NP");
+    return parse.getType().equals("NP") || parse.getType().startsWith("NP#");
   }
 
   public boolean isSentence() {
@@ -174,7 +184,13 @@ public class DefaultParse extends Abstra
   }
 
   public int getEntityId() {
-    return -1;
+    if (parse.getType().startsWith("NP#")) {
+      String numberString = parse.getType().substring(3);
+      return Integer.parseInt(numberString);
+    }
+    else {
+      return -1;
+    }
   }
 
   public Span getSpan() {