You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2012/03/28 15:11:42 UTC
svn commit: r1306302 - in
/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref:
CorefSample.java CorefSampleDataStream.java mention/DefaultParse.java
Author: joern
Date: Wed Mar 28 13:11:41 2012
New Revision: 1306302
URL: http://svn.apache.org/viewvc?rev=1306302&view=rev
Log:
OPENNLP-56 Added coreference training format support
Added:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefSample.java (with props)
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefSampleDataStream.java (with props)
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/mention/DefaultParse.java
Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefSample.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefSample.java?rev=1306302&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefSample.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefSample.java Wed Mar 28 13:11:41 2012
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.coref;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.coref.mention.DefaultParse;
+import opennlp.tools.parser.Parse;
+
+/** Coreference training sample holding a list of parses, one per line of the input text. */
+public class CorefSample {
+
+  private List<Parse> parses;
+
+  private CorefSample(List<Parse> sentenceParses) {
+    parses = sentenceParses;
+  }
+
+  /** Wraps every stored parse in a new DefaultParse numbered by its list position, from 0. */
+  public List<opennlp.tools.coref.mention.Parse> getParses() {
+    List<opennlp.tools.coref.mention.Parse> wrapped =
+        new ArrayList<opennlp.tools.coref.mention.Parse>();
+    for (int sentenceIndex = 0; sentenceIndex < parses.size(); sentenceIndex++) {
+      wrapped.add(new DefaultParse(parses.get(sentenceIndex), sentenceIndex));
+    }
+    return wrapped;
+  }
+
+  /**
+   * Builds a sample from text with one parse string per line (LF or CRLF separated);
+   * each line is handed to Parse.parseParse in order.
+   */
+  public static CorefSample parse(String corefSampleString) {
+    String[] lines = corefSampleString.split("\\r?\\n");
+    List<Parse> sentenceParses = new ArrayList<Parse>();
+    for (int i = 0; i < lines.length; i++) {
+      sentenceParses.add(Parse.parseParse(lines[i]));
+    }
+    return new CorefSample(sentenceParses);
+  }
+}
Propchange: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefSample.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefSampleDataStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefSampleDataStream.java?rev=1306302&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefSampleDataStream.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefSampleDataStream.java Wed Mar 28 13:11:41 2012
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.coref;
+
+import java.io.IOException;
+
+import opennlp.tools.util.FilterObjectStream;
+import opennlp.tools.util.ObjectStream;
+
+/** Converts each String read from the wrapped stream into a CorefSample. */
+public class CorefSampleDataStream extends FilterObjectStream<String, CorefSample> {
+
+  public CorefSampleDataStream(ObjectStream<String> in) {
+    super(in);
+  }
+
+  /**
+   * Parses the next document string; yields null once the wrapped stream yields null.
+   */
+  public CorefSample read() throws IOException {
+    String document = samples.read();
+    if (document == null) {
+      return null;
+    }
+    return CorefSample.parse(document);
+  }
+}
Propchange: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefSampleDataStream.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/mention/DefaultParse.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/mention/DefaultParse.java?rev=1306302&r1=1306301&r2=1306302&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/mention/DefaultParse.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/mention/DefaultParse.java Wed Mar 28 13:11:41 2012
@@ -39,7 +39,7 @@ public class DefaultParse extends Abstra
private Parse parse;
private int sentenceNumber;
private static Set<String> entitySet = new HashSet<String>(Arrays.asList(NAME_TYPES));
-
+
/**
* Initializes the current instance.
*
@@ -49,6 +49,8 @@ public class DefaultParse extends Abstra
public DefaultParse(Parse parse, int sentenceNumber) {
this.parse = parse;
this.sentenceNumber = sentenceNumber;
+
+ // Should we just maintain a parse id map !?
}
public int getSentenceNumber() {
@@ -106,6 +108,9 @@ public class DefaultParse extends Abstra
if (entitySet.contains(parse.getType())) {
return null;
}
+ else if (parse.getType().contains("#")) {
+ return parse.getType().substring(0, parse.getType().indexOf('#'));
+ }
else {
return parse.getType();
}
@@ -153,6 +158,11 @@ public class DefaultParse extends Abstra
}
public boolean isNamedEntity() {
+
+ // TODO: We should use a special tag here from which the type
+ // can be extracted. Then it depends only on the training data
+ // and not on the values inside NAME_TYPES.
+
if (entitySet.contains(parse.getType())) {
return true;
}
@@ -162,7 +172,7 @@ public class DefaultParse extends Abstra
}
public boolean isNounPhrase() {
- return parse.getType().equals("NP");
+ return parse.getType().equals("NP") || parse.getType().startsWith("NP#");
}
public boolean isSentence() {
@@ -174,7 +184,13 @@ public class DefaultParse extends Abstra
}
public int getEntityId() {
- return -1;
+ if (parse.getType().startsWith("NP#")) {
+ String numberString = parse.getType().substring(3);
+ return Integer.parseInt(numberString);
+ }
+ else {
+ return -1;
+ }
}
public Span getSpan() {