You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2012/03/18 17:11:18 UTC
svn commit: r1302151 - in /opennlp/trunk/opennlp-tools/src:
main/java/opennlp/tools/namefind/NameSample.java
test/java/opennlp/tools/namefind/NameSampleDataStreamTest.java
Author: colen
Date: Sun Mar 18 16:11:17 2012
New Revision: 1302151
URL: http://svn.apache.org/viewvc?rev=1302151&view=rev
Log:
OPENNLP-478: Now NameSample creates spans with a default type if the sample was untyped.
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleDataStreamTest.java
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java?rev=1302151&r1=1302150&r2=1302151&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java Sun Mar 18 16:11:17 2012
@@ -38,6 +38,9 @@ public class NameSample {
private final String[][] additionalContext;
private final boolean isClearAdaptiveData;
+ /** The default type value used when there is no type in the training data. */
+ public static final String DEFAULT_TYPE = "default";
+
/**
* Initializes the current instance.
*
@@ -188,8 +191,14 @@ public class NameSample {
}
private static final Pattern START_TAG_PATTERN = Pattern.compile("<START(:([^:>\\s]*))?>");
+
+ public static NameSample parse(String taggedTokens,
+ boolean isClearAdaptiveData) throws IOException {
+ return parse(taggedTokens, DEFAULT_TYPE, isClearAdaptiveData);
+ }
- public static NameSample parse(String taggedTokens, boolean isClearAdaptiveData)
+ public static NameSample parse(String taggedTokens, String defaultType,
+ boolean isClearAdaptiveData)
// TODO: Should throw another exception, and then convert it into an IOException in the stream
throws IOException {
String[] parts = WhitespaceTokenizer.INSTANCE.tokenize(taggedTokens);
@@ -197,7 +206,7 @@ public class NameSample {
List<String> tokenList = new ArrayList<String>(parts.length);
List<Span> nameList = new ArrayList<Span>();
- String nameType = null;
+ String nameType = defaultType;
int startIndex = -1;
int wordIndex = 0;
@@ -214,9 +223,12 @@ public class NameSample {
}
catchingName = true;
startIndex = wordIndex;
- nameType = startMatcher.group(2);
- if(nameType != null && nameType.length() == 0) {
- throw new IOException("Missing a name type: " + errorTokenWithContext(parts, pi));
+ String nameTypeFromSample = startMatcher.group(2);
+ if(nameTypeFromSample != null) {
+ if(nameTypeFromSample.length() == 0) {
+ throw new IOException("Missing a name type: " + errorTokenWithContext(parts, pi));
+ }
+ nameType = nameTypeFromSample;
}
}
Modified: opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleDataStreamTest.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleDataStreamTest.java?rev=1302151&r1=1302150&r2=1302151&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleDataStreamTest.java (original)
+++ opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleDataStreamTest.java Sun Mar 18 16:11:17 2012
@@ -103,28 +103,32 @@ public class NameSampleDataStreamTest {
}
assertEquals(expectedNames.length, names.size());
- assertEquals(new Span(6,8), spans.get(0));
- assertEquals(new Span(3,4), spans.get(1));
- assertEquals(new Span(1,3), spans.get(2));
- assertEquals(new Span(4,6), spans.get(3));
- assertEquals(new Span(1,2), spans.get(4));
- assertEquals(new Span(4,6), spans.get(5));
- assertEquals(new Span(2,3), spans.get(6));
- assertEquals(new Span(16,17), spans.get(7));
- assertEquals(new Span(0,2), spans.get(8));
- assertEquals(new Span(0,1), spans.get(9));
- assertEquals(new Span(3,5), spans.get(10));
- assertEquals(new Span(3,5), spans.get(11));
- assertEquals(new Span(10,12), spans.get(12));
- assertEquals(new Span(1,3), spans.get(13));
- assertEquals(new Span(6,8), spans.get(14));
- assertEquals(new Span(6,8), spans.get(15));
- assertEquals(new Span(8,10), spans.get(16));
- assertEquals(new Span(12,14), spans.get(17));
- assertEquals(new Span(1,3), spans.get(18));
- assertEquals(new Span(0,1), spans.get(19));
- assertEquals(new Span(2,4), spans.get(20));
- assertEquals(new Span(5,6), spans.get(21));
+ assertEquals(createDefaultSpan(6,8), spans.get(0));
+ assertEquals(createDefaultSpan(3,4), spans.get(1));
+ assertEquals(createDefaultSpan(1,3), spans.get(2));
+ assertEquals(createDefaultSpan(4,6), spans.get(3));
+ assertEquals(createDefaultSpan(1,2), spans.get(4));
+ assertEquals(createDefaultSpan(4,6), spans.get(5));
+ assertEquals(createDefaultSpan(2,3), spans.get(6));
+ assertEquals(createDefaultSpan(16,17), spans.get(7));
+ assertEquals(createDefaultSpan(0,2), spans.get(8));
+ assertEquals(createDefaultSpan(0,1), spans.get(9));
+ assertEquals(createDefaultSpan(3,5), spans.get(10));
+ assertEquals(createDefaultSpan(3,5), spans.get(11));
+ assertEquals(createDefaultSpan(10,12), spans.get(12));
+ assertEquals(createDefaultSpan(1,3), spans.get(13));
+ assertEquals(createDefaultSpan(6,8), spans.get(14));
+ assertEquals(createDefaultSpan(6,8), spans.get(15));
+ assertEquals(createDefaultSpan(8,10), spans.get(16));
+ assertEquals(createDefaultSpan(12,14), spans.get(17));
+ assertEquals(createDefaultSpan(1,3), spans.get(18));
+ assertEquals(createDefaultSpan(0,1), spans.get(19));
+ assertEquals(createDefaultSpan(2,4), spans.get(20));
+ assertEquals(createDefaultSpan(5,6), spans.get(21));
+ }
+
+ private Span createDefaultSpan(int s, int e) {
+ return new Span(s, e, NameSample.DEFAULT_TYPE);
}
/**