You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2012/03/18 17:11:18 UTC

svn commit: r1302151 - in /opennlp/trunk/opennlp-tools/src: main/java/opennlp/tools/namefind/NameSample.java test/java/opennlp/tools/namefind/NameSampleDataStreamTest.java

Author: colen
Date: Sun Mar 18 16:11:17 2012
New Revision: 1302151

URL: http://svn.apache.org/viewvc?rev=1302151&view=rev
Log:
OPENNLP-478: Now NameSample creates spans with a default type if the sample was untyped.

Modified:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleDataStreamTest.java

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java?rev=1302151&r1=1302150&r2=1302151&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java Sun Mar 18 16:11:17 2012
@@ -38,6 +38,9 @@ public class NameSample {
   private final String[][] additionalContext;
   private final boolean isClearAdaptiveData;
 
+  /** The default type value used when there is no type in the training data. */
+  public static final String DEFAULT_TYPE = "default";
+
   /**
    * Initializes the current instance.
    *
@@ -188,8 +191,14 @@ public class NameSample {
   }
   
   private static final Pattern START_TAG_PATTERN = Pattern.compile("<START(:([^:>\\s]*))?>");
+
+  public static NameSample parse(String taggedTokens,
+      boolean isClearAdaptiveData) throws IOException {
+    return parse(taggedTokens, DEFAULT_TYPE, isClearAdaptiveData);
+  }
   
-  public static NameSample parse(String taggedTokens, boolean isClearAdaptiveData)
+  public static NameSample parse(String taggedTokens, String defaultType,
+      boolean isClearAdaptiveData)
     // TODO: Should throw another exception, and then convert it into an IOException in the stream
     throws IOException {
     String[] parts = WhitespaceTokenizer.INSTANCE.tokenize(taggedTokens);
@@ -197,7 +206,7 @@ public class NameSample {
     List<String> tokenList = new ArrayList<String>(parts.length);
     List<Span> nameList = new ArrayList<Span>();
 
-    String nameType = null;
+    String nameType = defaultType;
     int startIndex = -1;
     int wordIndex = 0;
     
@@ -214,9 +223,12 @@ public class NameSample {
         }
         catchingName = true;
         startIndex = wordIndex;
-        nameType = startMatcher.group(2);
-        if(nameType != null && nameType.length() == 0) {
-          throw new IOException("Missing a name type: " + errorTokenWithContext(parts, pi));
+        String nameTypeFromSample = startMatcher.group(2);
+        if(nameTypeFromSample != null) {
+          if(nameTypeFromSample.length() == 0) {
+            throw new IOException("Missing a name type: " + errorTokenWithContext(parts, pi));
+          }
+          nameType = nameTypeFromSample;
         }
           
       }

Modified: opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleDataStreamTest.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleDataStreamTest.java?rev=1302151&r1=1302150&r2=1302151&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleDataStreamTest.java (original)
+++ opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleDataStreamTest.java Sun Mar 18 16:11:17 2012
@@ -103,28 +103,32 @@ public class NameSampleDataStreamTest {
     }
 
     assertEquals(expectedNames.length, names.size());
-    assertEquals(new Span(6,8), spans.get(0));
-    assertEquals(new Span(3,4), spans.get(1));
-    assertEquals(new Span(1,3), spans.get(2));
-    assertEquals(new Span(4,6), spans.get(3));
-    assertEquals(new Span(1,2), spans.get(4));
-    assertEquals(new Span(4,6), spans.get(5));
-    assertEquals(new Span(2,3), spans.get(6));
-    assertEquals(new Span(16,17), spans.get(7));
-    assertEquals(new Span(0,2), spans.get(8));
-    assertEquals(new Span(0,1), spans.get(9));
-    assertEquals(new Span(3,5), spans.get(10));
-    assertEquals(new Span(3,5), spans.get(11));
-    assertEquals(new Span(10,12), spans.get(12));
-    assertEquals(new Span(1,3), spans.get(13));
-    assertEquals(new Span(6,8), spans.get(14));
-    assertEquals(new Span(6,8), spans.get(15));
-    assertEquals(new Span(8,10), spans.get(16));
-    assertEquals(new Span(12,14), spans.get(17));
-    assertEquals(new Span(1,3), spans.get(18));
-    assertEquals(new Span(0,1), spans.get(19));
-    assertEquals(new Span(2,4), spans.get(20));
-    assertEquals(new Span(5,6), spans.get(21));
+    assertEquals(createDefaultSpan(6,8), spans.get(0));
+    assertEquals(createDefaultSpan(3,4), spans.get(1));
+    assertEquals(createDefaultSpan(1,3), spans.get(2));
+    assertEquals(createDefaultSpan(4,6), spans.get(3));
+    assertEquals(createDefaultSpan(1,2), spans.get(4));
+    assertEquals(createDefaultSpan(4,6), spans.get(5));
+    assertEquals(createDefaultSpan(2,3), spans.get(6));
+    assertEquals(createDefaultSpan(16,17), spans.get(7));
+    assertEquals(createDefaultSpan(0,2), spans.get(8));
+    assertEquals(createDefaultSpan(0,1), spans.get(9));
+    assertEquals(createDefaultSpan(3,5), spans.get(10));
+    assertEquals(createDefaultSpan(3,5), spans.get(11));
+    assertEquals(createDefaultSpan(10,12), spans.get(12));
+    assertEquals(createDefaultSpan(1,3), spans.get(13));
+    assertEquals(createDefaultSpan(6,8), spans.get(14));
+    assertEquals(createDefaultSpan(6,8), spans.get(15));
+    assertEquals(createDefaultSpan(8,10), spans.get(16));
+    assertEquals(createDefaultSpan(12,14), spans.get(17));
+    assertEquals(createDefaultSpan(1,3), spans.get(18));
+    assertEquals(createDefaultSpan(0,1), spans.get(19));
+    assertEquals(createDefaultSpan(2,4), spans.get(20));
+    assertEquals(createDefaultSpan(5,6), spans.get(21));
+  }
+  
+  private Span createDefaultSpan(int s, int e) {
+    return new Span(s, e, NameSample.DEFAULT_TYPE);
   }
 
   /**