You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2011/07/04 23:08:25 UTC

svn commit: r1142807 - in /incubator/opennlp/trunk/opennlp-tools/src: main/java/opennlp/tools/namefind/NameSample.java test/java/opennlp/tools/namefind/NameSampleTest.java

Author: colen
Date: Mon Jul  4 21:08:25 2011
New Revision: 1142807

URL: http://svn.apache.org/viewvc?rev=1142807&view=rev
Log:
OPENNLP-213 The name type now accepts a wider variety of characters (anything except ':', '>' and whitespace).
Also made the START tag regex pattern static and final, so it is compiled only once.

Modified:
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java
    incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTest.java

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java?rev=1142807&r1=1142806&r2=1142807&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java Mon Jul  4 21:08:25 2011
@@ -186,6 +186,8 @@ public class NameSample {
     return errorString.toString();
   }
   
+  private static final Pattern START_TAG_PATTERN = Pattern.compile("<START(:([^:>\\s]*))?>");
+  
   public static NameSample parse(String taggedTokens, boolean isClearAdaptiveData)
     // TODO: Should throw another exception, and then convert it into an IOException in the stream
     throws IOException {
@@ -202,10 +204,8 @@ public class NameSample {
     // leave the NameType property of NameSample null.
     boolean catchingName = false;
     
-    Pattern startTagPattern = Pattern.compile("<START(:(\\w*))?>");
-    
     for (int pi = 0; pi < parts.length; pi++) {
-      Matcher startMatcher = startTagPattern.matcher(parts[pi]);
+      Matcher startMatcher = START_TAG_PATTERN.matcher(parts[pi]);
       if (startMatcher.matches()) {
         if(catchingName) {
           throw new IOException("Found unexpected annotation" + 

Modified: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTest.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTest.java?rev=1142807&r1=1142806&r2=1142807&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTest.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTest.java Mon Jul  4 21:08:25 2011
@@ -19,6 +19,9 @@
 package opennlp.tools.namefind;
 
 import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+
 import opennlp.tools.util.Span;
 
 import org.junit.Test;
@@ -121,4 +124,71 @@ public class NameSampleTest {
     
     assertEquals(8, test.getSentence().length);
   }
+  
+  /**
+   * Checks if it accepts name type with some special characters
+   */
+  @Test
+  public void testTypeWithSpecialChars() throws Exception {
+    NameSample parsedSample = NameSample
+        .parse(
+            "<START:type-1> U . S . <END> "
+                + "President <START:type_2> Barack Obama <END> is considering sending "
+                + "additional American forces to <START:type_3-/;.,&%$> Afghanistan <END> .",
+            false);
+
+    assertEquals(3, parsedSample.getNames().length);
+    assertEquals("type-1", parsedSample.getNames()[0].getType());
+    assertEquals("type_2", parsedSample.getNames()[1].getType());
+    assertEquals("type_3-/;.,&%$", parsedSample.getNames()[2].getType());
+  }
+  
+  /**
+   * Test if it fails to parse empty type
+   */
+  @Test(expected=IOException.class)
+  public void testMissingType() throws Exception {
+    NameSample.parse("<START:> token <END>", 
+        false);
+  }
+  
+  /**
+   * Test if it fails to parse type with space
+   * @throws Exception
+   */
+  @Test(expected=IOException.class)
+  public void testTypeWithSpace() throws Exception {
+    NameSample.parse("<START:abc a> token <END>", 
+        false);
+  }
+
+  /**
+   * Test if it fails to parse type with new line
+   * @throws Exception
+   */
+  @Test(expected=IOException.class)
+  public void testTypeWithNewLine() throws Exception {
+    NameSample.parse("<START:abc\na> token <END>", 
+        false);
+  }
+
+  /**
+   * Test if it fails to parse type with :
+   * @throws Exception
+   */
+  @Test(expected=IOException.class)
+  public void testTypeWithInvalidChar1() throws Exception {
+    NameSample.parse("<START:abc:a> token <END>", 
+        false);
+  }
+  
+  /**
+   * Test if it fails to parse type with >
+   * @throws Exception
+   */
+  @Test(expected=IOException.class)
+  public void testTypeWithInvalidChar2() throws Exception {
+    NameSample.parse("<START:abc>a> token <END>", 
+        false);
+  }
 }