You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2011/07/04 23:08:25 UTC
svn commit: r1142807 - in /incubator/opennlp/trunk/opennlp-tools/src:
main/java/opennlp/tools/namefind/NameSample.java
test/java/opennlp/tools/namefind/NameSampleTest.java
Author: colen
Date: Mon Jul 4 21:08:25 2011
New Revision: 1142807
URL: http://svn.apache.org/viewvc?rev=1142807&view=rev
Log:
OPENNLP-213 The name type now accepts a wider variety of characters.
Also made the START regex pattern static and final, so that it is compiled only once.
Modified:
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java
incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTest.java
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java?rev=1142807&r1=1142806&r2=1142807&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java Mon Jul 4 21:08:25 2011
@@ -186,6 +186,8 @@ public class NameSample {
return errorString.toString();
}
+ private static final Pattern START_TAG_PATTERN = Pattern.compile("<START(:([^:>\\s]*))?>");
+
public static NameSample parse(String taggedTokens, boolean isClearAdaptiveData)
// TODO: Should throw another exception, and then convert it into an IOException in the stream
throws IOException {
@@ -202,10 +204,8 @@ public class NameSample {
// leave the NameType property of NameSample null.
boolean catchingName = false;
- Pattern startTagPattern = Pattern.compile("<START(:(\\w*))?>");
-
for (int pi = 0; pi < parts.length; pi++) {
- Matcher startMatcher = startTagPattern.matcher(parts[pi]);
+ Matcher startMatcher = START_TAG_PATTERN.matcher(parts[pi]);
if (startMatcher.matches()) {
if(catchingName) {
throw new IOException("Found unexpected annotation" +
Modified: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTest.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTest.java?rev=1142807&r1=1142806&r2=1142807&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTest.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTest.java Mon Jul 4 21:08:25 2011
@@ -19,6 +19,9 @@
package opennlp.tools.namefind;
import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+
import opennlp.tools.util.Span;
import org.junit.Test;
@@ -121,4 +124,71 @@ public class NameSampleTest {
assertEquals(8, test.getSentence().length);
}
+
+ /**
+ * Checks that the parser accepts name types containing special characters.
+ */
+ @Test
+ public void testTypeWithSpecialChars() throws Exception {
+ NameSample parsedSample = NameSample
+ .parse(
+ "<START:type-1> U . S . <END> "
+ + "President <START:type_2> Barack Obama <END> is considering sending "
+ + "additional American forces to <START:type_3-/;.,&%$> Afghanistan <END> .",
+ false);
+
+ assertEquals(3, parsedSample.getNames().length);
+ assertEquals("type-1", parsedSample.getNames()[0].getType());
+ assertEquals("type_2", parsedSample.getNames()[1].getType());
+ assertEquals("type_3-/;.,&%$", parsedSample.getNames()[2].getType());
+ }
+
+ /**
+ * Tests that parsing fails with an IOException when the name type is empty.
+ */
+ @Test(expected=IOException.class)
+ public void testMissingType() throws Exception {
+ NameSample.parse("<START:> token <END>",
+ false);
+ }
+
+ /**
+ * Tests that parsing fails with an IOException when the name type contains a space.
+ * @throws Exception
+ */
+ @Test(expected=IOException.class)
+ public void testTypeWithSpace() throws Exception {
+ NameSample.parse("<START:abc a> token <END>",
+ false);
+ }
+
+ /**
+ * Tests that parsing fails with an IOException when the name type contains a newline.
+ * @throws Exception
+ */
+ @Test(expected=IOException.class)
+ public void testTypeWithNewLine() throws Exception {
+ NameSample.parse("<START:abc\na> token <END>",
+ false);
+ }
+
+ /**
+ * Tests that parsing fails with an IOException when the name type contains a colon (':').
+ * @throws Exception
+ */
+ @Test(expected=IOException.class)
+ public void testTypeWithInvalidChar1() throws Exception {
+ NameSample.parse("<START:abc:a> token <END>",
+ false);
+ }
+
+ /**
+ * Tests that parsing fails with an IOException when the name type contains a '>' character.
+ * @throws Exception
+ */
+ @Test(expected=IOException.class)
+ public void testTypeWithInvalidChar2() throws Exception {
+ NameSample.parse("<START:abc>a> token <END>",
+ false);
+ }
}