You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jk...@apache.org on 2013/02/21 06:34:07 UTC

svn commit: r1448517 - in /opennlp/trunk/opennlp-tools/src: main/java/opennlp/tools/namefind/RegexNameFinder.java test/java/opennlp/tools/namefind/RegexNameFinderTest.java

Author: jkosin
Date: Thu Feb 21 05:34:07 2013
New Revision: 1448517

URL: http://svn.apache.org/r1448517
Log:
OPENNLP-562:  Fixed Regular Expression NameFinder to return the correct span indexes for the returning spans from find().  Adjusted the Tests to include the new index values and added a test for the type of span returned.

Modified:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/RegexNameFinderTest.java

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java?rev=1448517&r1=1448516&r2=1448517&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java Thu Feb 21 05:34:07 2013
@@ -33,15 +33,26 @@ import opennlp.tools.util.Span;
 public final class RegexNameFinder implements TokenNameFinder {
 
   private final Pattern mPatterns[];
+  private final String sType;
 
-  public RegexNameFinder(Pattern patterns[]) {
+  public RegexNameFinder(Pattern patterns[], String type) {
     if (patterns == null || patterns.length == 0) {
       throw new IllegalArgumentException("patterns must not be null or empty!");
     }
 
     mPatterns = patterns;
+    sType = type;
   }
 
+  public RegexNameFinder(Pattern patterns[]) {
+    if (patterns == null || patterns.length == 0) {
+      throw new IllegalArgumentException("patterns must not be null or empty!");
+    }
+
+    mPatterns = patterns;
+    sType = null;
+  }
+  
   public Span[] find(String tokens[]) {
     Map<Integer, Integer> sentencePosTokenMap = new HashMap<Integer, Integer>();
 
@@ -55,7 +66,7 @@ public final class RegexNameFinder imple
       sentenceString.append(tokens[i]);
 
       int endIndex = sentenceString.length();
-      sentencePosTokenMap.put(endIndex, i);
+      sentencePosTokenMap.put(endIndex, i + 1);
 
       if (i < tokens.length - 1) {
         sentenceString.append(' ');
@@ -74,7 +85,7 @@ public final class RegexNameFinder imple
             sentencePosTokenMap.get(matcher.end());
 
         if (tokenStartIndex != null && tokenEndIndex != null) {
-          Span annotation = new Span(tokenStartIndex, tokenEndIndex);
+          Span annotation = new Span(tokenStartIndex, tokenEndIndex, sType);
           annotations.add(annotation);
         }
       }

Modified: opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/RegexNameFinderTest.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/RegexNameFinderTest.java?rev=1448517&r1=1448516&r2=1448517&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/RegexNameFinderTest.java (original)
+++ opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/RegexNameFinderTest.java Thu Feb 21 05:34:07 2013
@@ -45,7 +45,7 @@ public class RegexNameFinderTest {
     assertTrue(result.length == 1);
 
     assertTrue(result[0].getStart() == 1);
-    assertTrue(result[0].getEnd() == 1);
+    assertTrue(result[0].getEnd() == 2);
   }
 
   @Test
@@ -55,14 +55,15 @@ public class RegexNameFinderTest {
     String sentence[] = new String[]{"a", "80", "year", "b", "c"};
 
     RegexNameFinder finder =
-      new RegexNameFinder(new Pattern[]{testPattern});
+      new RegexNameFinder(new Pattern[]{testPattern}, "match");
 
     Span[] result = finder.find(sentence);
 
     assertTrue(result.length == 1);
 
     assertTrue(result[0].getStart() == 1);
-    assertTrue(result[0].getEnd() == 2);
+    assertTrue(result[0].getEnd() == 3);
+    assertTrue(result[0].getType().equals("match"));
   }
 
   @Test



Re: svn commit: r1448517 - in /opennlp/trunk/opennlp-tools/src: main/java/opennlp/tools/namefind/RegexNameFinder.java test/java/opennlp/tools/namefind/RegexNameFinderTest.java

Posted by James Kosin <ja...@gmail.com>.
On 2/21/2013 4:12 AM, Jörn Kottmann wrote:
> On 02/21/2013 06:34 AM, jkosin@apache.org wrote:
>> -  public RegexNameFinder(Pattern patterns[]) {
>> +  public RegexNameFinder(Pattern patterns[], String type) {
>>       if (patterns == null || patterns.length == 0) {
>>         throw new IllegalArgumentException("patterns must not be null 
>> or empty!");
>>       }
>>         mPatterns = patterns;
>> +    sType = type;
>>     }
>>   +  public RegexNameFinder(Pattern patterns[]) {
>> +    if (patterns == null || patterns.length == 0) {
>> +      throw new IllegalArgumentException("patterns must not be null 
>> or empty!");
>> +    }
>> +
>> +    mPatterns = patterns;
>> +    sType = null;
>> +  }
>> +
>
> Thanks for fixing this James. If I am not mistaken, then the second 
> constructor could just call the
> first one and pass null for the type right? Then we do not need to 
> duplicate the init code.
>
> Jörn
>
Jorn,

I tried but was trying differently...  this(patterns, null) should 
work.  But null may need to be cast as a String.

James

Re: svn commit: r1448517 - in /opennlp/trunk/opennlp-tools/src: main/java/opennlp/tools/namefind/RegexNameFinder.java test/java/opennlp/tools/namefind/RegexNameFinderTest.java

Posted by Jörn Kottmann <ko...@gmail.com>.
On 02/21/2013 06:34 AM, jkosin@apache.org wrote:
> -  public RegexNameFinder(Pattern patterns[]) {
> +  public RegexNameFinder(Pattern patterns[], String type) {
>       if (patterns == null || patterns.length == 0) {
>         throw new IllegalArgumentException("patterns must not be null or empty!");
>       }
>   
>       mPatterns = patterns;
> +    sType = type;
>     }
>   
> +  public RegexNameFinder(Pattern patterns[]) {
> +    if (patterns == null || patterns.length == 0) {
> +      throw new IllegalArgumentException("patterns must not be null or empty!");
> +    }
> +
> +    mPatterns = patterns;
> +    sType = null;
> +  }
> +

Thanks for fixing this James. If I am not mistaken, then the second 
constructor could just call the
first one and pass null for the type right? Then we do not need to 
duplicate the init code.

Jörn