You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2012/07/25 01:28:11 UTC

svn commit: r1365346 - /opennlp/sandbox/tagging-server/src/main/java/org/apache/opennlp/tagging_server/namefind/NameFinderResource.java

Author: joern
Date: Tue Jul 24 23:28:11 2012
New Revision: 1365346

URL: http://svn.apache.org/viewvc?rev=1365346&view=rev
Log:
OPENNLP-480 Fixed bug in offset handling.

Modified:
    opennlp/sandbox/tagging-server/src/main/java/org/apache/opennlp/tagging_server/namefind/NameFinderResource.java

Modified: opennlp/sandbox/tagging-server/src/main/java/org/apache/opennlp/tagging_server/namefind/NameFinderResource.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/tagging-server/src/main/java/org/apache/opennlp/tagging_server/namefind/NameFinderResource.java?rev=1365346&r1=1365345&r2=1365346&view=diff
==============================================================================
--- opennlp/sandbox/tagging-server/src/main/java/org/apache/opennlp/tagging_server/namefind/NameFinderResource.java (original)
+++ opennlp/sandbox/tagging-server/src/main/java/org/apache/opennlp/tagging_server/namefind/NameFinderResource.java Tue Jul 24 23:28:11 2012
@@ -118,7 +118,11 @@ public class NameFinderResource {
       String[][] tokenizedSentences = new String[sentenceSpans.length][];
       
       for (int i = 0; i < sentenceSpans.length; i++) {
+        // offset of sentence gets lost here!
         Span tokenSpans[] = tokenizer.tokenizePos(sentenceSpans[i].getCoveredText(document).toString());
+        // shift the sentence-relative token spans by the sentence start offset
+        tokenSpans = offsetSpans(tokenSpans, sentenceSpans[i].getStart());
+        
         tokenizedSentencesSpan.add(tokenSpans);
         
         String tokens[] = new String[tokenSpans.length];
@@ -137,4 +141,17 @@ public class NameFinderResource {
       ServiceUtil.releaseService(preprocessFactoryService);
     }
   }
+
+  private Span[] offsetSpans(
+      Span[] tokenSpans, int offset) {
+    
+    Span spans[] = new Span[tokenSpans.length];
+    
+    for (int i = 0; i < tokenSpans.length; i++) {
+      spans[i] = new Span(tokenSpans[i].getStart() + offset,
+          tokenSpans[i].getEnd() + offset);
+    }
+    
+    return spans;
+  }
 }