You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2012/07/25 01:28:11 UTC
svn commit: r1365346 -
/opennlp/sandbox/tagging-server/src/main/java/org/apache/opennlp/tagging_server/namefind/NameFinderResource.java
Author: joern
Date: Tue Jul 24 23:28:11 2012
New Revision: 1365346
URL: http://svn.apache.org/viewvc?rev=1365346&view=rev
Log:
OPENNLP-480 Fixed bug in offset handling.
Modified:
opennlp/sandbox/tagging-server/src/main/java/org/apache/opennlp/tagging_server/namefind/NameFinderResource.java
Modified: opennlp/sandbox/tagging-server/src/main/java/org/apache/opennlp/tagging_server/namefind/NameFinderResource.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/tagging-server/src/main/java/org/apache/opennlp/tagging_server/namefind/NameFinderResource.java?rev=1365346&r1=1365345&r2=1365346&view=diff
==============================================================================
--- opennlp/sandbox/tagging-server/src/main/java/org/apache/opennlp/tagging_server/namefind/NameFinderResource.java (original)
+++ opennlp/sandbox/tagging-server/src/main/java/org/apache/opennlp/tagging_server/namefind/NameFinderResource.java Tue Jul 24 23:28:11 2012
@@ -118,7 +118,11 @@ public class NameFinderResource {
String[][] tokenizedSentences = new String[sentenceSpans.length][];
for (int i = 0; i < sentenceSpans.length; i++) {
+ // offset of sentence gets lost here!
Span tokenSpans[] = tokenizer.tokenizePos(sentenceSpans[i].getCoveredText(document).toString());
+ // all spans need to be sentence offset adjusted!
+ tokenSpans = offsetSpans(tokenSpans, sentenceSpans[i].getStart());
+
tokenizedSentencesSpan.add(tokenSpans);
String tokens[] = new String[tokenSpans.length];
@@ -137,4 +141,17 @@ public class NameFinderResource {
ServiceUtil.releaseService(preprocessFactoryService);
}
}
+ // Builds a new array holding one span per input span, each moved by offset.
+ private Span[] offsetSpans(
+ Span[] tokenSpans, int offset) {
+ // The result has the same length as the input; entries are filled below.
+ Span spans[] = new Span[tokenSpans.length];
+ // Start and end get the same shift, so end minus start stays the same.
+ for (int i = 0; i < tokenSpans.length; i++) {
+ spans[i] = new Span(tokenSpans[i].getStart() + offset,
+ tokenSpans[i].getEnd() + offset);
+ }
+
+ return spans;
+ }
}