You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2013/09/16 12:55:07 UTC
svn commit: r1523586 - in /opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat: BratNameSampleStream.java SpanAnnotation.java
Author: joern
Date: Mon Sep 16 10:55:06 2013
New Revision: 1523586
URL: http://svn.apache.org/r1523586
Log:
OPENNLP-596 Fixed the off-by-one bug in the span calculation, enhanced error logging, and set the name sample id to the file name
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/SpanAnnotation.java
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java?rev=1523586&r1=1523585&r2=1523586&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java Mon Sep 16 10:55:06 2013
@@ -113,7 +113,7 @@ public class BratNameSampleStream extend
for (int i = 0; i < tokens.length; i++) {
tokenIndexMap.put(-(sentence.getStart() + tokens[i].getStart()), i);
- tokenIndexMap.put(sentence.getStart() + tokens[i].getEnd(), i);
+ tokenIndexMap.put(sentence.getStart() + tokens[i].getEnd(), i + 1);
}
List<Span> names = new ArrayList<Span>();
@@ -128,22 +128,24 @@ public class BratNameSampleStream extend
if (sentence.contains(entitySpan)) {
entityIdSet.remove(ann.getId());
+ entitySpan = entitySpan.trim(sample.getText());
+
Integer nameBeginIndex = tokenIndexMap.get(-entitySpan.getStart());
Integer nameEndIndex = tokenIndexMap.get(entitySpan.getEnd());
-
+
if (nameBeginIndex != null && nameEndIndex != null) {
names.add(new Span(nameBeginIndex, nameEndIndex, entity.getType()));
}
else {
- System.err.println("Dropped entity " + entity.getId() + " in document " +
+ System.err.println("Dropped entity " + entity.getId() + " (" + entitySpan.getCoveredText(sample.getText()) + ") " + " in document " +
sample.getId() + ", it is not matching tokenization!");
}
}
}
}
- samples.add(new NameSample(Span.spansToStrings(tokens, sentenceText),
- names.toArray(new Span[names.size()]), samples.size() == 0));
+ samples.add(new NameSample(sample.getId(), Span.spansToStrings(tokens, sentenceText),
+ names.toArray(new Span[names.size()]), null, samples.size() == 0));
}
for (String id : entityIdSet) {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/SpanAnnotation.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/SpanAnnotation.java?rev=1523586&r1=1523585&r2=1523586&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/SpanAnnotation.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/SpanAnnotation.java Mon Sep 16 10:55:06 2013
@@ -34,6 +34,10 @@ public class SpanAnnotation extends Brat
return span;
}
+ public String getCoveredText() {
+ return coveredText;
+ }
+
@Override
public String toString() {
return super.toString() + " " + span.getStart() + " " + span.getEnd() + " " + coveredText;