You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2013/09/16 12:55:07 UTC

svn commit: r1523586 - in /opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat: BratNameSampleStream.java SpanAnnotation.java

Author: joern
Date: Mon Sep 16 10:55:06 2013
New Revision: 1523586

URL: http://svn.apache.org/r1523586
Log:
OPENNLP-596 Fixed the off-by-one bug in the span calculation, enhanced error logging, and set the name sample id to the file name

Modified:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/SpanAnnotation.java

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java?rev=1523586&r1=1523585&r2=1523586&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java Mon Sep 16 10:55:06 2013
@@ -113,7 +113,7 @@ public class BratNameSampleStream extend
       
       for (int i = 0; i < tokens.length; i++) {
         tokenIndexMap.put(-(sentence.getStart() + tokens[i].getStart()), i);
-        tokenIndexMap.put(sentence.getStart() + tokens[i].getEnd(), i);
+        tokenIndexMap.put(sentence.getStart() + tokens[i].getEnd(), i + 1);
       }
       
       List<Span> names = new ArrayList<Span>();
@@ -128,22 +128,24 @@ public class BratNameSampleStream extend
           if (sentence.contains(entitySpan)) {
             entityIdSet.remove(ann.getId());
             
+            entitySpan = entitySpan.trim(sample.getText());
+            
             Integer nameBeginIndex = tokenIndexMap.get(-entitySpan.getStart());
             Integer nameEndIndex = tokenIndexMap.get(entitySpan.getEnd());
-           
+            
             if (nameBeginIndex != null && nameEndIndex != null) {
               names.add(new Span(nameBeginIndex, nameEndIndex, entity.getType()));
             }
             else {
-              System.err.println("Dropped entity " + entity.getId() + " in document " + 
+              System.err.println("Dropped entity " + entity.getId() + " (" + entitySpan.getCoveredText(sample.getText()) + ") " + " in document " + 
                   sample.getId() + ", it is not matching tokenization!");
             }
           }
         }
       }
       
-      samples.add(new NameSample(Span.spansToStrings(tokens, sentenceText),
-          names.toArray(new Span[names.size()]), samples.size() == 0));
+      samples.add(new NameSample(sample.getId(), Span.spansToStrings(tokens, sentenceText),
+          names.toArray(new Span[names.size()]), null, samples.size() == 0));
     }
     
     for (String id : entityIdSet) {

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/SpanAnnotation.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/SpanAnnotation.java?rev=1523586&r1=1523585&r2=1523586&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/SpanAnnotation.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/brat/SpanAnnotation.java Mon Sep 16 10:55:06 2013
@@ -34,6 +34,10 @@ public class SpanAnnotation extends Brat
     return span;
   }
   
+  public String getCoveredText() {
+    return coveredText;
+  }
+  
   @Override
   public String toString() {
     return super.toString() + " " + span.getStart() + " " + span.getEnd() + " " + coveredText;