You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/05/30 14:22:19 UTC

svn commit: r1129141 - /incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java

Author: joern
Date: Mon May 30 12:22:19 2011
New Revision: 1129141

URL: http://svn.apache.org/viewvc?rev=1129141&view=rev
Log:
OPENNLP-142 Improved exception messages; they now contain a little context, which makes it possible to locate the line in the training data

Modified:
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java?rev=1129141&r1=1129140&r2=1129141&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java Mon May 30 12:22:19 2011
@@ -160,6 +160,32 @@ public class NameSample {
     return result.toString();
   }
   
+  private static String errorTokenWithContext(String sentence[], int index) {
+    // Builds a short excerpt around sentence[index] for use in error messages.
+    StringBuilder errorString = new StringBuilder();
+    // index is not range-checked here; sentence[index] below must be a valid position
+    // up to two tokens before the error token, each followed by a space
+    if (index > 1)
+      errorString.append(sentence[index -2]).append(" ");
+    
+    if (index > 0)
+      errorString.append(sentence[index -1]).append(" ");
+    
+    // the token itself, wrapped in ### markers and followed by a space
+    errorString.append("###");
+    errorString.append(sentence[index]);
+    errorString.append("###").append(" ");
+    
+    // up to two tokens after; only the first of them is followed by a space
+    if (index + 1 < sentence.length)
+      errorString.append(sentence[index + 1]).append(" ");
+
+    if (index + 2 < sentence.length)
+      errorString.append(sentence[index + 2]);
+    // the result ends with a space unless a second following token was appended
+    return errorString.toString();
+  }
+  
   public static NameSample parse(String taggedTokens, boolean isClearAdaptiveData)
     // TODO: Should throw another exception, and then convert it into an IOException in the stream
     throws IOException {
@@ -182,19 +208,20 @@ public class NameSample {
       Matcher startMatcher = startTagPattern.matcher(parts[pi]);
       if (startMatcher.matches()) {
         if(catchingName) {
-          throw new IOException("Found unexpected annotation " + parts[pi] + " while handling a name sequence.");
+          throw new IOException("Found unexpected annotation" + 
+              " while handling a name sequence: " + errorTokenWithContext(parts, pi));
         }
         catchingName = true;
         startIndex = wordIndex;
         nameType = startMatcher.group(2);
         if(nameType != null && nameType.length() == 0) {
-          throw new IOException("Missing a name type: " + parts[pi]);
+          throw new IOException("Missing a name type: " + errorTokenWithContext(parts, pi));
         }
           
       }
       else if (parts[pi].equals(NameSampleDataStream.END_TAG)) {
         if(catchingName == false) {
-          throw new IOException("Found unexpected annotation " + parts[pi] + ".");
+          throw new IOException("Found unexpected annotation: " + errorTokenWithContext(parts, pi));
         }
         catchingName = false;
         // create name