You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by st...@apache.org on 2012/11/30 10:32:16 UTC

svn commit: r1415556 - in /incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor: ae/RelationExtractorAnnotator.java eval/RelationExtractorEvaluation.java pipelines/RelationExtractorTrain.java

Author: stevenbethard
Date: Fri Nov 30 09:32:15 2012
New Revision: 1415556

URL: http://svn.apache.org/viewvc?rev=1415556&view=rev
Log:
Moves error printing to evaluation (out of the annotator, where it didn't really belong)

Modified:
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorTrain.java

Modified: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java?rev=1415556&r1=1415555&r2=1415556&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java (original)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java Fri Nov 30 09:32:15 2012
@@ -18,9 +18,6 @@
  */
 package org.apache.ctakes.relationextractor.ae;
 
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -40,9 +37,6 @@ import org.cleartk.classifier.Instance;
 import org.uimafit.descriptor.ConfigurationParameter;
 import org.uimafit.util.JCasUtil;
 
-import com.google.common.base.Objects;
-import com.google.common.io.Files;
-
 import org.apache.ctakes.relationextractor.ae.features.DependencyPathFeaturesExtractor;
 import org.apache.ctakes.relationextractor.ae.features.DependencyTreeFeaturesExtractor;
 import org.apache.ctakes.relationextractor.ae.features.NamedEntityFeaturesExtractor;
@@ -60,8 +54,6 @@ public abstract class RelationExtractorA
   public static final String NO_RELATION_CATEGORY = "-NONE-";
 
   public static final String PARAM_GOLD_VIEW_NAME = "GoldViewName";
-  
-  public static int relationId; // counter for error logging
 
   @ConfigurationParameter(
       name = PARAM_GOLD_VIEW_NAME,
@@ -77,26 +69,6 @@ public abstract class RelationExtractorA
       description = "probability that a negative example should be retained for training")
   protected double probabilityOfKeepingANegativeExample = 1.0;
   
-  public static final String PARAM_PRINT_ERRORS = "PrintErrors";
-  
-  @ConfigurationParameter(
-		 name = PARAM_PRINT_ERRORS,
-		 mandatory = false,
-		 description = "Print errors true/false",
-		 defaultValue = "false")
-  boolean printErrors;
-  
-  public static final String PARAM_ERROR_FILE = "ErrorOutputStream";
-  //private static final String DEFAULT_ERROR_OUT = "System.out";
-  
-  @ConfigurationParameter(
-		 name = PARAM_ERROR_FILE,
-		 mandatory = false,
-		 description = "If PARAM_PRINT_ERRORS is true, this indicates where to write files.  If unspecified, it will output to STDOUT.")
-		 //defaultValue = DEFAULT_ERROR_OUT)
-  protected File errorFile = null;
-  protected PrintStream errorOutStream;
-  
   protected Random coin = new Random(0);
 
   /**
@@ -117,21 +89,7 @@ public abstract class RelationExtractorA
     if (this.isTraining() && this.goldViewName == null) {
       throw new IllegalArgumentException(PARAM_GOLD_VIEW_NAME + " must be defined during training");
     }
-    relationId = 0;
-    
-    try {
-    	if (errorFile == null) {
-    		this.errorOutStream = System.out;
-    	} else {
-    		this.errorOutStream = new PrintStream(Files.newOutputStreamSupplier(errorFile).getOutput());
-    	}
-	} catch (IOException e) {
-		throw new ResourceInitializationException(e);
-	}
-    
   }
-  
-  
  
   /**
    * Selects the relevant mentions/annotations within a sentence for relation identification/extraction
@@ -144,22 +102,6 @@ public abstract class RelationExtractorA
   @Override
   public void process(JCas jCas) throws AnalysisEngineProcessException {
     // during training, pull entity and relation annotations from the manual annotation view
-    
-  	// map argument spans to the category of the relation between them
-  	HashMap<HashableArguments, String> categoryLookup = new HashMap<HashableArguments, String>();
-  	
-  	// get gold standard relation instances during testing for error analysis
-  	if (! this.isTraining() && printErrors) {
-  		JCas goldView;
-  		try {
-  			goldView = jCas.getView("GoldView");
-  		} catch(CASException e) {
-  			throw new AnalysisEngineProcessException(e);
-  		}
-  		
-  		categoryLookup = createCategoryLookup(goldView); 
-  	}
-  	
   	JCas identifiedAnnotationView, relationView;
     if (this.isTraining()) {
       try {
@@ -216,17 +158,6 @@ public abstract class RelationExtractorA
     		else {
     			String predictedCategory = this.classifier.classify(features);
 
-    			if(printErrors) {
-    				String goldCategory; // gold standard relation category
-    				if (categoryLookup.containsKey(new HashableArguments(arg1, arg2))) {
-    					goldCategory = categoryLookup.get(new HashableArguments(arg1, arg2));
-    				} else {
-    					goldCategory = NO_RELATION_CATEGORY;
-    				}
-
-    				logResults(sentence, arg1, arg2, features, predictedCategory, goldCategory);
-    			}
-    			
     			// add a relation annotation if a true relation was predicted
     			if (!predictedCategory.equals(NO_RELATION_CATEGORY)) {
 
@@ -269,21 +200,6 @@ public abstract class RelationExtractorA
   protected abstract String getRelationCategory(Map<List<Annotation>, BinaryTextRelation> relationLookup,
 		  IdentifiedAnnotation arg1, IdentifiedAnnotation arg2);
 
-  private void logResults(Sentence sentence, IdentifiedAnnotation arg1,
-		  IdentifiedAnnotation arg2, List<Feature> features, String predictedCategory,
-		  String goldCategory) {
-	  if (printErrors && !predictedCategory.equals(goldCategory)) {
-		  errorOutStream.format("%-15s%d\n", "instance:", relationId++);
-		  errorOutStream.format("%-15s%s\n", "prediction:", predictedCategory);
-		  errorOutStream.format("%-15s%s\n", "gold label:", goldCategory);
-		  errorOutStream.format("%-15s%s\n", "arg1:", arg1.getCoveredText());
-		  errorOutStream.format("%-15s%s\n", "arg2:", arg2.getCoveredText());
-		  errorOutStream.format("%-15s%s\n", "sentence:", sentence.getCoveredText());
-		  errorOutStream.format("\n%s\n\n", features);
-		  errorOutStream.println();
-	  }
-  }
-
   /**
    * Creates a lookup map between lists of arguments and their relation
    * This map does not key in simply on a HashableArgument because 
@@ -309,23 +225,6 @@ public abstract class RelationExtractorA
 	  return relationLookup;
   }
 
-  /**
-   * Creates a lookup map between relations and their categories
-   * This is used for error analysis
-   * @param goldView
-   * @return
-   */
-  private static HashMap<HashableArguments, String> createCategoryLookup(JCas goldView) {
-	  // save gold relations for lookup during error analysis; normalize order of arguments
-	  HashMap<HashableArguments, String> categoryLookup = new HashMap<HashableArguments, String>();
-	  for (BinaryTextRelation relation : JCasUtil.select(goldView, BinaryTextRelation.class)) {
-		  // arguments must be in the correct order to be found during lookup
-		  categoryLookup.put(new HashableArguments(relation), relation.getCategory());
-	  }
-	  return categoryLookup;
-  }
-  
-  
   public static class IdentifiedAnnotationPair {
 	  
 	 private final IdentifiedAnnotation arg1;
@@ -339,77 +238,4 @@ public abstract class RelationExtractorA
 		 
 	 public final IdentifiedAnnotation getArg2() { return arg2; }
   }
-	  
-  
-  /**
-   * This class is useful for mapping the spans of relation arguments to the relation's category.
-   */
-  public static class HashableArguments {
-
-    protected int arg1begin;
-    protected int arg1end;
-    protected int arg2begin;
-    protected int arg2end;
-
-    public HashableArguments(int arg1begin, int arg1end, int arg2begin, int arg2end) {
-      this.arg1begin = arg1begin;
-      this.arg1end = arg1end;
-      this.arg2begin = arg2begin;
-      this.arg2end = arg2end;
-    }
-
-    public HashableArguments(Annotation arg1, Annotation arg2) {
-      this(arg1.getBegin(), arg1.getEnd(), arg2.getBegin(), arg2.getEnd());
-    }
-
-    public HashableArguments(BinaryTextRelation relation) {
-      this(getArg1(relation), getArg2(relation));
-    }
-
-    // HACK: arg1 is not always arg1 because of bugs in the reader
-    private static Annotation getArg1(BinaryTextRelation rel) {
-      RelationArgument arg1 = rel.getArg1();
-      return arg1.getRole().equals("Argument") ? arg1.getArgument() : rel.getArg2().getArgument();
-    }
-
-    // HACK: arg2 is not always arg2 because of bugs in the reader
-    private static Annotation getArg2(BinaryTextRelation rel) {
-      RelationArgument arg2 = rel.getArg2();
-      return arg2.getRole().equals("Related_to") ? arg2.getArgument() : rel.getArg1().getArgument();
-    }
-
-	@Override
-	public boolean equals(Object otherObject) {
-	  boolean result = false;
-		if (otherObject instanceof HashableArguments) {
-			HashableArguments other = (HashableArguments) otherObject;
-			result = (this.getClass() == other.getClass() && 
-					this.arg1begin == other.arg1begin && 
-					this.arg1end == other.arg1end && 
-					this.arg2begin == other.arg2begin && 
-					this.arg2end == other.arg2end);
-		}
-		return result;
-	}
-	
-    @Override
-    public int hashCode() {
-      return Objects.hashCode(
-          this.arg1begin,
-          this.arg1end,
-          this.arg2begin,
-          this.arg2end);
-    }
-    
-	@Override
-	public String toString() {
-		return String.format(
-				"%s(%s,%s,%s,%s)",
-				this.getClass().getSimpleName(),
-				this.arg1begin,
-				this.arg1end,
-				this.arg2begin,
-				this.arg2end);
-	}
-  }
 }

Modified: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java?rev=1415556&r1=1415555&r2=1415556&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java (original)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java Fri Nov 30 09:32:15 2012
@@ -26,6 +26,7 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_engine.AnalysisEngine;
@@ -67,12 +68,13 @@ import com.google.common.base.Functions;
 import com.google.common.base.Objects;
 import com.google.common.base.Objects.ToStringHelper;
 import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
 import com.google.common.collect.Ordering;
+import com.google.common.collect.Sets;
 
 import org.apache.ctakes.relationextractor.ae.DegreeOfRelationExtractorAnnotator;
 import org.apache.ctakes.relationextractor.ae.EntityMentionPairRelationExtractorAnnotator;
 import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
-import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator.HashableArguments;
 import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
 import org.apache.ctakes.typesystem.type.relation.RelationArgument;
 import org.apache.ctakes.typesystem.type.textsem.EntityMention;
@@ -115,6 +117,11 @@ public class RelationExtractorEvaluation
         usage = "evaluate test performance on ctakes entities, instead of gold standard entities")
     public boolean testOnCTakes = false;
 
+    @Option(
+        name = "--print-errors",
+        usage = "print relations that were incorrectly predicted")
+    public boolean printErrors = false;
+
   }
 
   public static final String GOLD_VIEW_NAME = "GoldView";
@@ -195,9 +202,7 @@ public class RelationExtractorEvaluation
             RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
             params.probabilityOfKeepingANegativeExample,
             EntityMentionPairRelationExtractorAnnotator.PARAM_CLASSIFY_BOTH_DIRECTIONS,
-            params.classifyBothDirections,
-            RelationExtractorAnnotator.PARAM_PRINT_ERRORS,
-            false };
+            params.classifyBothDirections };
 
         // define arguments to be passed to the classifier
         String[] trainingArguments = new String[] {
@@ -216,7 +221,8 @@ public class RelationExtractorEvaluation
             dataWriterClass,
             additionalParameters,
             trainingArguments,
-            options.testOnCTakes);
+            options.testOnCTakes,
+            options.printErrors);
 
         if (options.devDirectory != null) {
           if (options.testDirectory != null) {
@@ -300,7 +306,8 @@ public class RelationExtractorEvaluation
       Class<? extends DataWriter<String>> dataWriterClass,
       Object[] additionalParameters,
       String[] trainingArguments,
-      boolean testOnCTakes) {
+      boolean testOnCTakes,
+      boolean printErrors) {
     super(baseDirectory);
     this.relationCategory = relationCategory;
     this.classifierAnnotatorClass = classifierAnnotatorClass;
@@ -308,6 +315,7 @@ public class RelationExtractorEvaluation
     this.additionalParameters = additionalParameters;
     this.trainingArguments = trainingArguments;
     this.testOnCTakes = testOnCTakes;
+    this.printErrors = printErrors;
   }
   
   private String relationCategory;
@@ -321,6 +329,8 @@ public class RelationExtractorEvaluation
   private String[] trainingArguments;
   
   private boolean testOnCTakes;
+  
+  private boolean printErrors;
 
   @Override
   public CollectionReader getCollectionReader(List<File> items) throws Exception {
@@ -442,6 +452,32 @@ public class RelationExtractorEvaluation
           systemBinaryTextRelations,
           getSpan,
           getOutcome);
+      
+      // print errors if requested: align gold and system relations by their argument spans
+      if (this.printErrors) {
+        Map<HashableArguments, BinaryTextRelation> goldMap = Maps.newHashMap();
+        for (BinaryTextRelation relation : goldBinaryTextRelations) {
+          goldMap.put(new HashableArguments(relation), relation);
+        }
+        Map<HashableArguments, BinaryTextRelation> systemMap = Maps.newHashMap();
+        for (BinaryTextRelation relation : systemBinaryTextRelations) {
+          systemMap.put(new HashableArguments(relation), relation);
+        }
+        Set<HashableArguments> all = Sets.union(goldMap.keySet(), systemMap.keySet());
+        List<HashableArguments> sorted = Lists.newArrayList(all);
+        Collections.sort(sorted);
+        for (HashableArguments key : sorted) {
+          BinaryTextRelation goldRelation = goldMap.get(key);
+          BinaryTextRelation systemRelation = systemMap.get(key);
+          if (goldRelation == null) {
+            System.out.println("System added:         " + formatRelation(systemRelation));
+          } else if (systemRelation == null) { // only the gold relation exists, so report that one
+            System.out.println("System dropped:       " + formatRelation(goldRelation));
+          } else if (!systemRelation.getCategory().equals(goldRelation.getCategory())) {
+            System.out.println("System misclassified: " + formatRelation(systemRelation));
+          }
+        }
+      }
     }
 
     System.err.printf("%s: %s:\n", this.relationCategory, directory.getName());
@@ -450,6 +486,22 @@ public class RelationExtractorEvaluation
     System.err.println();
     return stats;
   }
+  
+  /** Formats a relation as category(arg1, arg2) followed by up to 50 characters of context per side. */
+  private static String formatRelation(BinaryTextRelation relation) {
+    String text = relation.getCAS().getDocumentText();
+    Annotation arg1 = relation.getArg1().getArgument();
+    Annotation arg2 = relation.getArg2().getArgument();
+    // the window must cover both arguments: from the earliest begin to the latest end
+    int begin = Math.min(arg1.getBegin(), arg2.getBegin());
+    int end = Math.max(arg1.getEnd(), arg2.getEnd());
+    begin = Math.max(0, begin - 50);
+    end = Math.min(text.length(), end + 50);
+    String context = text.substring(begin, end).replaceAll("[\r\n]", " ");
+    return String.format(
+        "%s(%s, %s) in ...%s...",
+        relation.getCategory(), arg1.getCoveredText(), arg2.getCoveredText(), context);
+  }
 
   /**
    * Holds a set of parameters for a relation extraction model
@@ -703,4 +755,89 @@ public class RelationExtractorEvaluation
       }
     }
   }
+  
+  /**
+   * Hashable, sortable key built from the character offsets of a relation's two arguments.
+   * Used to align gold and system relations that cover the same argument spans.
+   */
+  public static class HashableArguments implements Comparable<HashableArguments> {
+
+    protected int arg1begin;
+    protected int arg1end;
+    protected int arg2begin;
+    protected int arg2end;
+
+    public HashableArguments(int arg1begin, int arg1end, int arg2begin, int arg2end) {
+      this.arg1begin = arg1begin;
+      this.arg1end = arg1end;
+      this.arg2begin = arg2begin;
+      this.arg2end = arg2end;
+    }
+
+    /** Builds the key from the begin/end offsets of the two argument annotations. */
+    public HashableArguments(Annotation arg1, Annotation arg2) {
+      this(arg1.getBegin(), arg1.getEnd(), arg2.getBegin(), arg2.getEnd());
+    }
+
+    /** Builds the key from a relation, picking its arguments by role (see the HACK notes below). */
+    public HashableArguments(BinaryTextRelation relation) {
+      this(getArg1(relation), getArg2(relation));
+    }
+
+    // HACK: arg1 is not always arg1 because of bugs in the reader
+    private static Annotation getArg1(BinaryTextRelation rel) {
+      RelationArgument arg1 = rel.getArg1();
+      return arg1.getRole().equals("Argument") ? arg1.getArgument() : rel.getArg2().getArgument();
+    }
+
+    // HACK: arg2 is not always arg2 because of bugs in the reader
+    private static Annotation getArg2(BinaryTextRelation rel) {
+      RelationArgument arg2 = rel.getArg2();
+      return arg2.getRole().equals("Related_to")
+          ? arg2.getArgument()
+          : rel.getArg1().getArgument();
+    }
+
+    @Override
+    public boolean equals(Object otherObject) {
+      boolean result = false;
+      if (otherObject instanceof HashableArguments) {
+        HashableArguments other = (HashableArguments) otherObject;
+        result = (this.getClass() == other.getClass() && this.arg1begin == other.arg1begin
+            && this.arg1end == other.arg1end && this.arg2begin == other.arg2begin && this.arg2end == other.arg2end);
+      }
+      return result;
+    }
+
+    @Override
+    public int hashCode() {
+      return Objects.hashCode(this.arg1begin, this.arg1end, this.arg2begin, this.arg2end);
+    }
+
+    @Override
+    public String toString() {
+      return String.format(
+          "%s(%s,%s,%s,%s)",
+          this.getClass().getSimpleName(),
+          this.arg1begin, this.arg1end, this.arg2begin, this.arg2end);
+    }
+
+    /**
+     * Orders keys by the earliest argument begin offset, then by each span field in turn.
+     * Ties on the earliest begin are broken deterministically so the ordering is antisymmetric.
+     */
+    @Override
+    public int compareTo(HashableArguments that) {
+      int[] thisKeys = { Math.min(this.arg1begin, this.arg2begin),
+          this.arg1begin, this.arg1end, this.arg2begin, this.arg2end };
+      int[] thatKeys = { Math.min(that.arg1begin, that.arg2begin),
+          that.arg1begin, that.arg1end, that.arg2begin, that.arg2end };
+      for (int i = 0; i < thisKeys.length; ++i) {
+        if (thisKeys[i] != thatKeys[i]) {
+          return thisKeys[i] < thatKeys[i] ? -1 : +1; // explicit compare avoids subtraction overflow
+        }
+      }
+      return 0; // every span offset matches
+    }
+  }
 }

Modified: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorTrain.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorTrain.java?rev=1415556&r1=1415555&r2=1415556&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorTrain.java (original)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorTrain.java Fri Nov 30 09:32:15 2012
@@ -105,9 +105,7 @@ public class RelationExtractorTrain {
 			  RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
 			  params.probabilityOfKeepingANegativeExample,
         EntityMentionPairRelationExtractorAnnotator.PARAM_CLASSIFY_BOTH_DIRECTIONS,
-			  params.classifyBothDirections,
-			  RelationExtractorAnnotator.PARAM_PRINT_ERRORS,
-			  false };
+			  params.classifyBothDirections };
   
 	  // define arguments to be passed to the classifier
 	  String[] trainingArguments = new String[] {
@@ -125,6 +123,7 @@ public class RelationExtractorTrain {
 	    		dataWriterClass,
 	    		additionalParameters,
 	    		trainingArguments,
+	    		false,
 	    		false);
 	    
 	    CollectionReader collectionReader = evaluation.getCollectionReader(trainFiles);