You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by st...@apache.org on 2012/11/30 10:32:16 UTC
svn commit: r1415556 - in
/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor:
ae/RelationExtractorAnnotator.java eval/RelationExtractorEvaluation.java
pipelines/RelationExtractorTrain.java
Author: stevenbethard
Date: Fri Nov 30 09:32:15 2012
New Revision: 1415556
URL: http://svn.apache.org/viewvc?rev=1415556&view=rev
Log:
Moves error printing to evaluation (out of the annotator, where it didn't really belong)
Modified:
incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java
incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java
incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorTrain.java
Modified: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java?rev=1415556&r1=1415555&r2=1415556&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java (original)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java Fri Nov 30 09:32:15 2012
@@ -18,9 +18,6 @@
*/
package org.apache.ctakes.relationextractor.ae;
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
@@ -40,9 +37,6 @@ import org.cleartk.classifier.Instance;
import org.uimafit.descriptor.ConfigurationParameter;
import org.uimafit.util.JCasUtil;
-import com.google.common.base.Objects;
-import com.google.common.io.Files;
-
import org.apache.ctakes.relationextractor.ae.features.DependencyPathFeaturesExtractor;
import org.apache.ctakes.relationextractor.ae.features.DependencyTreeFeaturesExtractor;
import org.apache.ctakes.relationextractor.ae.features.NamedEntityFeaturesExtractor;
@@ -60,8 +54,6 @@ public abstract class RelationExtractorA
public static final String NO_RELATION_CATEGORY = "-NONE-";
public static final String PARAM_GOLD_VIEW_NAME = "GoldViewName";
-
- public static int relationId; // counter for error logging
@ConfigurationParameter(
name = PARAM_GOLD_VIEW_NAME,
@@ -77,26 +69,6 @@ public abstract class RelationExtractorA
description = "probability that a negative example should be retained for training")
protected double probabilityOfKeepingANegativeExample = 1.0;
- public static final String PARAM_PRINT_ERRORS = "PrintErrors";
-
- @ConfigurationParameter(
- name = PARAM_PRINT_ERRORS,
- mandatory = false,
- description = "Print errors true/false",
- defaultValue = "false")
- boolean printErrors;
-
- public static final String PARAM_ERROR_FILE = "ErrorOutputStream";
- //private static final String DEFAULT_ERROR_OUT = "System.out";
-
- @ConfigurationParameter(
- name = PARAM_ERROR_FILE,
- mandatory = false,
- description = "If PARAM_PRINT_ERRORS is true, this indicates where to write files. If unspecified, it will output to STDOUT.")
- //defaultValue = DEFAULT_ERROR_OUT)
- protected File errorFile = null;
- protected PrintStream errorOutStream;
-
protected Random coin = new Random(0);
/**
@@ -117,21 +89,7 @@ public abstract class RelationExtractorA
if (this.isTraining() && this.goldViewName == null) {
throw new IllegalArgumentException(PARAM_GOLD_VIEW_NAME + " must be defined during training");
}
- relationId = 0;
-
- try {
- if (errorFile == null) {
- this.errorOutStream = System.out;
- } else {
- this.errorOutStream = new PrintStream(Files.newOutputStreamSupplier(errorFile).getOutput());
- }
- } catch (IOException e) {
- throw new ResourceInitializationException(e);
- }
-
}
-
-
/**
* Selects the relevant mentions/annotations within a sentence for relation identification/extraction
@@ -144,22 +102,6 @@ public abstract class RelationExtractorA
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
// during training, pull entity and relation annotations from the manual annotation view
-
- // map argument spans to the category of the relation between them
- HashMap<HashableArguments, String> categoryLookup = new HashMap<HashableArguments, String>();
-
- // get gold standard relation instances during testing for error analysis
- if (! this.isTraining() && printErrors) {
- JCas goldView;
- try {
- goldView = jCas.getView("GoldView");
- } catch(CASException e) {
- throw new AnalysisEngineProcessException(e);
- }
-
- categoryLookup = createCategoryLookup(goldView);
- }
-
JCas identifiedAnnotationView, relationView;
if (this.isTraining()) {
try {
@@ -216,17 +158,6 @@ public abstract class RelationExtractorA
else {
String predictedCategory = this.classifier.classify(features);
- if(printErrors) {
- String goldCategory; // gold standard relation category
- if (categoryLookup.containsKey(new HashableArguments(arg1, arg2))) {
- goldCategory = categoryLookup.get(new HashableArguments(arg1, arg2));
- } else {
- goldCategory = NO_RELATION_CATEGORY;
- }
-
- logResults(sentence, arg1, arg2, features, predictedCategory, goldCategory);
- }
-
// add a relation annotation if a true relation was predicted
if (!predictedCategory.equals(NO_RELATION_CATEGORY)) {
@@ -269,21 +200,6 @@ public abstract class RelationExtractorA
protected abstract String getRelationCategory(Map<List<Annotation>, BinaryTextRelation> relationLookup,
IdentifiedAnnotation arg1, IdentifiedAnnotation arg2);
- private void logResults(Sentence sentence, IdentifiedAnnotation arg1,
- IdentifiedAnnotation arg2, List<Feature> features, String predictedCategory,
- String goldCategory) {
- if (printErrors && !predictedCategory.equals(goldCategory)) {
- errorOutStream.format("%-15s%d\n", "instance:", relationId++);
- errorOutStream.format("%-15s%s\n", "prediction:", predictedCategory);
- errorOutStream.format("%-15s%s\n", "gold label:", goldCategory);
- errorOutStream.format("%-15s%s\n", "arg1:", arg1.getCoveredText());
- errorOutStream.format("%-15s%s\n", "arg2:", arg2.getCoveredText());
- errorOutStream.format("%-15s%s\n", "sentence:", sentence.getCoveredText());
- errorOutStream.format("\n%s\n\n", features);
- errorOutStream.println();
- }
- }
-
/**
* Creates a lookup map between lists of arguments and their relation
* This map does not key in simply on a HashableArgument because
@@ -309,23 +225,6 @@ public abstract class RelationExtractorA
return relationLookup;
}
- /**
- * Creates a lookup map between relations and their categories
- * This is used for error analysis
- * @param goldView
- * @return
- */
- private static HashMap<HashableArguments, String> createCategoryLookup(JCas goldView) {
- // save gold relations for lookup during error analysis; normalize order of arguments
- HashMap<HashableArguments, String> categoryLookup = new HashMap<HashableArguments, String>();
- for (BinaryTextRelation relation : JCasUtil.select(goldView, BinaryTextRelation.class)) {
- // arguments must be in the correct order to be found during lookup
- categoryLookup.put(new HashableArguments(relation), relation.getCategory());
- }
- return categoryLookup;
- }
-
-
public static class IdentifiedAnnotationPair {
private final IdentifiedAnnotation arg1;
@@ -339,77 +238,4 @@ public abstract class RelationExtractorA
public final IdentifiedAnnotation getArg2() { return arg2; }
}
-
-
- /**
- * This class is useful for mapping the spans of relation arguments to the relation's category.
- */
- public static class HashableArguments {
-
- protected int arg1begin;
- protected int arg1end;
- protected int arg2begin;
- protected int arg2end;
-
- public HashableArguments(int arg1begin, int arg1end, int arg2begin, int arg2end) {
- this.arg1begin = arg1begin;
- this.arg1end = arg1end;
- this.arg2begin = arg2begin;
- this.arg2end = arg2end;
- }
-
- public HashableArguments(Annotation arg1, Annotation arg2) {
- this(arg1.getBegin(), arg1.getEnd(), arg2.getBegin(), arg2.getEnd());
- }
-
- public HashableArguments(BinaryTextRelation relation) {
- this(getArg1(relation), getArg2(relation));
- }
-
- // HACK: arg1 is not always arg1 because of bugs in the reader
- private static Annotation getArg1(BinaryTextRelation rel) {
- RelationArgument arg1 = rel.getArg1();
- return arg1.getRole().equals("Argument") ? arg1.getArgument() : rel.getArg2().getArgument();
- }
-
- // HACK: arg2 is not always arg2 because of bugs in the reader
- private static Annotation getArg2(BinaryTextRelation rel) {
- RelationArgument arg2 = rel.getArg2();
- return arg2.getRole().equals("Related_to") ? arg2.getArgument() : rel.getArg1().getArgument();
- }
-
- @Override
- public boolean equals(Object otherObject) {
- boolean result = false;
- if (otherObject instanceof HashableArguments) {
- HashableArguments other = (HashableArguments) otherObject;
- result = (this.getClass() == other.getClass() &&
- this.arg1begin == other.arg1begin &&
- this.arg1end == other.arg1end &&
- this.arg2begin == other.arg2begin &&
- this.arg2end == other.arg2end);
- }
- return result;
- }
-
- @Override
- public int hashCode() {
- return Objects.hashCode(
- this.arg1begin,
- this.arg1end,
- this.arg2begin,
- this.arg2end);
- }
-
- @Override
- public String toString() {
- return String.format(
- "%s(%s,%s,%s,%s)",
- this.getClass().getSimpleName(),
- this.arg1begin,
- this.arg1end,
- this.arg2begin,
- this.arg2end);
- }
- }
}
Modified: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java?rev=1415556&r1=1415555&r2=1415556&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java (original)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java Fri Nov 30 09:32:15 2012
@@ -26,6 +26,7 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import org.apache.uima.UIMAFramework;
import org.apache.uima.analysis_engine.AnalysisEngine;
@@ -67,12 +68,13 @@ import com.google.common.base.Functions;
import com.google.common.base.Objects;
import com.google.common.base.Objects.ToStringHelper;
import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
import com.google.common.collect.Ordering;
+import com.google.common.collect.Sets;
import org.apache.ctakes.relationextractor.ae.DegreeOfRelationExtractorAnnotator;
import org.apache.ctakes.relationextractor.ae.EntityMentionPairRelationExtractorAnnotator;
import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
-import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator.HashableArguments;
import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
import org.apache.ctakes.typesystem.type.relation.RelationArgument;
import org.apache.ctakes.typesystem.type.textsem.EntityMention;
@@ -115,6 +117,11 @@ public class RelationExtractorEvaluation
usage = "evaluate test performance on ctakes entities, instead of gold standard entities")
public boolean testOnCTakes = false;
+ @Option(
+ name = "--print-errors",
+ usage = "print relations that were incorrectly predicted")
+ public boolean printErrors = false;
+
}
public static final String GOLD_VIEW_NAME = "GoldView";
@@ -195,9 +202,7 @@ public class RelationExtractorEvaluation
RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
params.probabilityOfKeepingANegativeExample,
EntityMentionPairRelationExtractorAnnotator.PARAM_CLASSIFY_BOTH_DIRECTIONS,
- params.classifyBothDirections,
- RelationExtractorAnnotator.PARAM_PRINT_ERRORS,
- false };
+ params.classifyBothDirections };
// define arguments to be passed to the classifier
String[] trainingArguments = new String[] {
@@ -216,7 +221,8 @@ public class RelationExtractorEvaluation
dataWriterClass,
additionalParameters,
trainingArguments,
- options.testOnCTakes);
+ options.testOnCTakes,
+ options.printErrors);
if (options.devDirectory != null) {
if (options.testDirectory != null) {
@@ -300,7 +306,8 @@ public class RelationExtractorEvaluation
Class<? extends DataWriter<String>> dataWriterClass,
Object[] additionalParameters,
String[] trainingArguments,
- boolean testOnCTakes) {
+ boolean testOnCTakes,
+ boolean printErrors) {
super(baseDirectory);
this.relationCategory = relationCategory;
this.classifierAnnotatorClass = classifierAnnotatorClass;
@@ -308,6 +315,7 @@ public class RelationExtractorEvaluation
this.additionalParameters = additionalParameters;
this.trainingArguments = trainingArguments;
this.testOnCTakes = testOnCTakes;
+ this.printErrors = printErrors;
}
private String relationCategory;
@@ -321,6 +329,8 @@ public class RelationExtractorEvaluation
private String[] trainingArguments;
private boolean testOnCTakes;
+
+ private boolean printErrors;
@Override
public CollectionReader getCollectionReader(List<File> items) throws Exception {
@@ -442,6 +452,32 @@ public class RelationExtractorEvaluation
systemBinaryTextRelations,
getSpan,
getOutcome);
+
+      // print errors if requested
+      if (this.printErrors) {
+        // index gold and system relations by their argument spans so the two
+        // sets can be aligned relation-by-relation
+        Map<HashableArguments, BinaryTextRelation> goldMap = Maps.newHashMap();
+        for (BinaryTextRelation relation : goldBinaryTextRelations) {
+          goldMap.put(new HashableArguments(relation), relation);
+        }
+        Map<HashableArguments, BinaryTextRelation> systemMap = Maps.newHashMap();
+        for (BinaryTextRelation relation : systemBinaryTextRelations) {
+          systemMap.put(new HashableArguments(relation), relation);
+        }
+        // visit every span pair seen on either side, ordered by HashableArguments.compareTo
+        Set<HashableArguments> all = Sets.union(goldMap.keySet(), systemMap.keySet());
+        List<HashableArguments> sorted = Lists.newArrayList(all);
+        Collections.sort(sorted);
+        for (HashableArguments key : sorted) {
+          BinaryTextRelation goldRelation = goldMap.get(key);
+          BinaryTextRelation systemRelation = systemMap.get(key);
+          if (goldRelation == null) {
+            // only the system produced a relation over these spans
+            System.out.println("System added: " + formatRelation(systemRelation));
+          } else if (systemRelation == null) {
+            // only the gold standard has this relation; systemRelation is null in this
+            // branch, so the gold relation is the one that has to be formatted
+            System.out.println("System dropped: " + formatRelation(goldRelation));
+          } else if (!systemRelation.getCategory().equals(goldRelation.getCategory())) {
+            // both sides have a relation over these spans but disagree on its category
+            System.out.println("System misclassified: " + formatRelation(systemRelation));
+          }
+        }
+      }
}
System.err.printf("%s: %s:\n", this.relationCategory, directory.getName());
@@ -450,6 +486,22 @@ public class RelationExtractorEvaluation
System.err.println();
return stats;
}
+
+    /**
+     * Formats a relation for error output as
+     * {@code category(arg1 text, arg2 text) in ...context...}. The context is the
+     * document text from the start of the earlier argument to the end of the later
+     * one, widened by up to 50 characters on each side (clamped to the document
+     * bounds), with carriage returns and line feeds replaced by spaces.
+     *
+     * @param relation the relation to format; must not be null
+     * @return the formatted description of the relation
+     */
+    private static String formatRelation(BinaryTextRelation relation) {
+      String text = relation.getCAS().getDocumentText();
+      Annotation arg1 = relation.getArg1().getArgument();
+      Annotation arg2 = relation.getArg2().getArgument();
+      int begin = Math.min(arg1.getBegin(), arg2.getBegin());
+      // use the argument ends (not their begins) so that the later argument is
+      // always fully contained in the context window, however long it is
+      int end = Math.max(arg1.getEnd(), arg2.getEnd());
+      begin = Math.max(0, begin - 50);
+      end = Math.min(text.length(), end + 50);
+      return String.format(
+          "%s(%s, %s) in ...%s...",
+          relation.getCategory(),
+          arg1.getCoveredText(),
+          arg2.getCoveredText(),
+          text.substring(begin, end).replaceAll("[\r\n]", " "));
+    }
/**
* Holds a set of parameters for a relation extraction model
@@ -703,4 +755,89 @@ public class RelationExtractorEvaluation
}
}
}
+
+  /**
+   * A hashable, comparable key made of the character offsets of a relation's two
+   * arguments. It is used to align relations that cover the same argument spans,
+   * e.g. as a map key when comparing gold and system relations.
+   *
+   * Equality and hashing use all four offsets. The natural ordering sorts first by
+   * the smaller of the two argument begin offsets and then by the individual
+   * offsets, so that it is a total order over instances of this class.
+   */
+  public static class HashableArguments implements Comparable<HashableArguments> {
+
+    protected int arg1begin;
+
+    protected int arg1end;
+
+    protected int arg2begin;
+
+    protected int arg2end;
+
+    /**
+     * Creates a key directly from the begin/end offsets of the two arguments.
+     */
+    public HashableArguments(int arg1begin, int arg1end, int arg2begin, int arg2end) {
+      this.arg1begin = arg1begin;
+      this.arg1end = arg1end;
+      this.arg2begin = arg2begin;
+      this.arg2end = arg2end;
+    }
+
+    /**
+     * Creates a key from the spans of the two argument annotations.
+     */
+    public HashableArguments(Annotation arg1, Annotation arg2) {
+      this(arg1.getBegin(), arg1.getEnd(), arg2.getBegin(), arg2.getEnd());
+    }
+
+    /**
+     * Creates a key from a relation, picking its arguments by role (see the
+     * HACK notes on getArg1/getArg2) rather than by slot.
+     */
+    public HashableArguments(BinaryTextRelation relation) {
+      this(getArg1(relation), getArg2(relation));
+    }
+
+    // HACK: arg1 is not always arg1 because of bugs in the reader
+    // (falls back to the annotation in the arg2 slot when arg1's role is not "Argument")
+    private static Annotation getArg1(BinaryTextRelation rel) {
+      RelationArgument arg1 = rel.getArg1();
+      return arg1.getRole().equals("Argument") ? arg1.getArgument() : rel.getArg2().getArgument();
+    }
+
+    // HACK: arg2 is not always arg2 because of bugs in the reader
+    // (falls back to the annotation in the arg1 slot when arg2's role is not "Related_to")
+    private static Annotation getArg2(BinaryTextRelation rel) {
+      RelationArgument arg2 = rel.getArg2();
+      return arg2.getRole().equals("Related_to")
+          ? arg2.getArgument()
+          : rel.getArg1().getArgument();
+    }
+
+    @Override
+    public boolean equals(Object otherObject) {
+      boolean result = false;
+      if (otherObject instanceof HashableArguments) {
+        HashableArguments other = (HashableArguments) otherObject;
+        // exact class match in addition to all four offsets being equal
+        result = (this.getClass() == other.getClass() && this.arg1begin == other.arg1begin
+            && this.arg1end == other.arg1end && this.arg2begin == other.arg2begin && this.arg2end == other.arg2end);
+      }
+      return result;
+    }
+
+    @Override
+    public int hashCode() {
+      return Objects.hashCode(this.arg1begin, this.arg1end, this.arg2begin, this.arg2end);
+    }
+
+    @Override
+    public String toString() {
+      return String.format(
+          "%s(%s,%s,%s,%s)",
+          this.getClass().getSimpleName(),
+          this.arg1begin,
+          this.arg1end,
+          this.arg2begin,
+          this.arg2end);
+    }
+
+    /**
+     * Orders keys by the smaller of their two argument begin offsets, breaking ties
+     * by arg1begin, arg1end, arg2begin and arg2end in that order.
+     *
+     * The tie-breakers make the ordering antisymmetric and transitive: if
+     * a.compareTo(b) is positive then b.compareTo(a) is negative, and 0 is returned
+     * only when all four offsets are equal. Sorting relies on this.
+     */
+    @Override
+    public int compareTo(HashableArguments that) {
+      int thisBegin = Math.min(this.arg1begin, this.arg2begin);
+      int thatBegin = Math.min(that.arg1begin, that.arg2begin);
+      int result = compareInts(thisBegin, thatBegin);
+      if (result == 0) {
+        result = compareInts(this.arg1begin, that.arg1begin);
+      }
+      if (result == 0) {
+        result = compareInts(this.arg1end, that.arg1end);
+      }
+      if (result == 0) {
+        result = compareInts(this.arg2begin, that.arg2begin);
+      }
+      if (result == 0) {
+        result = compareInts(this.arg2end, that.arg2end);
+      }
+      return result;
+    }
+
+    // three-way int comparison without subtraction, so it cannot overflow
+    private static int compareInts(int x, int y) {
+      if (x < y) {
+        return -1;
+      }
+      return x == y ? 0 : +1;
+    }
+  }
}
Modified: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorTrain.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorTrain.java?rev=1415556&r1=1415555&r2=1415556&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorTrain.java (original)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorTrain.java Fri Nov 30 09:32:15 2012
@@ -105,9 +105,7 @@ public class RelationExtractorTrain {
RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
params.probabilityOfKeepingANegativeExample,
EntityMentionPairRelationExtractorAnnotator.PARAM_CLASSIFY_BOTH_DIRECTIONS,
- params.classifyBothDirections,
- RelationExtractorAnnotator.PARAM_PRINT_ERRORS,
- false };
+ params.classifyBothDirections };
// define arguments to be passed to the classifier
String[] trainingArguments = new String[] {
@@ -125,6 +123,7 @@ public class RelationExtractorTrain {
dataWriterClass,
additionalParameters,
trainingArguments,
+ false,
false);
CollectionReader collectionReader = evaluation.getCollectionReader(trainFiles);