You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by cl...@apache.org on 2015/08/06 22:44:50 UTC
svn commit: r1694571 - in
/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor:
ae/LocationOfRelationExtractorAnnotator.java
eval/RelationExtractorEvaluation.java
Author: clin
Date: Thu Aug 6 20:44:50 2015
New Revision: 1694571
URL: http://svn.apache.org/r1694571
Log:
enable generating more training instances by event-expansion technique for "location of" relations.
To do expansion, add "--expand-events" in command line arguments.
Modified:
ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/LocationOfRelationExtractorAnnotator.java
ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java
Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/LocationOfRelationExtractorAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/LocationOfRelationExtractorAnnotator.java?rev=1694571&r1=1694570&r2=1694571&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/LocationOfRelationExtractorAnnotator.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/LocationOfRelationExtractorAnnotator.java Thu Aug 6 20:44:50 2015
@@ -19,12 +19,16 @@
package org.apache.ctakes.relationextractor.ae;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.List;
+import java.util.Map;
+import org.apache.ctakes.relationextractor.eval.RelationExtractorEvaluation;
import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
import org.apache.ctakes.typesystem.type.relation.LocationOfTextRelation;
import org.apache.ctakes.typesystem.type.relation.RelationArgument;
import org.apache.ctakes.typesystem.type.textsem.AnatomicalSiteMention;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
@@ -38,53 +42,103 @@ import org.apache.uima.fit.util.JCasUtil
*/
public class LocationOfRelationExtractorAnnotator extends RelationExtractorAnnotator {
- @Override
- protected Class<? extends BinaryTextRelation> getRelationClass() {
- return LocationOfTextRelation.class;
- }
-
- @Override
- public List<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(
- JCas identifiedAnnotationView,
- Annotation sentence) {
-
- List<EventMention> events =
- JCasUtil.selectCovered(identifiedAnnotationView, EventMention.class, sentence);
- List<AnatomicalSiteMention> sites =
- JCasUtil.selectCovered(identifiedAnnotationView, AnatomicalSiteMention.class, sentence);
-
- List<IdentifiedAnnotationPair> pairs = new ArrayList<IdentifiedAnnotationPair>();
- for (EventMention event : events) {
- for (AnatomicalSiteMention site : sites) {
- pairs.add(new IdentifiedAnnotationPair(event, site));
- }
- }
- return pairs;
- }
-
- @Override
- protected void createRelation(
- JCas jCas,
- IdentifiedAnnotation arg1,
- IdentifiedAnnotation arg2,
- String predictedCategory) {
- RelationArgument relArg1 = new RelationArgument(jCas);
- relArg1.setArgument(arg1);
- relArg1.setRole("Argument");
- relArg1.addToIndexes();
- RelationArgument relArg2 = new RelationArgument(jCas);
- relArg2.setArgument(arg2);
- relArg2.setRole("Related_to");
- relArg2.addToIndexes();
- LocationOfTextRelation relation = new LocationOfTextRelation(jCas);
- relation.setArg1(relArg1);
- relation.setArg2(relArg2);
- relation.setCategory(predictedCategory);
- relation.addToIndexes();
- }
-
- @Override
- protected Class<? extends Annotation> getCoveringClass() {
- return Sentence.class;
- }
+ @Override
+ protected Class<? extends BinaryTextRelation> getRelationClass() {
+ return LocationOfTextRelation.class;
+ }
+
+ @Override
+ public List<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(
+ JCas identifiedAnnotationView,
+ Annotation sentence) {
+
+ List<EventMention> events =
+ JCasUtil.selectCovered(identifiedAnnotationView, EventMention.class, sentence);
+ List<AnatomicalSiteMention> sites =
+ JCasUtil.selectCovered(identifiedAnnotationView, AnatomicalSiteMention.class, sentence);
+
+ List<IdentifiedAnnotationPair> pairs = new ArrayList<>();
+
+ if(RelationExtractorEvaluation.expandEvent){//if expand
+ Map<EventMention, Collection<EventMention>> coveredMap =
+ JCasUtil.indexCovered(identifiedAnnotationView, EventMention.class, EventMention.class);
+ Map<EventMention, Collection<EventMention>> coveringMap =
+ JCasUtil.indexCovering(identifiedAnnotationView, EventMention.class, EventMention.class);
+ Map<AnatomicalSiteMention, Collection<EventMention>> siteEventMap =
+ JCasUtil.indexCovered(identifiedAnnotationView, AnatomicalSiteMention.class, EventMention.class);
+ Map<AnatomicalSiteMention, Collection<EntityMention>> siteEntityMap =
+ JCasUtil.indexCovering(identifiedAnnotationView, AnatomicalSiteMention.class, EntityMention.class);
+
+ final List<IdentifiedAnnotation> eventList = new ArrayList<>();
+ for (EventMention event : events) {
+ eventList.addAll(coveringMap.get(event));
+ eventList.addAll(coveredMap.get(event));
+ for(IdentifiedAnnotation covEvent : eventList){
+ for (AnatomicalSiteMention site : sites) {
+ if(!hasOverlap(covEvent,site)){
+ pairs.add(new IdentifiedAnnotationPair(covEvent, site));
+ }
+ }
+ }
+ eventList.clear();
+ for (AnatomicalSiteMention site : sites) {
+ pairs.add(new IdentifiedAnnotationPair(event, site));
+ eventList.addAll(siteEventMap.get(site));
+ eventList.addAll(siteEntityMap.get(site));
+ for(IdentifiedAnnotation covSite : eventList){
+ if(!hasOverlap(event,covSite)){
+ pairs.add(new IdentifiedAnnotationPair(event, covSite));
+ }
+ }
+ eventList.clear();
+ }
+
+ }
+ }else{//id don't expand
+ for (EventMention event : events) {
+ for (AnatomicalSiteMention site : sites) {
+ pairs.add(new IdentifiedAnnotationPair(event, site));
+ }
+ }
+ }
+
+
+ return pairs;
+ }
+
+ private static boolean hasOverlap(Annotation event1, Annotation event2) {
+ if(event1.getEnd()>=event2.getBegin()&&event1.getEnd()<=event2.getEnd()){
+ return true;
+ }
+ if(event2.getEnd()>=event1.getBegin()&&event2.getEnd()<=event1.getEnd()){
+ return true;
+ }
+ return false;
+ }
+
+ @Override
+ protected void createRelation(
+ JCas jCas,
+ IdentifiedAnnotation arg1,
+ IdentifiedAnnotation arg2,
+ String predictedCategory) {
+ RelationArgument relArg1 = new RelationArgument(jCas);
+ relArg1.setArgument(arg1);
+ relArg1.setRole("Argument");
+ relArg1.addToIndexes();
+ RelationArgument relArg2 = new RelationArgument(jCas);
+ relArg2.setArgument(arg2);
+ relArg2.setRole("Related_to");
+ relArg2.addToIndexes();
+ LocationOfTextRelation relation = new LocationOfTextRelation(jCas);
+ relation.setArg1(relArg1);
+ relation.setArg2(relArg2);
+ relation.setCategory(predictedCategory);
+ relation.addToIndexes();
+ }
+
+ @Override
+ protected Class<? extends Annotation> getCoveringClass() {
+ return Sentence.class;
+ }
}
Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java?rev=1694571&r1=1694570&r2=1694571&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java Thu Aug 6 20:44:50 2015
@@ -42,6 +42,8 @@ import org.apache.ctakes.typesystem.type
import org.apache.ctakes.typesystem.type.relation.ManagesTreatsTextRelation;
import org.apache.ctakes.typesystem.type.relation.ManifestationOfTextRelation;
import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.relation.TemporalTextRelation;
+import org.apache.ctakes.typesystem.type.textsem.AnatomicalSiteMention;
import org.apache.ctakes.typesystem.type.textsem.EntityMention;
import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
@@ -83,669 +85,774 @@ import com.lexicalscope.jewel.cli.Option
public class RelationExtractorEvaluation extends SHARPXMI.Evaluation_ImplBase {
- public static interface Options extends SHARPXMI.EvaluationOptions {
+ public static interface Options extends SHARPXMI.EvaluationOptions {
- @Option(
- longName = "relations",
- description = "determines which relations to evaluate on (separately)",
- defaultValue = { "degree_of", "location_of" })
- public List<String> getRelations();
-
- @Option(
- longName = "test-on-ctakes",
- description = "evaluate test performance on ctakes entities, instead of gold standard "
- + "entities")
- public boolean getTestOnCTakes();
-
- @Option(
- longName = "allow-smaller-system-arguments",
- description = "for evaluation, allow system relation arguments to match gold relation "
- + "arguments that enclose them")
- public boolean getAllowSmallerSystemArguments();
-
- @Option(
- longName = "ignore-impossible-gold-relations",
- description = "for evaluation, ignore gold relations that would be impossible to find "
- + "because there are no corresponding system mentions")
- public boolean getIgnoreImpossibleGoldRelations();
-
- @Option(
- longName = "--print-errors",
- description = "print relations that were incorrectly predicted")
- public boolean getPrintErrors();
-
- }
-
- public static final Map<String, Class<? extends BinaryTextRelation>> RELATION_CLASSES =
- Maps.newHashMap();
- public static final Map<Class<? extends BinaryTextRelation>, Class<? extends RelationExtractorAnnotator>> ANNOTATOR_CLASSES =
- Maps.newHashMap();
- public static final Map<Class<? extends BinaryTextRelation>, ParameterSettings> BEST_PARAMETERS =
- Maps.newHashMap();
-
- static {
- RELATION_CLASSES.put("degree_of", DegreeOfTextRelation.class);
- ANNOTATOR_CLASSES.put(DegreeOfTextRelation.class, DegreeOfRelationExtractorAnnotator.class);
- BEST_PARAMETERS.put(DegreeOfTextRelation.class, new ParameterSettings(
- LibLinearStringOutcomeDataWriter.class,
- new Object[] { RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
- 1.0f },
- new String[] { "-s", "1", "-c", "0.1" }));
-
- RELATION_CLASSES.put("location_of", LocationOfTextRelation.class);
- ANNOTATOR_CLASSES.put(LocationOfTextRelation.class, LocationOfRelationExtractorAnnotator.class);
- BEST_PARAMETERS.put(LocationOfTextRelation.class, new ParameterSettings(
- LibLinearStringOutcomeDataWriter.class,
- new Object[] { RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
- 1.0f },
- new String[] { "-s", "0", "-c", "50.0" }));
-
- RELATION_CLASSES.put("manages/treats", ManagesTreatsTextRelation.class);
- ANNOTATOR_CLASSES.put(ManagesTreatsTextRelation.class, ManagesTreatsRelationExtractorAnnotator.class);
- BEST_PARAMETERS.put(ManagesTreatsTextRelation.class, new ParameterSettings(
- LibLinearStringOutcomeDataWriter.class,
- new Object[] { RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
- 0.5f },
- new String[] { "-s", "0", "-c", "5.0" }));
-
- RELATION_CLASSES.put("causes/brings_about", CausesBringsAboutTextRelation.class);
- ANNOTATOR_CLASSES.put(CausesBringsAboutTextRelation.class, CausesBringsAboutRelationExtractorAnnotator.class);
- BEST_PARAMETERS.put(CausesBringsAboutTextRelation.class, new ParameterSettings(
- LibLinearStringOutcomeDataWriter.class,
- new Object[] { RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
- 0.5f },
- new String[] { "-s", "0", "-c", "1.0" }));
-
- RELATION_CLASSES.put("manifestation_of", ManifestationOfTextRelation.class);
- ANNOTATOR_CLASSES.put(ManifestationOfTextRelation.class, ManifestationOfRelationExtractorAnnotator.class);
- BEST_PARAMETERS.put(ManifestationOfTextRelation.class, new ParameterSettings(
- LibLinearStringOutcomeDataWriter.class,
- new Object[] { RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
- 0.5f },
- new String[] { "-s", "0", "-c", "1.0" }));
- }
-
- public static void main(String[] args) throws Exception {
- // parse the options, validate them, and generate XMI if necessary
- final Options options = CliFactory.parseArguments(Options.class, args);
- SHARPXMI.validate(options);
- SHARPXMI.generateXMI(options);
-
- // determine the grid of parameters to search through
- // for the full set of LibLinear parameters, see:
- // https://github.com/bwaldvogel/liblinear-java/blob/master/src/main/java/de/bwaldvogel/liblinear/Train.java
- List<ParameterSettings> gridOfSettings = Lists.newArrayList();
- for (float probabilityOfKeepingANegativeExample : new float[] { 0.5f, 1.0f }) {
- for (int solver : new int[] { 0 /* logistic regression */, 1 /* SVM */}) {
- for (double svmCost : new double[] { 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100 }) {
- gridOfSettings.add(new ParameterSettings(
- LibLinearStringOutcomeDataWriter.class,
- new Object[] {
- RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
- probabilityOfKeepingANegativeExample },
- new String[] { "-s", String.valueOf(solver), "-c", String.valueOf(svmCost) }));
- }
- }
- }
-
- // run an evaluation for each selected relation
- for (final String relationCategory : options.getRelations()) {
-
- // get the best parameters for the relation
- final Class<? extends BinaryTextRelation> relationClass =
- RELATION_CLASSES.get(relationCategory);
- ParameterSettings bestSettings = BEST_PARAMETERS.get(relationClass);
-
- // run the evaluation
- SHARPXMI.evaluate(
- options,
- bestSettings,
- gridOfSettings,
- new Function<ParameterSettings, RelationExtractorEvaluation>() {
- @Override
- public RelationExtractorEvaluation apply(@Nullable ParameterSettings params) {
- return new RelationExtractorEvaluation(
- new File("target/models/" + relationCategory),
- relationClass,
- ANNOTATOR_CLASSES.get(relationClass),
- params,
- options.getTestOnCTakes(),
- options.getAllowSmallerSystemArguments(),
- options.getIgnoreImpossibleGoldRelations(),
- options.getPrintErrors());
- }
- });
- }
- }
-
- private Class<? extends BinaryTextRelation> relationClass;
-
- private Class<? extends RelationExtractorAnnotator> classifierAnnotatorClass;
-
- private ParameterSettings parameterSettings;
-
- private boolean testOnCTakes;
-
- private boolean allowSmallerSystemArguments;
-
- private boolean ignoreImpossibleGoldRelations;
-
- private boolean printErrors;
-
- /**
- * An evaluation of a relation extractor.
- *
- * @param baseDirectory
- * The directory where models, etc. should be written
- * @param relationClass
- * The class of the relation to be predicted
- * @param classifierAnnotatorClass
- * The CleartkAnnotator class that learns a relation extractor model
- * @param parameterSettings
- * The parameters defining how to train a classifier
- * @param testOnCTakes
- * During testing, use annotations from cTAKES, not from the gold
- * standard
- * @param allowSmallerSystemArguments
- * During testing, allow system annotations to match gold annotations
- * that enclose them
- * @param ignoreImpossibleGoldRelations
- * During testing, ignore gold relations that would be impossible to
- * find because there are no corresponding system mentions
- */
- public RelationExtractorEvaluation(
- File baseDirectory,
- Class<? extends BinaryTextRelation> relationClass,
- Class<? extends RelationExtractorAnnotator> classifierAnnotatorClass,
- ParameterSettings parameterSettings,
- boolean testOnCTakes,
- boolean allowSmallerSystemArguments,
- boolean ignoreImpossibleGoldRelations,
- boolean printErrors) {
- super(baseDirectory);
- this.relationClass = relationClass;
- this.classifierAnnotatorClass = classifierAnnotatorClass;
- this.parameterSettings = parameterSettings;
- this.testOnCTakes = testOnCTakes;
- this.allowSmallerSystemArguments = allowSmallerSystemArguments;
- this.ignoreImpossibleGoldRelations = ignoreImpossibleGoldRelations;
- this.printErrors = printErrors;
- }
-
- public RelationExtractorEvaluation(
- File baseDirectory,
- Class<? extends BinaryTextRelation> relationClass,
- Class<? extends RelationExtractorAnnotator> classifierAnnotatorClass,
- ParameterSettings parameterSettings) {
- this(
- baseDirectory,
- relationClass,
- classifierAnnotatorClass,
- parameterSettings,
- false,
- false,
- false,
- false);
- }
-
- @Override
- public void train(CollectionReader collectionReader, File directory) throws Exception {
- System.err.printf(
- "%s: %s: %s:\n",
- this.getClass().getSimpleName(),
- this.relationClass.getSimpleName(),
- directory.getName());
- System.err.println(this.parameterSettings);
-
- AggregateBuilder builder = new AggregateBuilder();
- // remove cTAKES entity mentions and modifiers in the system view and copy
- // in the gold relations
- builder.add(AnalysisEngineFactory.createEngineDescription(RemoveCTakesMentionsAndCopyGoldRelations.class));
- // add the relation extractor, configured for training mode
- AnalysisEngineDescription classifierAnnotator =
- AnalysisEngineFactory.createEngineDescription(
- this.classifierAnnotatorClass,
- this.parameterSettings.configurationParameters);
- ConfigurationParameterFactory.addConfigurationParameters(
- classifierAnnotator,
- DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
- this.parameterSettings.dataWriterClass,
- DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
- directory.getPath());
- builder.add(classifierAnnotator);
-
- // run the data-writing pipeline
- SimplePipeline.runPipeline(collectionReader, builder.createAggregateDescription());
-
- // train the classifier and package it into a .jar file
- JarClassifierBuilder.trainAndPackage(directory, this.parameterSettings.trainingArguments);
- }
-
- @Override
- protected AnnotationStatistics<String> test(CollectionReader collectionReader, File directory)
- throws Exception {
- AggregateBuilder builder = new AggregateBuilder();
- if (this.testOnCTakes) {
- // add the modifier extractor
- File file = new File("desc/analysis_engine/ModifierExtractorAnnotator.xml");
- XMLInputSource source = new XMLInputSource(file);
- builder.add(UIMAFramework.getXMLParser().parseAnalysisEngineDescription(source));
- // remove extraneous entity mentions
- builder.add(AnalysisEngineFactory.createEngineDescription(RemoveSmallerEventMentions.class));
- } else {
- // replace cTAKES entity mentions and modifiers in the system view with
- // the gold annotations
- builder.add(AnalysisEngineFactory.createEngineDescription(ReplaceCTakesMentionsWithGoldMentions.class));
- }
- // add the relation extractor, configured for classification mode
- AnalysisEngineDescription classifierAnnotator =
- AnalysisEngineFactory.createEngineDescription(
- this.classifierAnnotatorClass,
- this.parameterSettings.configurationParameters);
- ConfigurationParameterFactory.addConfigurationParameters(
- classifierAnnotator,
- GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
- JarClassifierBuilder.getModelJarFile(directory));
- builder.add(classifierAnnotator);
-
- // statistics will be based on the "category" feature of the
- // BinaryTextRelations
- AnnotationStatistics<String> stats = new AnnotationStatistics<String>();
- Function<BinaryTextRelation, HashableArguments> getSpan =
- new Function<BinaryTextRelation, HashableArguments>() {
- @Override
- public HashableArguments apply(BinaryTextRelation relation) {
- return new HashableArguments(relation);
- }
- };
- Function<BinaryTextRelation, String> getOutcome =
- AnnotationStatistics.annotationToFeatureValue("category");
-
- // calculate statistics, iterating over the results of the classifier
- AnalysisEngine engine = builder.createAggregate();
- for (Iterator<JCas> casIter = new JCasIterator(collectionReader, engine); casIter.hasNext();) {
- JCas jCas = casIter.next();
- // get the gold view
- JCas goldView;
- try {
- goldView = jCas.getView(SHARPXMI.GOLD_VIEW_NAME);
- } catch (CASException e) {
- throw new AnalysisEngineProcessException(e);
- }
-
- // get the gold and system annotations
- Collection<? extends BinaryTextRelation> goldBinaryTextRelations =
- JCasUtil.select(goldView, this.relationClass);
- Collection<? extends BinaryTextRelation> systemBinaryTextRelations =
- JCasUtil.select(jCas, this.relationClass);
-
- if (this.ignoreImpossibleGoldRelations) {
- // collect only relations where both arguments have some possible system
- // arguments
- List<BinaryTextRelation> relations = Lists.newArrayList();
- for (BinaryTextRelation relation : goldBinaryTextRelations) {
- boolean hasSystemArgs = true;
- for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
- IdentifiedAnnotation goldArg = (IdentifiedAnnotation) relArg.getArgument();
- Class<? extends IdentifiedAnnotation> goldClass = goldArg.getClass();
- boolean noSystemArg = JCasUtil.selectCovered(jCas, goldClass, goldArg).isEmpty();
- hasSystemArgs = hasSystemArgs && !noSystemArg;
- }
- if (hasSystemArgs) {
- relations.add(relation);
- } else {
- IdentifiedAnnotation arg1 = (IdentifiedAnnotation) relation.getArg1().getArgument();
- IdentifiedAnnotation arg2 = (IdentifiedAnnotation) relation.getArg2().getArgument();
- String messageFormat =
- "removing relation between %s and %s which is impossible to "
- + "find with system mentions";
- String message = String.format(messageFormat, format(arg1), format(arg2));
- UIMAFramework.getLogger(this.getClass()).log(Level.WARNING, message);
- }
- }
- goldBinaryTextRelations = relations;
- }
-
- if (this.allowSmallerSystemArguments) {
-
- // collect all the arguments of the manually annotated relations
- Set<IdentifiedAnnotation> goldArgs = Sets.newHashSet();
- for (BinaryTextRelation relation : goldBinaryTextRelations) {
- for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
- goldArgs.add((IdentifiedAnnotation) relArg.getArgument());
- }
- }
-
- // collect all the arguments of system-predicted relations that don't
- // match some gold argument
- Set<IdentifiedAnnotation> unmatchedSystemArgs = Sets.newHashSet();
- for (BinaryTextRelation relation : systemBinaryTextRelations) {
- for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
- IdentifiedAnnotation systemArg = (IdentifiedAnnotation) relArg.getArgument();
- Class<? extends IdentifiedAnnotation> systemClass = systemArg.getClass();
- boolean matchesSomeGold = false;
- for (IdentifiedAnnotation goldArg : JCasUtil.selectCovered(
- goldView,
- systemClass,
- systemArg)) {
- if (goldArg.getBegin() == systemArg.getBegin()
- && goldArg.getEnd() == systemArg.getEnd()) {
- matchesSomeGold = true;
- break;
- }
- }
- if (!matchesSomeGold) {
- unmatchedSystemArgs.add(systemArg);
- }
- }
- }
-
- // map each unmatched system argument to the gold argument that encloses
- // it
- Map<IdentifiedAnnotation, IdentifiedAnnotation> systemToGold = Maps.newHashMap();
- for (IdentifiedAnnotation goldArg : goldArgs) {
- Class<? extends IdentifiedAnnotation> goldClass = goldArg.getClass();
- for (IdentifiedAnnotation systemArg : JCasUtil.selectCovered(jCas, goldClass, goldArg)) {
- if (unmatchedSystemArgs.contains(systemArg)) {
-
- // if there's no mapping yet for this system arg, map it to the
- // enclosing gold arg
- IdentifiedAnnotation oldGoldArg = systemToGold.get(systemArg);
- if (oldGoldArg == null) {
- systemToGold.put(systemArg, goldArg);
- }
-
- // if there's already a mapping for this system arg, only re-map
- // it to match the type
- else {
- IdentifiedAnnotation current, other;
- if (systemArg.getTypeID() == goldArg.getTypeID()) {
- systemToGold.put(systemArg, goldArg);
- current = goldArg;
- other = oldGoldArg;
- } else {
- current = oldGoldArg;
- other = goldArg;
- }
-
- // issue a warning since this re-mapping procedure is imperfect
- String message =
- "system argument %s mapped to gold argument %s, but could also be mapped to %s";
- message = String.format(message, format(systemArg), format(current), format(other));
- UIMAFramework.getLogger(this.getClass()).log(Level.WARNING, message);
- }
- }
- }
- }
-
- // replace system arguments with gold arguments where necessary/possible
- for (BinaryTextRelation relation : systemBinaryTextRelations) {
- for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
- IdentifiedAnnotation systemArg = (IdentifiedAnnotation) relArg.getArgument();
- IdentifiedAnnotation matchingGoldArg = systemToGold.get(systemArg);
- if (matchingGoldArg != null) {
- String messageFormat = "replacing system argument %s with gold argument %s";
- String message =
- String.format(messageFormat, format(systemArg), format(matchingGoldArg));
- UIMAFramework.getLogger(this.getClass()).log(Level.WARNING, message);
- relArg.setArgument(matchingGoldArg);
- }
- }
- }
- }
-
- // update the statistics based on the argument spans of the relation
- stats.add(goldBinaryTextRelations, systemBinaryTextRelations, getSpan, getOutcome);
-
- // print errors if requested
- if (this.printErrors) {
- Map<HashableArguments, BinaryTextRelation> goldMap = Maps.newHashMap();
- for (BinaryTextRelation relation : goldBinaryTextRelations) {
- goldMap.put(new HashableArguments(relation), relation);
- }
- Map<HashableArguments, BinaryTextRelation> systemMap = Maps.newHashMap();
- for (BinaryTextRelation relation : systemBinaryTextRelations) {
- systemMap.put(new HashableArguments(relation), relation);
- }
- Set<HashableArguments> all = Sets.union(goldMap.keySet(), systemMap.keySet());
- List<HashableArguments> sorted = Lists.newArrayList(all);
- Collections.sort(sorted);
- for (HashableArguments key : sorted) {
- BinaryTextRelation goldRelation = goldMap.get(key);
- BinaryTextRelation systemRelation = systemMap.get(key);
- if (goldRelation == null) {
- System.out.println("System added: " + formatRelation(systemRelation));
- } else if (systemRelation == null) {
- System.out.println("System dropped: " + formatRelation(goldRelation));
- } else if (!systemRelation.getCategory().equals(goldRelation.getCategory())) {
- String label = systemRelation.getCategory();
- System.out.printf("System labeled %s for %s\n", label, formatRelation(systemRelation));
- }
- }
- }
- }
-
- System.err.print(stats);
- System.err.println();
- return stats;
- }
-
- private static String formatRelation(BinaryTextRelation relation) {
- IdentifiedAnnotation arg1 = (IdentifiedAnnotation) relation.getArg1().getArgument();
- IdentifiedAnnotation arg2 = (IdentifiedAnnotation) relation.getArg2().getArgument();
- String text = arg1.getCAS().getDocumentText();
- int begin = Math.min(arg1.getBegin(), arg2.getBegin());
- int end = Math.max(arg1.getBegin(), arg2.getBegin());
- begin = Math.max(0, begin - 50);
- end = Math.min(text.length(), end + 50);
- return String.format(
- "%s(%s(type=%d), %s(type=%d)) in ...%s...",
- relation.getCategory(),
- arg1.getCoveredText(),
- arg1.getTypeID(),
- arg2.getCoveredText(),
- arg2.getTypeID(),
- text.substring(begin, end).replaceAll("[\r\n]", " "));
- }
-
- /**
- * Annotator that removes cTAKES mentions in the system view and copies
- * relations from the gold view to the system view
- */
- public static class RemoveCTakesMentionsAndCopyGoldRelations extends JCasAnnotator_ImplBase {
-
- @Override
- public void process(JCas jCas) throws AnalysisEngineProcessException {
- JCas goldView, systemView;
- try {
- goldView = jCas.getView(SHARPXMI.GOLD_VIEW_NAME);
- systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
- } catch (CASException e) {
- throw new AnalysisEngineProcessException(e);
- }
-
- // remove cTAKES Mentions and Modifiers from system view
- List<IdentifiedAnnotation> cTakesMentions = new ArrayList<IdentifiedAnnotation>();
- cTakesMentions.addAll(JCasUtil.select(systemView, EventMention.class));
- cTakesMentions.addAll(JCasUtil.select(systemView, EntityMention.class));
- cTakesMentions.addAll(JCasUtil.select(systemView, Modifier.class));
- for (IdentifiedAnnotation cTakesMention : cTakesMentions) {
- cTakesMention.removeFromIndexes();
- }
-
- // copy gold Mentions and Modifiers to the system view
- List<IdentifiedAnnotation> goldMentions = new ArrayList<IdentifiedAnnotation>();
- goldMentions.addAll(JCasUtil.select(goldView, EventMention.class));
- goldMentions.addAll(JCasUtil.select(goldView, EntityMention.class));
- goldMentions.addAll(JCasUtil.select(goldView, Modifier.class));
- CasCopier copier = new CasCopier(goldView.getCas(), systemView.getCas());
- Feature sofaFeature = jCas.getTypeSystem().getFeatureByFullName(CAS.FEATURE_FULL_NAME_SOFA);
- for (IdentifiedAnnotation goldMention : goldMentions) {
- Annotation copy = (Annotation) copier.copyFs(goldMention);
- copy.setFeatureValue(sofaFeature, systemView.getSofa());
- copy.addToIndexes();
- }
-
- // copy gold relations to the system view
- for (BinaryTextRelation goldRelation : JCasUtil.select(goldView, BinaryTextRelation.class)) {
- BinaryTextRelation relation = (BinaryTextRelation) copier.copyFs(goldRelation);
- relation.addToIndexes(systemView);
- for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
- relArg.addToIndexes(systemView);
- // relArg.getArgument() should have been added to indexes with
- // mentions above
- }
- }
- }
- }
-
- /**
- * Annotator that removes cTAKES Mentions and Modifiers from the system view,
- * and copies over the manually annotated Mentions and Modifiers from the gold
- * view.
- */
- public static class ReplaceCTakesMentionsWithGoldMentions extends JCasAnnotator_ImplBase {
-
- @Override
- public void process(JCas jCas) throws AnalysisEngineProcessException {
- JCas goldView, systemView;
- try {
- goldView = jCas.getView(SHARPXMI.GOLD_VIEW_NAME);
- systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
- } catch (CASException e) {
- throw new AnalysisEngineProcessException(e);
- }
-
- // remove cTAKES Mentions and Modifiers from system view
- List<IdentifiedAnnotation> cTakesMentions = new ArrayList<IdentifiedAnnotation>();
- cTakesMentions.addAll(JCasUtil.select(systemView, EventMention.class));
- cTakesMentions.addAll(JCasUtil.select(systemView, EntityMention.class));
- cTakesMentions.addAll(JCasUtil.select(systemView, Modifier.class));
- for (IdentifiedAnnotation cTakesMention : cTakesMentions) {
- cTakesMention.removeFromIndexes();
- }
-
- // copy gold Mentions and Modifiers to the system view
- List<IdentifiedAnnotation> goldMentions = new ArrayList<IdentifiedAnnotation>();
- goldMentions.addAll(JCasUtil.select(goldView, EventMention.class));
- goldMentions.addAll(JCasUtil.select(goldView, EntityMention.class));
- goldMentions.addAll(JCasUtil.select(goldView, Modifier.class));
- CasCopier copier = new CasCopier(goldView.getCas(), systemView.getCas());
- for (IdentifiedAnnotation goldMention : goldMentions) {
- Annotation copy = (Annotation) copier.copyFs(goldMention);
- Feature sofaFeature = copy.getType().getFeatureByBaseName("sofa");
- copy.setFeatureValue(sofaFeature, systemView.getSofa());
- copy.addToIndexes();
- }
- }
- }
-
- static String format(IdentifiedAnnotation a) {
- return a == null ? null : String.format("\"%s\"(type=%d)", a.getCoveredText(), a.getTypeID());
- }
-
- public static class RemoveSmallerEventMentions extends JCasAnnotator_ImplBase {
-
- @Override
- public void process(JCas jCas) throws AnalysisEngineProcessException {
- Collection<EventMention> mentions = JCasUtil.select(jCas, EventMention.class);
- for (EventMention mention : Lists.newArrayList(mentions)) {
- int begin = mention.getBegin();
- int end = mention.getEnd();
- int typeID = mention.getTypeID();
- List<EventMention> subMentions = JCasUtil.selectCovered(jCas, EventMention.class, mention);
- for (EventMention subMention : subMentions) {
- if (subMention.getBegin() > begin || subMention.getEnd() < end) {
- if (subMention.getTypeID() == typeID) {
- String message =
- String.format("removed %s inside %s", format(subMention), format(mention));
- this.getContext().getLogger().log(Level.WARNING, message);
- subMention.removeFromIndexes();
- }
- }
- }
- }
- }
- }
-
- /**
- * This class is useful for mapping the spans of relation arguments to the
- * relation's category.
- */
- public static class HashableArguments implements Comparable<HashableArguments> {
-
- protected int arg1begin;
-
- protected int arg1end;
-
- protected int arg2begin;
-
- protected int arg2end;
-
- public HashableArguments(int arg1begin, int arg1end, int arg2begin, int arg2end) {
- this.arg1begin = arg1begin;
- this.arg1end = arg1end;
- this.arg2begin = arg2begin;
- this.arg2end = arg2end;
- }
-
- public HashableArguments(Annotation arg1, Annotation arg2) {
- this(arg1.getBegin(), arg1.getEnd(), arg2.getBegin(), arg2.getEnd());
- }
-
- public HashableArguments(BinaryTextRelation relation) {
- this(relation.getArg1().getArgument(), relation.getArg2().getArgument());
- }
-
- @Override
- public boolean equals(Object otherObject) {
- boolean result = false;
- if (otherObject instanceof HashableArguments) {
- HashableArguments other = (HashableArguments) otherObject;
- result =
- (this.getClass() == other.getClass()
- && this.arg1begin == other.arg1begin
- && this.arg1end == other.arg1end
- && this.arg2begin == other.arg2begin && this.arg2end == other.arg2end);
- }
- return result;
- }
-
- @Override
- public int hashCode() {
- return Objects.hashCode(this.arg1begin, this.arg1end, this.arg2begin, this.arg2end);
- }
-
- @Override
- public String toString() {
- return String.format(
- "%s(%s,%s,%s,%s)",
- this.getClass().getSimpleName(),
- this.arg1begin,
- this.arg1end,
- this.arg2begin,
- this.arg2end);
- }
-
- @Override
- public int compareTo(HashableArguments that) {
- int thisBegin = Math.min(this.arg1begin, this.arg2begin);
- int thatBegin = Math.min(that.arg1begin, that.arg2begin);
- int thisEnd = Math.max(this.arg1end, this.arg2end);
- int thatEnd = Math.max(that.arg1end, that.arg2end);
-
- if (thisBegin < thatBegin) {
- return -1;
- } else if (thisBegin > thatBegin) {
- return +1;
- } else if (this.equals(that)) {
- return 0;
- } else if (thisEnd < thatEnd) {
- return -1;
- } else{
- return 1;
- }
- }
+ @Option(
+ longName = "relations",
+ description = "determines which relations to evaluate on (separately)",
+ defaultValue = { "degree_of", "location_of" })
+ public List<String> getRelations();
+
+ @Option(
+ longName = "test-on-ctakes",
+ description = "evaluate test performance on ctakes entities, instead of gold standard "
+ + "entities")
+ public boolean getTestOnCTakes();
+
+ @Option(
+ longName = "allow-smaller-system-arguments",
+ description = "for evaluation, allow system relation arguments to match gold relation "
+ + "arguments that enclose them")
+ public boolean getAllowSmallerSystemArguments();
+
+ @Option(
+ longName = "ignore-impossible-gold-relations",
+ description = "for evaluation, ignore gold relations that would be impossible to find "
+ + "because there are no corresponding system mentions")
+ public boolean getIgnoreImpossibleGoldRelations();
+
+ @Option(
+ longName = "--print-errors",
+ description = "print relations that were incorrectly predicted")
+ public boolean getPrintErrors();
+
+ @Option(
+ longName = "expand-events",
+ description = "expand events to their covering or covered events")
+ public boolean getExpandEvents();
+
+ }
+
+ public static final Map<String, Class<? extends BinaryTextRelation>> RELATION_CLASSES =
+ Maps.newHashMap();
+ public static final Map<Class<? extends BinaryTextRelation>, Class<? extends RelationExtractorAnnotator>> ANNOTATOR_CLASSES =
+ Maps.newHashMap();
+ public static final Map<Class<? extends BinaryTextRelation>, ParameterSettings> BEST_PARAMETERS =
+ Maps.newHashMap();
+
+ static {
+ RELATION_CLASSES.put("degree_of", DegreeOfTextRelation.class);
+ ANNOTATOR_CLASSES.put(DegreeOfTextRelation.class, DegreeOfRelationExtractorAnnotator.class);
+ BEST_PARAMETERS.put(DegreeOfTextRelation.class, new ParameterSettings(
+ LibLinearStringOutcomeDataWriter.class,
+ new Object[] { RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
+ 1.0f },
+ new String[] { "-s", "1", "-c", "0.1" }));
+
+ RELATION_CLASSES.put("location_of", LocationOfTextRelation.class);
+ ANNOTATOR_CLASSES.put(LocationOfTextRelation.class, LocationOfRelationExtractorAnnotator.class);
+ BEST_PARAMETERS.put(LocationOfTextRelation.class, new ParameterSettings(
+ LibLinearStringOutcomeDataWriter.class,
+ new Object[] { RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
+ 0.5f },//1.0f },
+ new String[] { "-s", "0", "-c", "50.0" }));
+
+ RELATION_CLASSES.put("manages/treats", ManagesTreatsTextRelation.class);
+ ANNOTATOR_CLASSES.put(ManagesTreatsTextRelation.class, ManagesTreatsRelationExtractorAnnotator.class);
+ BEST_PARAMETERS.put(ManagesTreatsTextRelation.class, new ParameterSettings(
+ LibLinearStringOutcomeDataWriter.class,
+ new Object[] { RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
+ 0.5f },
+ new String[] { "-s", "0", "-c", "5.0" }));
+
+ RELATION_CLASSES.put("causes/brings_about", CausesBringsAboutTextRelation.class);
+ ANNOTATOR_CLASSES.put(CausesBringsAboutTextRelation.class, CausesBringsAboutRelationExtractorAnnotator.class);
+ BEST_PARAMETERS.put(CausesBringsAboutTextRelation.class, new ParameterSettings(
+ LibLinearStringOutcomeDataWriter.class,
+ new Object[] { RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
+ 0.5f },
+ new String[] { "-s", "0", "-c", "1.0" }));
+
+ RELATION_CLASSES.put("manifestation_of", ManifestationOfTextRelation.class);
+ ANNOTATOR_CLASSES.put(ManifestationOfTextRelation.class, ManifestationOfRelationExtractorAnnotator.class);
+ BEST_PARAMETERS.put(ManifestationOfTextRelation.class, new ParameterSettings(
+ LibLinearStringOutcomeDataWriter.class,
+ new Object[] { RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
+ 0.5f },
+ new String[] { "-s", "0", "-c", "1.0" }));
+ }
+
+ public static void main(String[] args) throws Exception {
+ // parse the options, validate them, and generate XMI if necessary
+ final Options options = CliFactory.parseArguments(Options.class, args);
+ SHARPXMI.validate(options);
+ SHARPXMI.generateXMI(options);
+
+ // determine the grid of parameters to search through
+ // for the full set of LibLinear parameters, see:
+ // https://github.com/bwaldvogel/liblinear-java/blob/master/src/main/java/de/bwaldvogel/liblinear/Train.java
+ List<ParameterSettings> gridOfSettings = Lists.newArrayList();
+ for (float probabilityOfKeepingANegativeExample : new float[] { 0.5f, 1.0f }) {
+ for (int solver : new int[] { 0 /* logistic regression */, 1 /* SVM */}) {
+ for (double svmCost : new double[] { 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100 }) {
+ gridOfSettings.add(new ParameterSettings(
+ LibLinearStringOutcomeDataWriter.class,
+ new Object[] {
+ RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
+ probabilityOfKeepingANegativeExample },
+ new String[] { "-s", String.valueOf(solver), "-c", String.valueOf(svmCost) }));
+ }
+ }
+ }
+
+ // run an evaluation for each selected relation
+ for (final String relationCategory : options.getRelations()) {
+
+ // get the best parameters for the relation
+ final Class<? extends BinaryTextRelation> relationClass =
+ RELATION_CLASSES.get(relationCategory);
+ ParameterSettings bestSettings = BEST_PARAMETERS.get(relationClass);
+
+ // run the evaluation
+ SHARPXMI.evaluate(
+ options,
+ bestSettings,
+ gridOfSettings,
+ new Function<ParameterSettings, RelationExtractorEvaluation>() {
+ @Override
+ public RelationExtractorEvaluation apply(@Nullable ParameterSettings params) {
+ return new RelationExtractorEvaluation(
+ new File("target/models/" + relationCategory),
+ relationClass,
+ ANNOTATOR_CLASSES.get(relationClass),
+ params,
+ options.getTestOnCTakes(),
+ options.getAllowSmallerSystemArguments(),
+ options.getIgnoreImpossibleGoldRelations(),
+ options.getPrintErrors(),
+ options.getExpandEvents());
+ }
+ });
+ }
+ }
+
+ private Class<? extends BinaryTextRelation> relationClass;
+
+ private Class<? extends RelationExtractorAnnotator> classifierAnnotatorClass;
+
+ private ParameterSettings parameterSettings;
+
+ private boolean testOnCTakes;
+
+ private boolean allowSmallerSystemArguments;
+
+ private boolean ignoreImpossibleGoldRelations;
+
+ private boolean printErrors;
+
+ public static boolean expandEvent = false;
+
+ /**
+ * An evaluation of a relation extractor.
+ *
+ * @param baseDirectory
+ * The directory where models, etc. should be written
+ * @param relationClass
+ * The class of the relation to be predicted
+ * @param classifierAnnotatorClass
+ * The CleartkAnnotator class that learns a relation extractor model
+ * @param parameterSettings
+ * The parameters defining how to train a classifier
+ * @param testOnCTakes
+ * During testing, use annotations from cTAKES, not from the gold
+ * standard
+ * @param allowSmallerSystemArguments
+ * During testing, allow system annotations to match gold annotations
+ * that enclose them
+ * @param ignoreImpossibleGoldRelations
+ * During testing, ignore gold relations that would be impossible to
+ * find because there are no corresponding system mentions
+ * @param expandEvent
+ */
+ public RelationExtractorEvaluation(
+ File baseDirectory,
+ Class<? extends BinaryTextRelation> relationClass,
+ Class<? extends RelationExtractorAnnotator> classifierAnnotatorClass,
+ ParameterSettings parameterSettings,
+ boolean testOnCTakes,
+ boolean allowSmallerSystemArguments,
+ boolean ignoreImpossibleGoldRelations,
+ boolean printErrors, boolean expandEventParameter) {
+ super(baseDirectory);
+ this.relationClass = relationClass;
+ this.classifierAnnotatorClass = classifierAnnotatorClass;
+ this.parameterSettings = parameterSettings;
+ this.testOnCTakes = testOnCTakes;
+ this.allowSmallerSystemArguments = allowSmallerSystemArguments;
+ this.ignoreImpossibleGoldRelations = ignoreImpossibleGoldRelations;
+ this.printErrors = printErrors;
+ expandEvent = expandEventParameter;
+ }
+
+ public RelationExtractorEvaluation(
+ File baseDirectory,
+ Class<? extends BinaryTextRelation> relationClass,
+ Class<? extends RelationExtractorAnnotator> classifierAnnotatorClass,
+ ParameterSettings parameterSettings) {
+ this(
+ baseDirectory,
+ relationClass,
+ classifierAnnotatorClass,
+ parameterSettings,
+ false,
+ false,
+ false,
+ false,
+ false);
+ }
+
+ @Override
+ public void train(CollectionReader collectionReader, File directory) throws Exception {
+ System.err.printf(
+ "%s: %s: %s:\n",
+ this.getClass().getSimpleName(),
+ this.relationClass.getSimpleName(),
+ directory.getName());
+ System.err.println(this.parameterSettings);
+
+ AggregateBuilder builder = new AggregateBuilder();
+ // remove cTAKES entity mentions and modifiers in the system view and copy
+ // in the gold relations
+ builder.add(AnalysisEngineFactory.createEngineDescription(RemoveCTakesMentionsAndCopyGoldRelations.class));
+
+ //add potential events for training:
+ if (expandEvent && this.relationClass.getSimpleName().equals("LocationOfTextRelation") )
+ builder.add(AnalysisEngineFactory.createEngineDescription(AddPotentialRelations.class));
+
+ // add the relation extractor, configured for training mode
+ AnalysisEngineDescription classifierAnnotator =
+ AnalysisEngineFactory.createEngineDescription(
+ this.classifierAnnotatorClass,
+ this.parameterSettings.configurationParameters);
+ ConfigurationParameterFactory.addConfigurationParameters(
+ classifierAnnotator,
+ DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
+ this.parameterSettings.dataWriterClass,
+ DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+ directory.getPath());
+ builder.add(classifierAnnotator);
+
+ // run the data-writing pipeline
+ SimplePipeline.runPipeline(collectionReader, builder.createAggregateDescription());
+
+ // train the classifier and package it into a .jar file
+ JarClassifierBuilder.trainAndPackage(directory, this.parameterSettings.trainingArguments);
+ }
+
+ public static class AddPotentialRelations extends JCasAnnotator_ImplBase {
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+ JCas relationView = jCas;
+
+ Map<EventMention, Collection<EventMention>> coveredMap =
+ JCasUtil.indexCovered(relationView, EventMention.class, EventMention.class);
+ Map<EventMention, Collection<EventMention>> coveringMap =
+ JCasUtil.indexCovering(relationView, EventMention.class, EventMention.class);
+ Map<AnatomicalSiteMention, Collection<EventMention>> siteEventMap =
+ JCasUtil.indexCovered(relationView, AnatomicalSiteMention.class, EventMention.class);
+ Map<AnatomicalSiteMention, Collection<EntityMention>> siteEntityMap =
+ JCasUtil.indexCovering(relationView, AnatomicalSiteMention.class, EntityMention.class);
+ final List<IdentifiedAnnotation> eventList = new ArrayList<>();
+ for(LocationOfTextRelation relation : Lists.newArrayList(JCasUtil.select(relationView, LocationOfTextRelation.class))){
+ Annotation arg1 = relation.getArg1().getArgument();
+ Annotation arg2 = relation.getArg2().getArgument();
+ EventMention event = null;
+ if(arg1 instanceof EventMention && arg2 instanceof AnatomicalSiteMention){
+ event = (EventMention) arg1;
+
+ eventList.addAll(coveringMap.get(event));
+ eventList.addAll(coveredMap.get(event));
+ for(IdentifiedAnnotation covEvent : eventList){
+ if(!covEvent.getClass().equals(EventMention.class) && !hasOverlap(covEvent, arg2)){
+ createRelation(relationView, covEvent, arg2, relation.getCategory());
+ }
+ }
+ eventList.clear();
+ eventList.addAll(siteEventMap.get(arg2));
+ eventList.addAll(siteEntityMap.get(arg2));
+ for(IdentifiedAnnotation covSite : eventList){
+ if(!covSite.getClass().equals(EventMention.class) && !hasOverlap(arg1, covSite)){
+ createRelation(relationView, event, covSite, relation.getCategory());
+ }
+ }
+ eventList.clear();
+ }else if(arg2 instanceof EventMention && arg1 instanceof AnatomicalSiteMention){
+ event = (EventMention) arg2;
+ eventList.addAll(coveringMap.get(event));
+ eventList.addAll(coveredMap.get(event));
+ for(IdentifiedAnnotation covEvent : eventList){
+ if(!covEvent.getClass().equals(EventMention.class)&& !hasOverlap(arg1, covEvent)){
+ createRelation(relationView, arg1, covEvent, relation.getCategory());
+ }
+ }
+ eventList.clear();
+ eventList.addAll(siteEventMap.get(arg1));
+ eventList.addAll(siteEntityMap.get(arg1));
+ for(IdentifiedAnnotation covSite : eventList){
+ if(!covSite.getClass().equals(EventMention.class) && !hasOverlap(covSite, arg2)){
+ createRelation(relationView, covSite, event, relation.getCategory());
+ }
+ }
+ eventList.clear();
+ }
+ }
+
+ }
+
+ private static boolean hasOverlap(Annotation event1, Annotation event2) {
+ if(event1.getEnd()>=event2.getBegin()&&event1.getEnd()<=event2.getEnd()){
+ return true;
+ }
+ if(event2.getEnd()>=event1.getBegin()&&event2.getEnd()<=event1.getEnd()){
+ return true;
+ }
+ return false;
+ }
+
+ private static void createRelation(JCas jCas, Annotation arg1,
+ Annotation arg2, String category) {
+ RelationArgument relArg1 = new RelationArgument(jCas);
+ relArg1.setArgument(arg1);
+ relArg1.setRole("Arg1");
+ relArg1.addToIndexes();
+ RelationArgument relArg2 = new RelationArgument(jCas);
+ relArg2.setArgument(arg2);
+ relArg2.setRole("Arg2");
+ relArg2.addToIndexes();
+ TemporalTextRelation relation = new TemporalTextRelation(jCas);
+ relation.setArg1(relArg1);
+ relation.setArg2(relArg2);
+ relation.setCategory(category);
+ relation.addToIndexes();
+
+ }
+ }
+
+ @Override
+ protected AnnotationStatistics<String> test(CollectionReader collectionReader, File directory)
+ throws Exception {
+ AggregateBuilder builder = new AggregateBuilder();
+ if (this.testOnCTakes) {
+ // add the modifier extractor
+ File file = new File("desc/analysis_engine/ModifierExtractorAnnotator.xml");
+ XMLInputSource source = new XMLInputSource(file);
+ builder.add(UIMAFramework.getXMLParser().parseAnalysisEngineDescription(source));
+ // remove extraneous entity mentions
+ builder.add(AnalysisEngineFactory.createEngineDescription(RemoveSmallerEventMentions.class));
+ } else {
+ // replace cTAKES entity mentions and modifiers in the system view with
+ // the gold annotations
+ builder.add(AnalysisEngineFactory.createEngineDescription(ReplaceCTakesMentionsWithGoldMentions.class));
+ }
+ // add the relation extractor, configured for classification mode
+ AnalysisEngineDescription classifierAnnotator =
+ AnalysisEngineFactory.createEngineDescription(
+ this.classifierAnnotatorClass,
+ this.parameterSettings.configurationParameters);
+ ConfigurationParameterFactory.addConfigurationParameters(
+ classifierAnnotator,
+ GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+ JarClassifierBuilder.getModelJarFile(directory));
+ builder.add(classifierAnnotator);
+
+ // statistics will be based on the "category" feature of the
+ // BinaryTextRelations
+ AnnotationStatistics<String> stats = new AnnotationStatistics<>();
+ Function<BinaryTextRelation, HashableArguments> getSpan =
+ new Function<BinaryTextRelation, HashableArguments>() {
+ @Override
+ public HashableArguments apply(BinaryTextRelation relation) {
+ return new HashableArguments(relation);
+ }
+ };
+ Function<BinaryTextRelation, String> getOutcome =
+ AnnotationStatistics.annotationToFeatureValue("category");
+
+ // calculate statistics, iterating over the results of the classifier
+ AnalysisEngine engine = builder.createAggregate();
+ for (Iterator<JCas> casIter = new JCasIterator(collectionReader, engine); casIter.hasNext();) {
+ JCas jCas = casIter.next();
+ // get the gold view
+ JCas goldView;
+ try {
+ goldView = jCas.getView(SHARPXMI.GOLD_VIEW_NAME);
+ } catch (CASException e) {
+ throw new AnalysisEngineProcessException(e);
+ }
+
+ // get the gold and system annotations
+ Collection<? extends BinaryTextRelation> goldBinaryTextRelations =
+ JCasUtil.select(goldView, this.relationClass);
+ Collection<? extends BinaryTextRelation> systemBinaryTextRelations =
+ JCasUtil.select(jCas, this.relationClass);
+
+ if (this.ignoreImpossibleGoldRelations) {
+ // collect only relations where both arguments have some possible system
+ // arguments
+ List<BinaryTextRelation> relations = Lists.newArrayList();
+ for (BinaryTextRelation relation : goldBinaryTextRelations) {
+ boolean hasSystemArgs = true;
+ for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
+ IdentifiedAnnotation goldArg = (IdentifiedAnnotation) relArg.getArgument();
+ Class<? extends IdentifiedAnnotation> goldClass = goldArg.getClass();
+ boolean noSystemArg = JCasUtil.selectCovered(jCas, goldClass, goldArg).isEmpty();
+ hasSystemArgs = hasSystemArgs && !noSystemArg;
+ }
+ if (hasSystemArgs) {
+ relations.add(relation);
+ } else {
+ IdentifiedAnnotation arg1 = (IdentifiedAnnotation) relation.getArg1().getArgument();
+ IdentifiedAnnotation arg2 = (IdentifiedAnnotation) relation.getArg2().getArgument();
+ String messageFormat =
+ "removing relation between %s and %s which is impossible to "
+ + "find with system mentions";
+ String message = String.format(messageFormat, format(arg1), format(arg2));
+ UIMAFramework.getLogger(this.getClass()).log(Level.WARNING, message);
+ }
+ }
+ goldBinaryTextRelations = relations;
+ }
+
+ if (this.allowSmallerSystemArguments) {
+
+ // collect all the arguments of the manually annotated relations
+ Set<IdentifiedAnnotation> goldArgs = Sets.newHashSet();
+ for (BinaryTextRelation relation : goldBinaryTextRelations) {
+ for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
+ goldArgs.add((IdentifiedAnnotation) relArg.getArgument());
+ }
+ }
+
+ // collect all the arguments of system-predicted relations that don't
+ // match some gold argument
+ Set<IdentifiedAnnotation> unmatchedSystemArgs = Sets.newHashSet();
+ for (BinaryTextRelation relation : systemBinaryTextRelations) {
+ for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
+ IdentifiedAnnotation systemArg = (IdentifiedAnnotation) relArg.getArgument();
+ Class<? extends IdentifiedAnnotation> systemClass = systemArg.getClass();
+ boolean matchesSomeGold = false;
+ for (IdentifiedAnnotation goldArg : JCasUtil.selectCovered(
+ goldView,
+ systemClass,
+ systemArg)) {
+ if (goldArg.getBegin() == systemArg.getBegin()
+ && goldArg.getEnd() == systemArg.getEnd()) {
+ matchesSomeGold = true;
+ break;
+ }
+ }
+ if (!matchesSomeGold) {
+ unmatchedSystemArgs.add(systemArg);
+ }
+ }
+ }
+
+ // map each unmatched system argument to the gold argument that encloses
+ // it
+ Map<IdentifiedAnnotation, IdentifiedAnnotation> systemToGold = Maps.newHashMap();
+ for (IdentifiedAnnotation goldArg : goldArgs) {
+ Class<? extends IdentifiedAnnotation> goldClass = goldArg.getClass();
+ for (IdentifiedAnnotation systemArg : JCasUtil.selectCovered(jCas, goldClass, goldArg)) {
+ if (unmatchedSystemArgs.contains(systemArg)) {
+
+ // if there's no mapping yet for this system arg, map it to the
+ // enclosing gold arg
+ IdentifiedAnnotation oldGoldArg = systemToGold.get(systemArg);
+ if (oldGoldArg == null) {
+ systemToGold.put(systemArg, goldArg);
+ }
+
+ // if there's already a mapping for this system arg, only re-map
+ // it to match the type
+ else {
+ IdentifiedAnnotation current, other;
+ if (systemArg.getTypeID() == goldArg.getTypeID()) {
+ systemToGold.put(systemArg, goldArg);
+ current = goldArg;
+ other = oldGoldArg;
+ } else {
+ current = oldGoldArg;
+ other = goldArg;
+ }
+
+ // issue a warning since this re-mapping procedure is imperfect
+ String message =
+ "system argument %s mapped to gold argument %s, but could also be mapped to %s";
+ message = String.format(message, format(systemArg), format(current), format(other));
+ UIMAFramework.getLogger(this.getClass()).log(Level.WARNING, message);
+ }
+ }
+ }
+ }
+
+ // replace system arguments with gold arguments where necessary/possible
+ for (BinaryTextRelation relation : systemBinaryTextRelations) {
+ for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
+ IdentifiedAnnotation systemArg = (IdentifiedAnnotation) relArg.getArgument();
+ IdentifiedAnnotation matchingGoldArg = systemToGold.get(systemArg);
+ if (matchingGoldArg != null) {
+ String messageFormat = "replacing system argument %s with gold argument %s";
+ String message =
+ String.format(messageFormat, format(systemArg), format(matchingGoldArg));
+ UIMAFramework.getLogger(this.getClass()).log(Level.WARNING, message);
+ relArg.setArgument(matchingGoldArg);
+ }
+ }
+ }
+ }
+
+ // update the statistics based on the argument spans of the relation
+ stats.add(goldBinaryTextRelations, systemBinaryTextRelations, getSpan, getOutcome);
+
+ // print errors if requested
+ if (this.printErrors) {
+ Map<HashableArguments, BinaryTextRelation> goldMap = Maps.newHashMap();
+ for (BinaryTextRelation relation : goldBinaryTextRelations) {
+ goldMap.put(new HashableArguments(relation), relation);
+ }
+ Map<HashableArguments, BinaryTextRelation> systemMap = Maps.newHashMap();
+ for (BinaryTextRelation relation : systemBinaryTextRelations) {
+ systemMap.put(new HashableArguments(relation), relation);
+ }
+ Set<HashableArguments> all = Sets.union(goldMap.keySet(), systemMap.keySet());
+ List<HashableArguments> sorted = Lists.newArrayList(all);
+ Collections.sort(sorted);
+ for (HashableArguments key : sorted) {
+ BinaryTextRelation goldRelation = goldMap.get(key);
+ BinaryTextRelation systemRelation = systemMap.get(key);
+ if (goldRelation == null) {
+ System.out.println("System added: " + formatRelation(systemRelation));
+ } else if (systemRelation == null) {
+ System.out.println("System dropped: " + formatRelation(goldRelation));
+ } else if (!systemRelation.getCategory().equals(goldRelation.getCategory())) {
+ String label = systemRelation.getCategory();
+ System.out.printf("System labeled %s for %s\n", label, formatRelation(systemRelation));
+ }
+ }
+ }
+ }
+
+ System.err.print(stats);
+ System.err.println();
+ return stats;
+ }
+
+ private static String formatRelation(BinaryTextRelation relation) {
+ IdentifiedAnnotation arg1 = (IdentifiedAnnotation) relation.getArg1().getArgument();
+ IdentifiedAnnotation arg2 = (IdentifiedAnnotation) relation.getArg2().getArgument();
+ String text = arg1.getCAS().getDocumentText();
+ int begin = Math.min(arg1.getBegin(), arg2.getBegin());
+ int end = Math.max(arg1.getBegin(), arg2.getBegin());
+ begin = Math.max(0, begin - 50);
+ end = Math.min(text.length(), end + 50);
+ return String.format(
+ "%s(%s(type=%d), %s(type=%d)) in ...%s...",
+ relation.getCategory(),
+ arg1.getCoveredText(),
+ arg1.getTypeID(),
+ arg2.getCoveredText(),
+ arg2.getTypeID(),
+ text.substring(begin, end).replaceAll("[\r\n]", " "));
+ }
+
+ /**
+ * Annotator that removes cTAKES mentions in the system view and copies
+ * relations from the gold view to the system view
+ */
+ public static class RemoveCTakesMentionsAndCopyGoldRelations extends JCasAnnotator_ImplBase {
+
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+ JCas goldView, systemView;
+ try {
+ goldView = jCas.getView(SHARPXMI.GOLD_VIEW_NAME);
+ systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
+ } catch (CASException e) {
+ throw new AnalysisEngineProcessException(e);
+ }
+
+ // remove cTAKES Mentions and Modifiers from system view
+ List<IdentifiedAnnotation> cTakesMentions = new ArrayList<>();
+ cTakesMentions.addAll(JCasUtil.select(systemView, EventMention.class));
+ cTakesMentions.addAll(JCasUtil.select(systemView, EntityMention.class));
+ cTakesMentions.addAll(JCasUtil.select(systemView, Modifier.class));
+ for (IdentifiedAnnotation cTakesMention : cTakesMentions) {
+ cTakesMention.removeFromIndexes();
+ }
+
+ // copy gold Mentions and Modifiers to the system view
+ List<IdentifiedAnnotation> goldMentions = new ArrayList<>();
+ goldMentions.addAll(JCasUtil.select(goldView, EventMention.class));
+ goldMentions.addAll(JCasUtil.select(goldView, EntityMention.class));
+ goldMentions.addAll(JCasUtil.select(goldView, Modifier.class));
+ CasCopier copier = new CasCopier(goldView.getCas(), systemView.getCas());
+ Feature sofaFeature = jCas.getTypeSystem().getFeatureByFullName(CAS.FEATURE_FULL_NAME_SOFA);
+ for (IdentifiedAnnotation goldMention : goldMentions) {
+ Annotation copy = (Annotation) copier.copyFs(goldMention);
+ copy.setFeatureValue(sofaFeature, systemView.getSofa());
+ copy.addToIndexes();
+ }
+
+ // copy gold relations to the system view
+ for (BinaryTextRelation goldRelation : JCasUtil.select(goldView, BinaryTextRelation.class)) {
+ BinaryTextRelation relation = (BinaryTextRelation) copier.copyFs(goldRelation);
+ relation.addToIndexes(systemView);
+ for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
+ relArg.addToIndexes(systemView);
+ // relArg.getArgument() should have been added to indexes with
+ // mentions above
+ }
+ }
+ }
+ }
+
+ /**
+ * Annotator that removes cTAKES Mentions and Modifiers from the system view,
+ * and copies over the manually annotated Mentions and Modifiers from the gold
+ * view.
+ */
+ public static class ReplaceCTakesMentionsWithGoldMentions extends JCasAnnotator_ImplBase {
+
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+ JCas goldView, systemView;
+ try {
+ goldView = jCas.getView(SHARPXMI.GOLD_VIEW_NAME);
+ systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
+ } catch (CASException e) {
+ throw new AnalysisEngineProcessException(e);
+ }
+
+ // remove cTAKES Mentions and Modifiers from system view
+ List<IdentifiedAnnotation> cTakesMentions = new ArrayList<>();
+ cTakesMentions.addAll(JCasUtil.select(systemView, EventMention.class));
+ cTakesMentions.addAll(JCasUtil.select(systemView, EntityMention.class));
+ cTakesMentions.addAll(JCasUtil.select(systemView, Modifier.class));
+ for (IdentifiedAnnotation cTakesMention : cTakesMentions) {
+ cTakesMention.removeFromIndexes();
+ }
+
+ // copy gold Mentions and Modifiers to the system view
+ List<IdentifiedAnnotation> goldMentions = new ArrayList<>();
+ goldMentions.addAll(JCasUtil.select(goldView, EventMention.class));
+ goldMentions.addAll(JCasUtil.select(goldView, EntityMention.class));
+ goldMentions.addAll(JCasUtil.select(goldView, Modifier.class));
+ CasCopier copier = new CasCopier(goldView.getCas(), systemView.getCas());
+ for (IdentifiedAnnotation goldMention : goldMentions) {
+ Annotation copy = (Annotation) copier.copyFs(goldMention);
+ Feature sofaFeature = copy.getType().getFeatureByBaseName("sofa");
+ copy.setFeatureValue(sofaFeature, systemView.getSofa());
+ copy.addToIndexes();
+ }
+ }
+ }
+
+ static String format(IdentifiedAnnotation a) {
+ return a == null ? null : String.format("\"%s\"(type=%d)", a.getCoveredText(), a.getTypeID());
+ }
+
+ public static class RemoveSmallerEventMentions extends JCasAnnotator_ImplBase {
+
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+ Collection<EventMention> mentions = JCasUtil.select(jCas, EventMention.class);
+ for (EventMention mention : Lists.newArrayList(mentions)) {
+ int begin = mention.getBegin();
+ int end = mention.getEnd();
+ int typeID = mention.getTypeID();
+ List<EventMention> subMentions = JCasUtil.selectCovered(jCas, EventMention.class, mention);
+ for (EventMention subMention : subMentions) {
+ if (subMention.getBegin() > begin || subMention.getEnd() < end) {
+ if (subMention.getTypeID() == typeID) {
+ String message =
+ String.format("removed %s inside %s", format(subMention), format(mention));
+ this.getContext().getLogger().log(Level.WARNING, message);
+ subMention.removeFromIndexes();
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * This class is useful for mapping the spans of relation arguments to the
+ * relation's category.
+ */
+ public static class HashableArguments implements Comparable<HashableArguments> {
+
+ protected int arg1begin;
+
+ protected int arg1end;
+
+ protected int arg2begin;
+
+ protected int arg2end;
+
+ public HashableArguments(int arg1begin, int arg1end, int arg2begin, int arg2end) {
+ this.arg1begin = arg1begin;
+ this.arg1end = arg1end;
+ this.arg2begin = arg2begin;
+ this.arg2end = arg2end;
+ }
+
+ public HashableArguments(Annotation arg1, Annotation arg2) {
+ this(arg1.getBegin(), arg1.getEnd(), arg2.getBegin(), arg2.getEnd());
+ }
+
+ public HashableArguments(BinaryTextRelation relation) {
+ this(relation.getArg1().getArgument(), relation.getArg2().getArgument());
+ }
+
+ @Override
+ public boolean equals(Object otherObject) {
+ boolean result = false;
+ if (otherObject instanceof HashableArguments) {
+ HashableArguments other = (HashableArguments) otherObject;
+ result =
+ (this.getClass() == other.getClass()
+ && this.arg1begin == other.arg1begin
+ && this.arg1end == other.arg1end
+ && this.arg2begin == other.arg2begin && this.arg2end == other.arg2end);
+ }
+ return result;
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hashCode(this.arg1begin, this.arg1end, this.arg2begin, this.arg2end);
+ }
+
+ @Override
+ public String toString() {
+ return String.format(
+ "%s(%s,%s,%s,%s)",
+ this.getClass().getSimpleName(),
+ this.arg1begin,
+ this.arg1end,
+ this.arg2begin,
+ this.arg2end);
+ }
+
+ @Override
+ public int compareTo(HashableArguments that) {
+ int thisBegin = Math.min(this.arg1begin, this.arg2begin);
+ int thatBegin = Math.min(that.arg1begin, that.arg2begin);
+ int thisEnd = Math.max(this.arg1end, this.arg2end);
+ int thatEnd = Math.max(that.arg1end, that.arg2end);
+
+ if (thisBegin < thatBegin) {
+ return -1;
+ } else if (thisBegin > thatBegin) {
+ return +1;
+ } else if (this.equals(that)) {
+ return 0;
+ } else if (thisEnd < thatEnd) {
+ return -1;
+ } else{
+ return 1;
+ }
+ }
- }
+ }
}