You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2013/05/10 23:43:56 UTC
svn commit: r1481199 - in
/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal:
ae/EventEventRelationAnnotator.java eval/EvaluationOfEventEventRelations.java
eval/EvaluationOfTemporalRelations_ImplBase.java
Author: tmill
Date: Fri May 10 21:43:55 2013
New Revision: 1481199
URL: http://svn.apache.org/r1481199
Log:
First pass at event-event classifier and eval scripts, mostly copy/pasted from event-time versions. Much consolidation possible later.
Added:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java (with props)
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventEventRelations.java (with props)
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations_ImplBase.java (with props)
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java?rev=1481199&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java Fri May 10 21:43:55 2013
@@ -0,0 +1,113 @@
+package org.apache.ctakes.temporal.ae;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
+import org.apache.ctakes.relationextractor.ae.features.PartOfSpeechFeaturesExtractor;
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.relationextractor.ae.features.TokenFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.TemporalAttributeFeatureExtractor;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.classifier.CleartkAnnotator;
+import org.cleartk.classifier.DataWriter;
+import org.cleartk.classifier.jar.DefaultDataWriterFactory;
+import org.cleartk.classifier.jar.DirectoryDataWriterFactory;
+import org.cleartk.classifier.jar.GenericJarClassifierFactory;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.util.JCasUtil;
+
+import com.google.common.collect.Lists;
+
+public class EventEventRelationAnnotator extends RelationExtractorAnnotator {
+
+  /**
+   * Builds the training-mode description of this annotator, configured to write
+   * classifier training data into a directory.
+   *
+   * @param dataWriterClass data writer implementation passed as
+   *        {@code DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME}
+   * @param outputDirectory directory passed as
+   *        {@code DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY}
+   * @param probabilityOfKeepingANegativeExample value passed (narrowed to float) as
+   *        {@code RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE}
+   * @return the description produced by {@code AnalysisEngineFactory.createPrimitiveDescription}
+   * @throws ResourceInitializationException propagated from the uimaFIT factory call
+   */
+  public static AnalysisEngineDescription createDataWriterDescription(
+      Class<? extends DataWriter<String>> dataWriterClass,
+      File outputDirectory,
+      double probabilityOfKeepingANegativeExample) throws ResourceInitializationException {
+    final boolean trainingMode = true;
+    // The original author found the value had to be cast to float before being handed
+    // to uimaFIT ("something funny going on in uimaFIT maybe?"); the cast is kept as-is.
+    final float keepNegativeProbability = (float) probabilityOfKeepingANegativeExample;
+
+    return AnalysisEngineFactory.createPrimitiveDescription(
+        EventEventRelationAnnotator.class,
+        CleartkAnnotator.PARAM_IS_TRAINING, trainingMode,
+        DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, dataWriterClass,
+        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, outputDirectory,
+        RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
+        keepNegativeProbability);
+  }
+
+  /**
+   * Builds the classification-mode (non-training) description of this annotator.
+   *
+   * @param modelDirectory directory expected to contain the packaged classifier
+   *        as {@code model.jar}
+   * @return the description produced by {@code AnalysisEngineFactory.createPrimitiveDescription}
+   * @throws ResourceInitializationException propagated from the uimaFIT factory call
+   */
+  public static AnalysisEngineDescription createAnnotatorDescription(File modelDirectory)
+      throws ResourceInitializationException {
+    final File classifierJar = new File(modelDirectory, "model.jar");
+    final boolean trainingMode = false;
+    return AnalysisEngineFactory.createPrimitiveDescription(
+        EventEventRelationAnnotator.class,
+        CleartkAnnotator.PARAM_IS_TRAINING, trainingMode,
+        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, classifierJar);
+  }
+
+  /**
+   * Supplies the feature extractors applied to each candidate event pair:
+   * token features, part-of-speech features and temporal attribute features.
+   *
+   * @return a new mutable list holding one fresh instance of each extractor
+   */
+  @Override
+  protected List<RelationFeaturesExtractor> getFeatureExtractors() {
+    List<RelationFeaturesExtractor> extractors = Lists.newArrayList();
+    extractors.add(new TokenFeaturesExtractor());
+    extractors.add(new PartOfSpeechFeaturesExtractor());
+    extractors.add(new TemporalAttributeFeatureExtractor());
+    // Extractors that were left disabled in the original commit:
+    //   new EventTimeFlatTreeFeatureExtractor()
+    //   new TemporalPETExtractor()
+    //   new TemporalPathExtractor()
+    //   new TemporalFTExtractor()
+    return extractors;
+  }
+
+ /**
+ * Returns {@link Sentence} as the covering annotation type for this annotator.
+ * Presumably the superclass calls {@code getCandidateRelationArgumentPairs} once
+ * per annotation of this type -- confirm in RelationExtractorAnnotator.
+ */
+ @Override
+ protected Class<? extends Annotation> getCoveringClass() {
+ return Sentence.class;
+ }
+
+ @Override
+ protected List<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(
+ JCas jCas, Annotation sentence) {
+ List<IdentifiedAnnotationPair> pairs = Lists.newArrayList();
+ List<EventMention> events = new ArrayList<EventMention>(JCasUtil.selectCovered(jCas, EventMention.class, sentence));
+ for (int i = 0; i < events.size(); i++){
+ for(int j = i+1; j < events.size(); j++){
+ pairs.add(new IdentifiedAnnotationPair(events.get(i), events.get(j)));
+ }
+ }
+ return pairs;
+ }
+
+  /**
+   * Determines the training label for an ordered argument pair.
+   * <ul>
+   * <li>If a relation exists for (arg1, arg2), its category is used.</li>
+   * <li>Otherwise, if a relation exists for (arg2, arg1), its category with the
+   *     suffix {@code "-1"} is used to mark the reversed direction.</li>
+   * <li>If no label was found, {@code NO_RELATION_CATEGORY} is returned with
+   *     probability {@code probabilityOfKeepingANegativeExample}, else {@code null}.</li>
+   * </ul>
+   *
+   * @param relationLookup relations keyed by their (first, second) argument list
+   * @param arg1 first candidate argument
+   * @param arg2 second candidate argument
+   * @return the label, or {@code null} when the negative example is dropped
+   */
+  @Override
+  protected String getRelationCategory(
+      Map<List<Annotation>, BinaryTextRelation> relationLookup,
+      IdentifiedAnnotation arg1,
+      IdentifiedAnnotation arg2) {
+    String label = null;
+    BinaryTextRelation forward = relationLookup.get(Arrays.asList(arg1, arg2));
+    if (forward == null) {
+      // No relation in the given order; look for one annotated in the opposite direction.
+      BinaryTextRelation backward = relationLookup.get(Arrays.asList(arg2, arg1));
+      if (backward != null) {
+        label = backward.getCategory() + "-1";
+      }
+    } else {
+      label = forward.getCategory();
+    }
+
+    if (label != null) {
+      return label;
+    }
+    // Unlabeled pair: keep it as a negative example only some of the time.
+    return coin.nextDouble() <= this.probabilityOfKeepingANegativeExample
+        ? NO_RELATION_CATEGORY
+        : null;
+  }
+
+}
Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventEventRelations.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventEventRelations.java?rev=1481199&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventEventRelations.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventEventRelations.java Fri May 10 21:43:55 2013
@@ -0,0 +1,218 @@
+package org.apache.ctakes.temporal.eval;
+
+import java.io.File;
+import java.net.URI;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.ctakes.relationextractor.eval.RelationExtractorEvaluation.HashableArguments;
+import org.apache.ctakes.temporal.ae.EventEventRelationAnnotator;
+import org.apache.ctakes.temporal.ae.EventTimeRelationAnnotator;
+import org.apache.ctakes.temporal.ae.baselines.RecallBaselineEventTimeRelationAnnotator;
+import org.apache.ctakes.temporal.eval.EvaluationOfTemporalRelations.AddTransitiveContainsRelations;
+import org.apache.ctakes.temporal.eval.EvaluationOfTemporalRelations.ParameterSettings;
+import org.apache.ctakes.temporal.eval.EvaluationOfTemporalRelations.PreserveEventEventRelations;
+import org.apache.ctakes.temporal.eval.EvaluationOfTemporalRelations.RemoveCrossSentenceRelations;
+import org.apache.ctakes.temporal.eval.EvaluationOfTemporalRelations.RemoveEventEventRelations;
+import org.apache.ctakes.temporal.eval.EvaluationOfTemporalRelations.RemoveNonContainsRelations;
+import org.apache.ctakes.temporal.eval.EvaluationOfTemporalRelations.RemoveRelations;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.classifier.jar.JarClassifierBuilder;
+import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter;
+import org.cleartk.eval.AnnotationStatistics;
+import org.cleartk.util.ViewURIUtil;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.pipeline.JCasIterable;
+import org.uimafit.pipeline.SimplePipeline;
+import org.uimafit.testing.util.HideOutput;
+import org.uimafit.util.JCasUtil;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+import com.lexicalscope.jewel.cli.CliFactory;
+
+public class EvaluationOfEventEventRelations extends
+ EvaluationOfTemporalRelations_ImplBase {
+
+  /**
+   * Creates an event-event relation evaluation.
+   *
+   * @param baseDirectory forwarded to the superclass constructor
+   * @param rawTextDirectory forwarded to the superclass constructor
+   * @param knowtatorXMLDirectory forwarded to the superclass constructor
+   * @param xmiDirectory forwarded to the superclass constructor
+   * @param treebankDirectory forwarded to the superclass constructor
+   * @param useClosure accepted for signature parity; not used by this constructor
+   * @param printErrors whether per-relation error analysis is printed during testing
+   * @param printRelations whether system relations are printed during testing
+   * @param baseline accepted for signature parity; not used by this constructor
+   * @param kernelParams accepted for signature parity; not used by this constructor
+   * @param params parameter settings stored by the superclass
+   */
+  public EvaluationOfEventEventRelations(
+      File baseDirectory,
+      File rawTextDirectory,
+      File knowtatorXMLDirectory,
+      File xmiDirectory,
+      File treebankDirectory,
+      boolean useClosure,
+      boolean printErrors,
+      boolean printRelations,
+      boolean baseline,
+      String kernelParams,
+      ParameterSettings params) {
+    // The superclass constructor declares printRelations BEFORE printErrors. Passing the
+    // two booleans in the other order compiles silently but swaps the flags.
+    super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, xmiDirectory,
+        treebankDirectory, printRelations, printErrors, params);
+  }
+
+  /**
+   * Writes training data for the event-event relation classifier and trains the model.
+   * The pipeline copies gold events, times and relations, filters the relations
+   * (non-CONTAINS, cross-sentence, then the event-event filter), and runs
+   * {@link EventEventRelationAnnotator} in data-writing mode.
+   *
+   * @param collectionReader reader over the training documents
+   * @param directory directory that receives the training data and the packaged model
+   * @throws Exception if pipeline construction, execution or model training fails
+   */
+  @Override
+  protected void train(CollectionReader collectionReader, File directory)
+      throws Exception {
+    AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
+    aggregateBuilder.add(CopyFromGold.getDescription(EventMention.class, TimeMention.class, BinaryTextRelation.class));
+//    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(MergeContainsOverlap.class));
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveNonContainsRelations.class));
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveCrossSentenceRelations.class));
+    // TODO -- see if this applies to this relation:
+//    if (this.useClosure) {
+//      aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(AddTransitiveContainsRelations.class));
+//    }
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(PreserveEventEventRelations.class));
+    aggregateBuilder.add(EventEventRelationAnnotator.createDataWriterDescription(
+        LIBSVMStringOutcomeDataWriter.class,
+//        TKSVMlightStringOutcomeDataWriter.class,
+        directory,
+        params.probabilityOfKeepingANegativeExample));
+    SimplePipeline.runPipeline(collectionReader, aggregateBuilder.createAggregate());
+
+    // Silence the trainer's console output. The finally block guarantees the output
+    // streams are restored even when training throws; otherwise later output
+    // (including the failure itself) would remain hidden.
+    HideOutput hider = new HideOutput();
+    try {
+      // Options are passed through to the trainer unchanged
+      // (presumably LIBSVM command-line flags -- confirm against the LIBSVM docs).
+      JarClassifierBuilder.trainAndPackage(directory, "-t", "2", "-d", "2", "-c", "10");
+    } finally {
+      hider.restoreOutput();
+      hider.close();
+    }
+  }
+
+ /**
+ * Runs the trained event-event relation annotator over the given documents and
+ * scores its output against the gold view.
+ *
+ * For every CAS, all {@link BinaryTextRelation}s in the gold view are compared with
+ * those in the default view; relations are matched by {@code HashableArguments} and
+ * compared on their "category" feature. Depending on {@code printRelations} and
+ * {@code printErrors}, system relations and a per-relation error report are also
+ * written to standard output.
+ *
+ * @param collectionReader reader over the documents to evaluate
+ * @param directory model directory passed to
+ * {@code EventEventRelationAnnotator.createAnnotatorDescription}
+ * @return accumulated statistics over all documents
+ * @throws Exception if the pipeline cannot be built or run
+ */
+ @Override
+ protected AnnotationStatistics<String> test(
+ CollectionReader collectionReader, File directory) throws Exception {
+ AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
+ // Only events and times are copied from gold here (no relations), unlike in train().
+ aggregateBuilder.add(CopyFromGold.getDescription(EventMention.class, TimeMention.class));
+// aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(MergeContainsOverlap.class,
+// MergeContainsOverlap.PARAM_RELATION_VIEW,
+// GOLD_VIEW_NAME));
+ // The relation filters below are pointed at the gold view.
+ aggregateBuilder.add(
+ AnalysisEngineFactory.createPrimitiveDescription(RemoveNonContainsRelations.class,
+ RemoveNonContainsRelations.PARAM_RELATION_VIEW,
+ GOLD_VIEW_NAME));
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+ RemoveCrossSentenceRelations.class,
+ RemoveCrossSentenceRelations.PARAM_SENTENCE_VIEW,
+ CAS.NAME_DEFAULT_SOFA,
+ RemoveCrossSentenceRelations.PARAM_RELATION_VIEW,
+ GOLD_VIEW_NAME));
+ // TODO - use if relevant.
+// if (this.useClosure) {
+// aggregateBuilder.add(
+// AnalysisEngineFactory.createPrimitiveDescription(AddTransitiveContainsRelations.class),
+// CAS.NAME_DEFAULT_SOFA,
+// GOLD_VIEW_NAME);
+// }
+ // The trailing view-name pair maps the engine's default view onto the gold view.
+ aggregateBuilder.add(
+ AnalysisEngineFactory.createPrimitiveDescription(PreserveEventEventRelations.class),
+ CAS.NAME_DEFAULT_SOFA,
+ GOLD_VIEW_NAME);
+
+ // Added without a view mapping, so it runs on the default (system) view;
+ // presumably it clears relations there before the annotator predicts -- confirm.
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveRelations.class));
+ // TODO -- if we implement a baseline
+// aggregateBuilder.add(this.baseline ? RecallBaselineEventTimeRelationAnnotator.createAnnotatorDescription(directory) :
+// EventTimeRelationAnnotator.createAnnotatorDescription(directory));
+ aggregateBuilder.add(
+ EventEventRelationAnnotator.createAnnotatorDescription(directory));
+
+ // Relations are matched on their arguments and scored on their category.
+ Function<BinaryTextRelation, ?> getSpan = new Function<BinaryTextRelation, HashableArguments>() {
+ public HashableArguments apply(BinaryTextRelation relation) {
+ return new HashableArguments(relation);
+ }
+ };
+ Function<BinaryTextRelation, String> getOutcome = AnnotationStatistics.annotationToFeatureValue("category");
+
+ AnnotationStatistics<String> stats = new AnnotationStatistics<String>();
+ JCasIterable jcasIter =new JCasIterable(collectionReader, aggregateBuilder.createAggregate());
+ JCas jCas = null;
+ while(jcasIter.hasNext()) {
+ jCas = jcasIter.next();
+ JCas goldView = jCas.getView(GOLD_VIEW_NAME);
+ JCas systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
+ Collection<BinaryTextRelation> goldRelations = JCasUtil.select(
+ goldView,
+ BinaryTextRelation.class);
+ Collection<BinaryTextRelation> systemRelations = JCasUtil.select(
+ systemView,
+ BinaryTextRelation.class);
+ stats.add(goldRelations, systemRelations, getSpan, getOutcome);
+ if(this.printRelations){
+ // Use the last path segment of the document URI as the file name.
+ URI uri = ViewURIUtil.getURI(jCas);
+ String[] path = uri.getPath().split("/");
+ printRelationAnnotations(path[path.length - 1], systemRelations);
+ }
+ if(this.printErrors){
+ // Index both relation sets by their arguments so they can be compared key by key.
+ Map<HashableArguments, BinaryTextRelation> goldMap = Maps.newHashMap();
+ for (BinaryTextRelation relation : goldRelations) {
+ goldMap.put(new HashableArguments(relation), relation);
+ }
+ Map<HashableArguments, BinaryTextRelation> systemMap = Maps.newHashMap();
+ for (BinaryTextRelation relation : systemRelations) {
+ systemMap.put(new HashableArguments(relation), relation);
+ }
+ Set<HashableArguments> all = Sets.union(goldMap.keySet(), systemMap.keySet());
+ List<HashableArguments> sorted = Lists.newArrayList(all);
+ Collections.sort(sorted);
+ for (HashableArguments key : sorted) {
+ BinaryTextRelation goldRelation = goldMap.get(key);
+ BinaryTextRelation systemRelation = systemMap.get(key);
+ if (goldRelation == null) {
+ System.out.println("System added: " + formatRelation(systemRelation));
+ } else if (systemRelation == null) {
+ System.out.println("System dropped: " + formatRelation(goldRelation));
+ } else if (!systemRelation.getCategory().equals(goldRelation.getCategory())) {
+ String label = systemRelation.getCategory();
+ System.out.printf("System labeled %s for %s\n", label, formatRelation(goldRelation));
+ } else{
+ System.out.println("Nailed it! " + formatRelation(systemRelation));
+ }
+ }
+ }
+ }
+ return stats;
+ }
+
+  /**
+   * Command-line entry point: trains on the training patient sets and evaluates on the
+   * development sets, or, when the test option is set, trains on training plus
+   * development sets and evaluates on the test sets. The resulting statistics are
+   * stored in the parameter settings and printed to standard error.
+   *
+   * @param args command-line arguments parsed into {@code TempRelOptions}
+   * @throws Exception if argument parsing, data preparation, training or testing fails
+   */
+  public static void main(String[] args) throws Exception {
+    TempRelOptions options = CliFactory.parseArguments(TempRelOptions.class, args);
+    List<Integer> patientSets = options.getPatients().getList();
+    List<Integer> trainItems = THYMEData.getTrainPatientSets(patientSets);
+    List<Integer> devItems = THYMEData.getDevPatientSets(patientSets);
+    List<Integer> testItems = THYMEData.getTestPatientSets(patientSets);
+    File workingDir = new File("target/eval/temporal-relations/event-event/");
+    ParameterSettings params = defaultParams;
+    EvaluationOfEventEventRelations evaluation = new EvaluationOfEventEventRelations(
+        workingDir,
+        options.getRawTextDirectory(),
+        options.getKnowtatorXMLDirectory(),
+        options.getXMIDirectory(),
+        options.getTreebankDirectory(),
+        options.getClosure(),
+        options.getPrintErrors(),
+        options.getPrintFormattedRelations(),
+        options.getBaseline(),
+        options.getKernelParams(),
+        params);
+    evaluation.prepareXMIsFor(patientSets);
+
+    // Work on a private copy: addAll below must neither mutate the list handed back by
+    // THYMEData nor fail if that list turns out to be unmodifiable.
+    List<Integer> training = Lists.newArrayList(trainItems);
+    List<Integer> testing;
+    if (options.getTest()) {
+      training.addAll(devItems);
+      testing = testItems;
+    } else {
+      testing = devItems;
+    }
+    params.stats = evaluation.trainAndTest(training, testing);
+    System.err.println(params.stats);
+  }
+
+}
Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventEventRelations.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations_ImplBase.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations_ImplBase.java?rev=1481199&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations_ImplBase.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations_ImplBase.java Fri May 10 21:43:55 2013
@@ -0,0 +1,96 @@
+package org.apache.ctakes.temporal.eval;
+
+import java.io.File;
+import java.util.Collection;
+
+import org.apache.ctakes.temporal.eval.EvaluationOfTemporalRelations.ParameterSettings;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.tksvmlight.model.CompositeKernel.ComboOperator;
+import org.cleartk.eval.AnnotationStatistics;
+
+import com.lexicalscope.jewel.cli.Option;
+
+public abstract class EvaluationOfTemporalRelations_ImplBase extends
+ Evaluation_ImplBase<AnnotationStatistics<String>> {
+
+ /**
+ * Command-line options shared by the temporal relation evaluations, declared for
+ * JewelCli via {@code @Option}. Extends the options of {@code Evaluation_ImplBase};
+ * other getters used by callers are presumably inherited from there -- confirm.
+ */
+ static interface TempRelOptions extends Evaluation_ImplBase.Options{
+ // When set, callers evaluate on the test sets instead of the development sets.
+ @Option
+ public boolean getTest();
+
+ // When set, callers print the system relations in tab-separated form.
+ @Option
+ public boolean getPrintFormattedRelations();
+
+ // Requests a baseline annotator; how it is honored depends on the concrete evaluation.
+ @Option
+ public boolean getBaseline();
+
+ // Requests relation closure; how it is honored depends on the concrete evaluation.
+ @Option
+ public boolean getClosure();
+ }
+
+ // Default values fed, in this order, into the ParameterSettings constructor below.
+ // NOTE(review): the meaning of each value is defined by ParameterSettings, which is
+ // not shown here; the names suggest downsampling rate and SVM / tree-kernel settings.
+ protected static boolean DEFAULT_BOTH_DIRECTIONS = false;
+ protected static float DEFAULT_DOWNSAMPLE = 1.0f;
+ protected static double DEFAULT_SVM_C = 1.0;
+ protected static double DEFAULT_SVM_G = 1.0;
+ protected static double DEFAULT_TK = 0.5;
+ protected static double DEFAULT_LAMBDA = 0.5;
+
+ // Shared default settings instance. It is static and mutable, so anything written
+ // to it is visible to every user of the defaults.
+ protected static ParameterSettings defaultParams = new ParameterSettings(DEFAULT_BOTH_DIRECTIONS, DEFAULT_DOWNSAMPLE, "linear",
+ DEFAULT_SVM_C, DEFAULT_SVM_G, "polynomial", ComboOperator.SUM, DEFAULT_TK, DEFAULT_LAMBDA);
+
+
+ // Settings used by this evaluation instance; assigned in the constructor.
+ protected ParameterSettings params = null;
+ // Whether system relations are printed during testing; assigned in the constructor.
+ protected boolean printRelations = false;
+
+  /**
+   * Stores the evaluation settings and forwards the directory arguments to the
+   * superclass. Note the order of the two boolean flags: {@code printRelations}
+   * comes before {@code printErrors}.
+   *
+   * @param baseDirectory forwarded to the superclass constructor
+   * @param rawTextDirectory forwarded to the superclass constructor
+   * @param knowtatorXMLDirectory forwarded to the superclass constructor
+   * @param xmiDirectory forwarded to the superclass constructor
+   * @param treebankDirectory forwarded to the superclass constructor
+   * @param printRelations stored in {@code this.printRelations}
+   * @param printErrors stored in {@code this.printErrors}
+   * @param params stored in {@code this.params}
+   */
+  public EvaluationOfTemporalRelations_ImplBase(
+      File baseDirectory,
+      File rawTextDirectory,
+      File knowtatorXMLDirectory,
+      File xmiDirectory,
+      File treebankDirectory,
+      boolean printRelations,
+      boolean printErrors,
+      ParameterSettings params) {
+    super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, xmiDirectory, treebankDirectory);
+    this.printErrors = printErrors;
+    this.printRelations = printRelations;
+    this.params = params;
+  }
+
+  /**
+   * Prints one tab-separated line per relation to standard output with the columns:
+   * file name, category, arg1 type, arg1 begin, arg1 end, arg2 type, arg2 begin, arg2 end.
+   * The argument type is the simple class name of the argument annotation.
+   *
+   * @param fileName value printed in the first column of every line
+   * @param relations relations to print; nothing is printed for an empty collection
+   */
+  protected static void printRelationAnnotations(String fileName, Collection<BinaryTextRelation> relations) {
+    for (BinaryTextRelation rel : relations) {
+      Annotation first = rel.getArg1().getArgument();
+      Annotation second = rel.getArg2().getArgument();
+      System.out.format("%s\t%s\t%s\t%d\t%d\t%s\t%d\t%d\n",
+          fileName,
+          rel.getCategory(),
+          first.getClass().getSimpleName(),
+          first.getBegin(),
+          first.getEnd(),
+          second.getClass().getSimpleName(),
+          second.getBegin(),
+          second.getEnd());
+    }
+  }
+
+  /**
+   * Renders a relation as a single line for error reports: the category, the covered
+   * text and type id of both arguments, and a snippet of the document text around them.
+   *
+   * The snippet runs from 50 characters before the earlier argument start to 50
+   * characters after the later argument end, clamped to the document bounds, with
+   * carriage returns and line feeds replaced by spaces.
+   *
+   * @param relation relation whose arguments are {@link IdentifiedAnnotation}s
+   * @return the formatted description
+   */
+  protected static String formatRelation(BinaryTextRelation relation) {
+    IdentifiedAnnotation arg1 = (IdentifiedAnnotation) relation.getArg1().getArgument();
+    IdentifiedAnnotation arg2 = (IdentifiedAnnotation) relation.getArg2().getArgument();
+    String text = arg1.getCAS().getDocumentText();
+    int begin = Math.min(arg1.getBegin(), arg2.getBegin());
+    // Use the argument END offsets here: taking the max of the begin offsets would start
+    // the trailing context inside the later argument and could cut a long argument off.
+    int end = Math.max(arg1.getEnd(), arg2.getEnd());
+    begin = Math.max(0, begin - 50);
+    end = Math.min(text.length(), end + 50);
+    return String.format(
+        "%s(%s(type=%d), %s(type=%d)) in ...%s...",
+        relation.getCategory(),
+        arg1.getCoveredText(),
+        arg1.getTypeID(),
+        arg2.getCoveredText(),
+        arg2.getTypeID(),
+        text.substring(begin, end).replaceAll("[\r\n]", " "));
+  }
+
+}
Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations_ImplBase.java
------------------------------------------------------------------------------
svn:mime-type = text/plain