You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2013/04/23 21:13:34 UTC
svn commit: r1471101 - in
/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/baselines:
./ BaselineEventTimeRelationAnnotator.java
Author: dligach
Date: Tue Apr 23 19:13:34 2013
New Revision: 1471101
URL: http://svn.apache.org/r1471101
Log:
Baseline that marks "CONTAINS" relation between a time expression and the nearest event mention
Added:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/baselines/
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/baselines/BaselineEventTimeRelationAnnotator.java
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/baselines/BaselineEventTimeRelationAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/baselines/BaselineEventTimeRelationAnnotator.java?rev=1471101&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/baselines/BaselineEventTimeRelationAnnotator.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/baselines/BaselineEventTimeRelationAnnotator.java Tue Apr 23 19:13:34 2013
@@ -0,0 +1,96 @@
+package org.apache.ctakes.temporal.ae.baselines;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.classifier.CleartkAnnotator;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.jar.GenericJarClassifierFactory;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.util.JCasUtil;
+
+import com.google.common.base.Function;
+import com.google.common.base.Functions;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Ordering;
+
+/**
+ * Baseline annotator that proposes a single event-time candidate pair per sentence:
+ * the one time expression in the sentence together with the event mention that has
+ * the fewest base tokens between it and that time expression. Every candidate it
+ * proposes is labelled "CONTAINS".
+ */
+public class BaselineEventTimeRelationAnnotator extends RelationExtractorAnnotator {
+
+  /**
+   * Builds an analysis engine description for this annotator, configured with
+   * training switched off and the classifier jar path set to
+   * {@code model.jar} inside the given directory.
+   *
+   * @param modelDirectory directory expected to contain {@code model.jar}
+   * @return the analysis engine description
+   * @throws ResourceInitializationException if the description cannot be created
+   */
+  public static AnalysisEngineDescription createAnnotatorDescription(File modelDirectory)
+      throws ResourceInitializationException {
+    return AnalysisEngineFactory.createPrimitiveDescription(
+        BaselineEventTimeRelationAnnotator.class,
+        CleartkAnnotator.PARAM_IS_TRAINING,
+        false,
+        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+        new File(modelDirectory, "model.jar"));
+  }
+
+  /**
+   * @return {@link Sentence}, so candidate pairs are generated sentence by sentence
+   */
+  @Override
+  protected Class<? extends Annotation> getCoveringClass() {
+    return Sentence.class;
+  }
+
+  /**
+   * Returns at most one candidate pair for the given sentence: the sentence's only
+   * time mention paired with the closest plain {@link EventMention}, where distance
+   * is the number of {@link BaseToken}s between the two annotations.
+   *
+   * <p>An empty list is returned when the sentence does not contain exactly one time
+   * mention, contains no event mentions, or contains only event mentions whose
+   * runtime class is a subclass of {@link EventMention}.
+   *
+   * <p>When several events are equally close, the first one in the order returned by
+   * {@code JCasUtil.selectCovered} wins, so the choice is deterministic.
+   *
+   * @param jCas     the CAS being processed
+   * @param sentence the covering annotation to search within
+   * @return a list holding the nearest event-time pair, or an empty list
+   */
+  @Override
+  public List<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(
+      JCas jCas,
+      Annotation sentence) {
+
+    List<IdentifiedAnnotationPair> result = Lists.newArrayList();
+    List<EventMention> events = JCasUtil.selectCovered(jCas, EventMention.class, sentence);
+    List<TimeMention> times = JCasUtil.selectCovered(jCas, TimeMention.class, sentence);
+
+    // the baseline only handles sentences with exactly one time expression
+    if (times.size() != 1 || events.isEmpty()) {
+      return result;
+    }
+    TimeMention time = times.get(0);
+
+    // single pass: remember the pair with the smallest token distance seen so far
+    IdentifiedAnnotationPair nearest = null;
+    int nearestDistance = Integer.MAX_VALUE;
+    for (EventMention event : events) {
+      // ignore subclasses like Procedure and Disease/Disorder
+      if (!event.getClass().equals(EventMention.class)) {
+        continue;
+      }
+      IdentifiedAnnotationPair pair = new IdentifiedAnnotationPair(event, time);
+      List<BaseToken> tokensBetween =
+          JCasUtil.selectBetween(jCas, BaseToken.class, pair.getArg1(), pair.getArg2());
+      int distance = tokensBetween.size();
+      // strict '<' keeps the earliest candidate when distances tie
+      if (nearest == null || distance < nearestDistance) {
+        nearest = pair;
+        nearestDistance = distance;
+      }
+    }
+
+    // every event was a subclass mention: nothing to propose (previously this
+    // case fell through to get(0) on an empty list and threw)
+    if (nearest == null) {
+      return result;
+    }
+    result.add(nearest);
+
+    // diagnostic trace of the chosen pair
+    System.out.println(sentence.getCoveredText());
+    System.out.println("arg1: " + nearest.getArg1().getCoveredText());
+    System.out.println("arg2: " + nearest.getArg2().getCoveredText());
+    System.out.println();
+
+    return result;
+  }
+
+  /**
+   * Ignores the features and always predicts the same label.
+   *
+   * @param features extracted features (unused)
+   * @return the constant label {@code "CONTAINS"}
+   */
+  @Override
+  public String classify(List<Feature> features) {
+    return "CONTAINS";
+  }
+}