You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2016/09/24 15:33:36 UTC

svn commit: r1762128 - /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/ae/EventTimePosBasedAnnotator.java

Author: dligach
Date: Sat Sep 24 15:33:36 2016
New Revision: 1762128

URL: http://svn.apache.org/viewvc?rev=1762128&view=rev
Log:
A version of EventTimeAnnotator that outputs pos features instead of token features

Added:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/ae/EventTimePosBasedAnnotator.java

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/ae/EventTimePosBasedAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/ae/EventTimePosBasedAnnotator.java?rev=1762128&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/ae/EventTimePosBasedAnnotator.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/ae/EventTimePosBasedAnnotator.java Sat Sep 24 15:33:36 2016
@@ -0,0 +1,188 @@
+package org.apache.ctakes.temporal.nn.ae;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+
+import org.apache.ctakes.temporal.ae.TemporalRelationExtractorAnnotator.IdentifiedAnnotationPair;
+import org.apache.ctakes.temporal.nn.data.EventTimeRelPrinter;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.relation.TemporalTextRelation;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.ml.CleartkAnnotator;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.Instance;
+import org.cleartk.util.ViewUriUtil;
+
+import com.google.common.collect.Lists;
+
+public class EventTimePosBasedAnnotator extends CleartkAnnotator<String> {
+
+  public static final String NO_RELATION_CATEGORY = "none";
+
+  public EventTimePosBasedAnnotator() {
+    // TODO Auto-generated constructor stub
+  }
+
+  @Override
+  public void process(JCas jCas) throws AnalysisEngineProcessException {
+
+    //get all gold relation lookup
+    Map<List<Annotation>, BinaryTextRelation> relationLookup;
+    relationLookup = new HashMap<>();
+    if(this.isTraining()) {
+      relationLookup = new HashMap<>();
+      for (BinaryTextRelation relation : JCasUtil.select(jCas, BinaryTextRelation.class)) {
+        Annotation arg1 = relation.getArg1().getArgument();
+        Annotation arg2 = relation.getArg2().getArgument();
+        // The key is a list of args so we can do bi-directional lookup
+        List<Annotation> key = Arrays.asList(arg1, arg2);
+        if(relationLookup.containsKey(key)){
+          String reln = relationLookup.get(key).getCategory();
+          System.err.println("Error in: "+ ViewUriUtil.getURI(jCas).toString());
+          System.err.println("Error! This attempted relation " + relation.getCategory() + " already has a relation " + reln + " at this span: " + arg1.getCoveredText() + " -- " + arg2.getCoveredText());
+        } else{
+          relationLookup.put(key, relation);
+        }
+      }
+    }
+
+    // go over sentences, extracting event-time relation instances
+    for(Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
+      // collect all relevant relation arguments from the sentence
+      List<IdentifiedAnnotationPair> candidatePairs =
+          getCandidateRelationArgumentPairs(jCas, sentence);
+
+      // walk through the pairs of annotations
+      for(IdentifiedAnnotationPair pair : candidatePairs) {
+        IdentifiedAnnotation arg1 = pair.getArg1();
+        IdentifiedAnnotation arg2 = pair.getArg2();
+
+        String context;
+        if (arg2.getBegin() < arg1.getBegin()) {
+          // ... time ... event ... scenario
+          context = EventTimeRelPrinter.getPosContext(jCas, sentence, arg2, "t", arg1, "e", 2);
+        } else {
+          // ... event ... time ... scenario
+          context = EventTimeRelPrinter.getPosContext(jCas, sentence, arg1, "e", arg2, "t", 2);
+        }
+
+        // derive features based on context
+        List<Feature> features = new ArrayList<>();
+        String[] tokens = context.split(" ");
+        for (String token: tokens){
+          features.add(new Feature(token.toLowerCase()));
+        }
+
+        // during training, feed the features to the data writer
+        if (this.isTraining()) {
+          String category = getRelationCategory(relationLookup, arg1, arg2);
+          if (category == null) {
+            category = NO_RELATION_CATEGORY;
+          } else{
+            category = category.toLowerCase();
+          }
+          this.dataWriter.write(new Instance<>(category, features));
+        }
+
+        // during classification feed the features to the classifier and create annotations
+        else {
+          String predictedCategory = this.classifier.classify(features);
+
+          // add a relation annotation if a true relation was predicted
+          if (predictedCategory != null && !predictedCategory.equals(NO_RELATION_CATEGORY)) {
+
+            // if we predict an inverted relation, reverse the order of the arguments
+            if (predictedCategory.endsWith("-1")) {
+              predictedCategory = predictedCategory.substring(0, predictedCategory.length() - 2);
+              if(arg1 instanceof TimeMention){
+                IdentifiedAnnotation temp = arg1;
+                arg1 = arg2;
+                arg2 = temp;
+              }
+            } else {
+              if(arg1 instanceof EventMention){
+                IdentifiedAnnotation temp = arg1;
+                arg1 = arg2;
+                arg2 = temp;
+              }
+            }
+
+            createRelation(jCas, arg1, arg2, predictedCategory.toUpperCase(), 0.0);
+          }
+        }
+      }
+
+    }
+  }
+  
+  /** Dima's way of getting lables
+   * @param relationLookup
+   * @param arg1
+   * @param arg2
+   * @return
+   */
+  protected String getRelationCategory(Map<List<Annotation>, BinaryTextRelation> relationLookup,
+      IdentifiedAnnotation arg1,
+      IdentifiedAnnotation arg2){
+    BinaryTextRelation relation = relationLookup.get(Arrays.asList(arg1, arg2));
+    String category = null;
+    if (relation != null) {
+      category = relation.getCategory();
+      if(arg1 instanceof EventMention){
+        category = category + "-1";
+      }
+    } else {
+      relation = relationLookup.get(Arrays.asList(arg2, arg1));
+      if (relation != null) {
+        category = relation.getCategory();
+        if(arg2 instanceof EventMention){
+          category = category + "-1";
+        }
+      }
+    }
+    return category;
+
+  }
+
+  protected void createRelation(JCas jCas, IdentifiedAnnotation arg1,
+      IdentifiedAnnotation arg2, String predictedCategory, double confidence) {
+    RelationArgument relArg1 = new RelationArgument(jCas);
+    relArg1.setArgument(arg1);
+    relArg1.setRole("Arg1");
+    relArg1.addToIndexes();
+    RelationArgument relArg2 = new RelationArgument(jCas);
+    relArg2.setArgument(arg2);
+    relArg2.setRole("Arg2");
+    relArg2.addToIndexes();
+    TemporalTextRelation relation = new TemporalTextRelation(jCas);
+    relation.setArg1(relArg1);
+    relation.setArg2(relArg2);
+    relation.setCategory(predictedCategory);
+    relation.setConfidence(confidence);
+    relation.addToIndexes();
+  }
+
+  public List<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(JCas jCas, Annotation sentence) {
+    List<IdentifiedAnnotationPair> pairs = Lists.newArrayList();
+    for (EventMention event : JCasUtil.selectCovered(jCas, EventMention.class, sentence)) {
+      // ignore subclasses like Procedure and Disease/Disorder
+      if (event.getClass().equals(EventMention.class)) {
+        for (TimeMention time : JCasUtil.selectCovered(jCas, TimeMention.class, sentence)) {
+          pairs.add(new IdentifiedAnnotationPair(event, time));
+        }
+      }
+    }
+    return pairs;
+  }
+}