You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2016/09/23 14:24:50 UTC
svn commit: r1762065 -
/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/ae/EventTimeHybridAnnotator.java
Author: dligach
Date: Fri Sep 23 14:24:50 2016
New Revision: 1762065
URL: http://svn.apache.org/viewvc?rev=1762065&view=rev
Log:
annotator for token/pos hybrid model
Added:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/ae/EventTimeHybridAnnotator.java
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/ae/EventTimeHybridAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/ae/EventTimeHybridAnnotator.java?rev=1762065&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/ae/EventTimeHybridAnnotator.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/ae/EventTimeHybridAnnotator.java Fri Sep 23 14:24:50 2016
@@ -0,0 +1,293 @@
+package org.apache.ctakes.temporal.nn.ae;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+
+import org.apache.ctakes.temporal.ae.TemporalRelationExtractorAnnotator.IdentifiedAnnotationPair;
+import org.apache.ctakes.temporal.nn.data.EventTimeRelPrinter;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.relation.TemporalTextRelation;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.ml.CleartkAnnotator;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.Instance;
+import org.cleartk.util.ViewUriUtil;
+
+import com.google.common.collect.Lists;
+
+public class EventTimeHybridAnnotator extends CleartkAnnotator<String> {
+
+ public static final String NO_RELATION_CATEGORY = "none";
+
+ public EventTimeHybridAnnotator() {
+ // No-argument constructor; the body is intentionally empty.
+ }
+
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+
+ //get all gold relation lookup
+ Map<List<Annotation>, BinaryTextRelation> relationLookup;
+ relationLookup = new HashMap<>();
+ if (this.isTraining()) {
+ relationLookup = new HashMap<>();
+ for (BinaryTextRelation relation : JCasUtil.select(jCas, BinaryTextRelation.class)) {
+ Annotation arg1 = relation.getArg1().getArgument();
+ Annotation arg2 = relation.getArg2().getArgument();
+ // The key is a list of args so we can do bi-directional lookup
+ List<Annotation> key = Arrays.asList(arg1, arg2);
+ if(relationLookup.containsKey(key)){
+ String reln = relationLookup.get(key).getCategory();
+ System.err.println("Error in: "+ ViewUriUtil.getURI(jCas).toString());
+ System.err.println("Error! This attempted relation " + relation.getCategory() + " already has a relation " + reln + " at this span: " + arg1.getCoveredText() + " -- " + arg2.getCoveredText());
+ }else{
+ relationLookup.put(key, relation);
+ }
+ }
+ }
+
+ // go over sentences, extracting event-time relation instances
+ for(Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
+ // collect all relevant relation arguments from the sentence
+ List<IdentifiedAnnotationPair> candidatePairs =
+ getCandidateRelationArgumentPairs(jCas, sentence);
+
+ // walk through the pairs of annotations
+ for (IdentifiedAnnotationPair pair : candidatePairs) {
+ IdentifiedAnnotation arg1 = pair.getArg1();
+ IdentifiedAnnotation arg2 = pair.getArg2();
+
+ String tokenContext;
+ String posContext;
+ if(arg2.getBegin() < arg1.getBegin()) {
+ // ... time ... event ... scenario
+ tokenContext = EventTimeRelPrinter.getTokenContext(jCas, sentence, arg2, "t", arg1, "e", 2);
+ posContext = EventTimeRelPrinter.getPosContext(jCas, sentence, arg2, "t", arg1, "e", 2);
+ } else {
+ // ... event ... time ... scenario
+ tokenContext = EventTimeRelPrinter.getTokenContext(jCas, sentence, arg1, "e", arg2, "t", 2);
+ posContext = EventTimeRelPrinter.getPosContext(jCas, sentence, arg1, "e", arg2, "t", 2);
+ }
+
+ //derive features based on context:
+ List<Feature> features = new ArrayList<>();
+ String[] tokens = (tokenContext + "|" + posContext).split(" ");
+ for (String token: tokens){
+ features.add(new Feature(token.toLowerCase()));
+ }
+
+ // during training, feed the features to the data writer
+ if (this.isTraining()) {
+ String category = getRelationCategory(relationLookup, arg1, arg2);
+ if (category == null) {
+ category = NO_RELATION_CATEGORY;
+ } else{
+ category = category.toLowerCase();
+ }
+ this.dataWriter.write(new Instance<>(category, features));
+ }
+ // during classification feed the features to the classifier and create annotations
+ else {
+ String predictedCategory = this.classifier.classify(features);
+
+ // add a relation annotation if a true relation was predicted
+ if (predictedCategory != null && !predictedCategory.equals(NO_RELATION_CATEGORY)) {
+
+ // if we predict an inverted relation, reverse the order of the arguments
+ if (predictedCategory.endsWith("-1")) {
+ predictedCategory = predictedCategory.substring(0, predictedCategory.length() - 2);
+ if(arg1 instanceof TimeMention){
+ IdentifiedAnnotation temp = arg1;
+ arg1 = arg2;
+ arg2 = temp;
+ }
+ }else{
+ if(arg1 instanceof EventMention){
+ IdentifiedAnnotation temp = arg1;
+ arg1 = arg2;
+ arg2 = temp;
+ }
+ }
+
+ createRelation(jCas, arg1, arg2, predictedCategory.toUpperCase(), 0.0);
+ }
+ }
+ }
+
+ }
+ }
+
+ /**
+  * Renders two arguments and the tokens around them as a single space-separated string,
+  * reading left to right: left context, tagged left argument, tokens in between, tagged
+  * right argument, right context. Line breaks in the result are replaced by spaces.
+  * <p>
+  * An argument that is an {@link EventMention} is first widened to the longest event
+  * listed for it in {@code coveringMap}; the widened span only moves the anchors used to
+  * pick context tokens, the text printed inside the tags is always the original argument.
+  *
+  * @param jCas        the CAS holding the tokens
+  * @param sent        the sentence; context tokens outside of it are dropped
+  * @param left        the argument that comes first
+  * @param leftType    tag name wrapped around the left argument
+  * @param right       the argument that comes second
+  * @param rightType   tag name wrapped around the right argument
+  * @param contextSize number of tokens requested before the left and after the right argument
+  * @param coveringMap events covering each event, used to widen event arguments
+  * @return the rendered context string
+  */
+ public static String getTokensBetweenExpanded(
+     JCas jCas,
+     Sentence sent,
+     Annotation left,
+     String leftType,
+     Annotation right,
+     String rightType,
+     int contextSize,
+     Map<EventMention, Collection<EventMention>> coveringMap) {
+
+   // When nothing wider is found (or the argument is not an event) the anchor is the
+   // argument itself, so one anchor per side covers both the widened and the plain case.
+   final Annotation leftAnchor = (left instanceof EventMention) ? getLongerEvent(coveringMap, left) : left;
+   final Annotation rightAnchor = (right instanceof EventMention) ? getLongerEvent(coveringMap, right) : right;
+   final boolean leftWidened = leftAnchor != left;
+
+   final List<String> pieces = new ArrayList<>();
+
+   // Left context, restricted to tokens that start inside the sentence.
+   for (BaseToken before : JCasUtil.selectPreceding(jCas, BaseToken.class, leftAnchor, contextSize)) {
+     if (before.getBegin() >= sent.getBegin()) {
+       pieces.add(before.getCoveredText());
+     }
+   }
+
+   pieces.add("<" + leftType + ">");
+   pieces.add(left.getCoveredText());
+   pieces.add("</" + leftType + ">");
+
+   // NOTE(review): a widened left argument takes precedence, so when both sides are
+   // widened the in-between span still ends at the original right argument - confirm
+   // that this is intended.
+   final Annotation betweenEnd = leftWidened ? right : rightAnchor;
+   for (BaseToken between : JCasUtil.selectBetween(jCas, BaseToken.class, leftAnchor, betweenEnd)) {
+     pieces.add(between.getCoveredText());
+   }
+
+   pieces.add("<" + rightType + ">");
+   pieces.add(right.getCoveredText());
+   pieces.add("</" + rightType + ">");
+
+   // Right context, restricted to tokens that end inside the sentence.
+   for (BaseToken after : JCasUtil.selectFollowing(jCas, BaseToken.class, rightAnchor, contextSize)) {
+     if (after.getEnd() <= sent.getEnd()) {
+       pieces.add(after.getCoveredText());
+     }
+   }
+
+   return String.join(" ", pieces).replaceAll("[\r\n]", " ");
+ }
+
+ /**
+  * Returns the event with the largest span among {@code event} and the events listed for
+  * it in {@code coveringMap}. A listed event replaces the current choice only when its
+  * span is strictly larger, so {@code event} itself is returned on ties.
+  *
+  * @param coveringMap events covering each event; a {@code null} map or a missing entry
+  *                    means nothing wider is known
+  * @param event       the event to widen
+  * @return the widest event found, or {@code event} when there is none
+  */
+ private static Annotation getLongerEvent(Map<EventMention, Collection<EventMention>> coveringMap,
+     Annotation event) {
+   if (coveringMap == null) {
+     return event;
+   }
+   Collection<EventMention> eventList = coveringMap.get(event);
+   if (eventList == null) {
+     // A plain Map returns null for an unknown key; iterating it would throw.
+     return event;
+   }
+
+   int maxSpan = getSpan(event);
+   Annotation longerEvent = event;
+   for (EventMention covEvent : eventList) {
+     int span = getSpan(covEvent);
+     if (span > maxSpan) {
+       maxSpan = span;
+       longerEvent = covEvent;
+     }
+   }
+   return longerEvent;
+ }
+
+ private static int getSpan(Annotation left) {
+ return (left.getEnd()-left.getBegin());
+ }
+
+ /** Dima's way of getting lables
+ * @param relationLookup
+ * @param arg1
+ * @param arg2
+ * @return
+ */
+ protected String getRelationCategory(Map<List<Annotation>, BinaryTextRelation> relationLookup,
+ IdentifiedAnnotation arg1,
+ IdentifiedAnnotation arg2){
+ BinaryTextRelation relation = relationLookup.get(Arrays.asList(arg1, arg2));
+ String category = null;
+ if (relation != null) {
+ category = relation.getCategory();
+ if(arg1 instanceof EventMention){
+ category = category + "-1";
+ }
+ } else {
+ relation = relationLookup.get(Arrays.asList(arg2, arg1));
+ if (relation != null) {
+ category = relation.getCategory();
+ if(arg2 instanceof EventMention){
+ category = category + "-1";
+ }
+ }
+ }
+ return category;
+
+ }
+
+ protected void createRelation(JCas jCas, IdentifiedAnnotation arg1,
+ IdentifiedAnnotation arg2, String predictedCategory, double confidence) {
+ RelationArgument relArg1 = new RelationArgument(jCas);
+ relArg1.setArgument(arg1);
+ relArg1.setRole("Arg1");
+ relArg1.addToIndexes();
+ RelationArgument relArg2 = new RelationArgument(jCas);
+ relArg2.setArgument(arg2);
+ relArg2.setRole("Arg2");
+ relArg2.addToIndexes();
+ TemporalTextRelation relation = new TemporalTextRelation(jCas);
+ relation.setArg1(relArg1);
+ relation.setArg2(relArg2);
+ relation.setCategory(predictedCategory);
+ relation.setConfidence(confidence);
+ relation.addToIndexes();
+ }
+
+ /**
+  * Pairs every plain {@link EventMention} covered by the sentence with every
+  * {@link TimeMention} covered by the same sentence. The event is always the first
+  * element of the pair and the time the second.
+  *
+  * @param jCas     the CAS to read annotations from
+  * @param sentence the annotation whose span delimits the candidates
+  * @return all (event, time) pairs, grouped by event; empty if either kind is missing
+  */
+ public List<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(JCas jCas, Annotation sentence) {
+   List<IdentifiedAnnotationPair> pairs = Lists.newArrayList();
+   // The times covered by the sentence do not depend on the event, so select them once.
+   List<TimeMention> times = JCasUtil.selectCovered(jCas, TimeMention.class, sentence);
+   for (EventMention event : JCasUtil.selectCovered(jCas, EventMention.class, sentence)) {
+     // ignore subclasses like Procedure and Disease/Disorder
+     if (!event.getClass().equals(EventMention.class)) {
+       continue;
+     }
+     for (TimeMention time : times) {
+       pairs.add(new IdentifiedAnnotationPair(event, time));
+     }
+   }
+   return pairs;
+ }
+}