You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2016/12/13 15:06:57 UTC
svn commit: r1774030 - in
/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn:
ae/ data/ eval/
Author: dligach
Date: Tue Dec 13 15:06:57 2016
New Revision: 1774030
URL: http://svn.apache.org/viewvc?rev=1774030&view=rev
Log:
setting up the code to run event-event position features in ctakes
Added:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/ae/EventEventPositionBasedAnnotator.java
Modified:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/data/ArgContextProvider.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/data/EventEventRelPositionPrinter.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/data/EventTimeRelPositionPrinter.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/eval/EventEventNeuralEvaluation.java
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/ae/EventEventPositionBasedAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/ae/EventEventPositionBasedAnnotator.java?rev=1774030&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/ae/EventEventPositionBasedAnnotator.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/ae/EventEventPositionBasedAnnotator.java Tue Dec 13 15:06:57 2016
@@ -0,0 +1,226 @@
+package org.apache.ctakes.temporal.nn.ae;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+
+import org.apache.ctakes.temporal.ae.TemporalRelationExtractorAnnotator.IdentifiedAnnotationPair;
+import org.apache.ctakes.temporal.nn.data.ArgContextProvider;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.relation.TemporalTextRelation;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.ml.CleartkAnnotator;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.Instance;
+import org.cleartk.util.ViewUriUtil;
+
+import com.google.common.collect.Lists;
+
+public class EventEventPositionBasedAnnotator extends CleartkAnnotator<String> {
+
+ public static final String NO_RELATION_CATEGORY = "none";
+ // private Random coin = new Random(0);
+
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+
+ // get all gold relation lookup
+ Map<List<Annotation>, BinaryTextRelation> relationLookup;
+ relationLookup = new HashMap<>();
+ if (this.isTraining()) {
+ relationLookup = new HashMap<>();
+ for (BinaryTextRelation relation : JCasUtil.select(jCas, BinaryTextRelation.class)) {
+ Annotation arg1 = relation.getArg1().getArgument();
+ Annotation arg2 = relation.getArg2().getArgument();
+ // The key is a list of args so we can do bi-directional lookup
+ List<Annotation> key = Arrays.asList(arg1, arg2);
+ if(relationLookup.containsKey(key)){
+ String reln = relationLookup.get(key).getCategory();
+ System.err.println("Error in: "+ ViewUriUtil.getURI(jCas).toString());
+ System.err.println("Error! This attempted relation " + relation.getCategory() +
+ " already has a relation " + reln + " at this span: " +
+ arg1.getCoveredText() + " -- " + arg2.getCoveredText());
+ } else {
+ relationLookup.put(key, relation);
+ }
+ }
+ }
+
+ for(Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
+ // collect all relevant relation arguments from the sentence
+ List<IdentifiedAnnotationPair> candidatePairs = getCandidateRelationArgumentPairs(jCas, sentence);
+
+ // walk through the pairs of annotations
+ for (IdentifiedAnnotationPair pair : candidatePairs) {
+ IdentifiedAnnotation arg1 = pair.getArg1();
+ IdentifiedAnnotation arg2 = pair.getArg2();
+
+ String context;
+ if(arg2.getBegin() < arg1.getBegin()) {
+ // ... event2 ... event1 ... scenario
+ System.out.println("\n-------------- THIS NEVER NAPPENS ------------\n");
+ context = ArgContextProvider.getEventEventPositionContext(jCas, sentence, arg2, arg1);
+ } else {
+ // ... event1 ... event2 ... scenario
+ context = ArgContextProvider.getEventEventPositionContext(jCas, sentence, arg1, arg2);
+ }
+
+ //derive features based on context:
+ List<Feature> feats = new ArrayList<>();
+ String[] tokens = context.split(" ");
+ for (String token: tokens){
+ feats.add(new Feature(token.toLowerCase()));
+ }
+
+ // during training, feed the features to the data writer
+ if(this.isTraining()) {
+ String category = getRelationCategory(relationLookup, arg1, arg2);
+
+ // drop some portion of negative examples during training
+ // if(category == null && coin.nextDouble() <= 0.5) {
+ // continue; // skip this negative example
+ // }
+
+ if(category == null) {
+ category = NO_RELATION_CATEGORY;
+ } else{
+ category = category.toLowerCase();
+ }
+ this.dataWriter.write(new Instance<>(category, feats));
+ } else {
+ String predictedCategory = this.classifier.classify(feats);
+
+ // add a relation annotation if a true relation was predicted
+ if (predictedCategory != null && !predictedCategory.equals(NO_RELATION_CATEGORY)) {
+
+ // if we predict an inverted relation, reverse the order of the
+ // arguments
+ if (predictedCategory.endsWith("-1")) {
+ predictedCategory = predictedCategory.substring(0, predictedCategory.length() - 2);
+ IdentifiedAnnotation temp = arg1;
+ arg1 = arg2;
+ arg2 = temp;
+ }
+
+ createRelation(jCas, arg1, arg2, predictedCategory.toUpperCase(), 0.0);
+ }
+ }
+ }
+
+ }
+ }
+
+ /**
+  * Looks up the gold relation between two annotations and returns its category in the
+  * position-dependent form used as the training label.
+  * <p>
+  * The pair is first looked up as (arg1, arg2) and, failing that, as (arg2, arg1). For a
+  * relation stored as (arg1, arg2) the suffix "-1" is appended when arg2 begins before
+  * arg1. For a relation stored as (arg2, arg1) the suffix is appended unless arg2 begins
+  * before arg1.
+  *
+  * @param relationLookup gold relations keyed by their ordered argument pair
+  * @param arg1 first annotation of the candidate pair
+  * @param arg2 second annotation of the candidate pair
+  * @return the category, possibly suffixed with "-1", or {@code null} when neither
+  *         ordering of the pair has an entry in {@code relationLookup}
+  */
+ protected String getRelationCategory(
+     Map<List<Annotation>, BinaryTextRelation> relationLookup,
+     IdentifiedAnnotation arg1,
+     IdentifiedAnnotation arg2) {
+
+   BinaryTextRelation forward = relationLookup.get(Arrays.asList(arg1, arg2));
+   if (forward != null) {
+     String label = forward.getCategory();
+     return arg2.getBegin() < arg1.getBegin() ? label + "-1" : label;
+   }
+
+   BinaryTextRelation backward = relationLookup.get(Arrays.asList(arg2, arg1));
+   if (backward == null) {
+     return null;
+   }
+   String label = backward.getCategory();
+   return arg2.getBegin() < arg1.getBegin() ? label : label + "-1";
+ }
+
+ /**
+  * Translates the gold (order-free) relation between two annotations into the
+  * position-dependent representation, i.e. the category or the category with "-1".
+  * <p>
+  * Both orderings of the pair are looked up. A relation stored as (arg1, arg2) wins and
+  * yields the plain category when arg1 begins strictly before arg2, otherwise the
+  * "-1" form. A relation stored as (arg2, arg1) yields the "-1" form when arg1 begins
+  * strictly before arg2, otherwise the plain category.
+  *
+  * @param relationLookup gold relations keyed by their ordered argument pair
+  * @param arg1 first annotation of the candidate pair
+  * @param arg2 second annotation of the candidate pair
+  * @return the position-dependent category, or {@code null} when the pair has no gold relation
+  */
+ protected String getRelationCategory2(Map<List<Annotation>, BinaryTextRelation> relationLookup,
+     IdentifiedAnnotation arg1,
+     IdentifiedAnnotation arg2) {
+
+   // gold view representation (i.e. only contains relations)
+   BinaryTextRelation forward = relationLookup.get(Arrays.asList(arg1, arg2));
+   BinaryTextRelation backward = relationLookup.get(Arrays.asList(arg2, arg1));
+   if (forward == null && backward == null) {
+     return null;
+   }
+
+   // text order decides between the plain and the "-1" form
+   boolean arg1First = arg1.getBegin() < arg2.getBegin();
+   if (forward != null) {
+     String label = forward.getCategory();
+     return arg1First ? label : label + "-1";
+   }
+   String label = backward.getCategory();
+   return arg1First ? label + "-1" : label;
+ }
+
+ /**
+  * Adds a {@link TemporalTextRelation} between the two annotations to the CAS indexes,
+  * together with its two {@link RelationArgument}s (roles "Arg1" and "Arg2").
+  *
+  * @param jCas the CAS to add the relation to
+  * @param arg1 annotation stored as the relation's first argument
+  * @param arg2 annotation stored as the relation's second argument
+  * @param predictedCategory category set on the relation
+  * @param confidence confidence value set on the relation
+  */
+ protected void createRelation(JCas jCas, IdentifiedAnnotation arg1,
+     IdentifiedAnnotation arg2, String predictedCategory, double confidence) {
+   RelationArgument first = indexedArgument(jCas, arg1, "Arg1");
+   RelationArgument second = indexedArgument(jCas, arg2, "Arg2");
+
+   TemporalTextRelation temporalRelation = new TemporalTextRelation(jCas);
+   temporalRelation.setArg1(first);
+   temporalRelation.setArg2(second);
+   temporalRelation.setCategory(predictedCategory);
+   temporalRelation.setConfidence(confidence);
+   temporalRelation.addToIndexes();
+ }
+
+ /** Creates a relation argument for the annotation with the given role and indexes it. */
+ private static RelationArgument indexedArgument(JCas jCas, IdentifiedAnnotation annotation, String role) {
+   RelationArgument argument = new RelationArgument(jCas);
+   argument.setArgument(annotation);
+   argument.setRole(role);
+   argument.addToIndexes();
+   return argument;
+ }
+
+ /**
+  * Pairs up the event mentions covered by a sentence. Only annotations whose runtime
+  * class is exactly {@link EventMention} are used, and every two of them are paired
+  * once, with the one that comes earlier in the covered list as the first argument.
+  *
+  * @param jCas the CAS to read event mentions from
+  * @param sentence the sentence whose covered event mentions are paired
+  * @return the candidate pairs, empty when the sentence covers fewer than two such events
+  */
+ private static List<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(JCas jCas, Sentence sentence) {
+
+   // filter events: instances of EventMention subclasses are dropped
+   List<EventMention> plainEvents = new ArrayList<>();
+   for (EventMention candidate : JCasUtil.selectCovered(jCas, EventMention.class, sentence)) {
+     if (EventMention.class.equals(candidate.getClass())) {
+       plainEvents.add(candidate);
+     }
+   }
+
+   List<IdentifiedAnnotationPair> result = new ArrayList<>();
+   for (int first = 0; first < plainEvents.size(); first++) {
+     for (int second = first + 1; second < plainEvents.size(); second++) {
+       result.add(new IdentifiedAnnotationPair(plainEvents.get(first), plainEvents.get(second)));
+     }
+   }
+
+   return result;
+ }
+}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/data/ArgContextProvider.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/data/ArgContextProvider.java?rev=1774030&r1=1774029&r2=1774030&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/data/ArgContextProvider.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/data/ArgContextProvider.java Tue Dec 13 15:06:57 2016
@@ -4,19 +4,152 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
public class ArgContextProvider {
+
+ /**
+ * Position features for event-time relations
+ */
+ public static String getEventTimePositionContext(
+ JCas jCas,
+ Sentence sent,
+ IdentifiedAnnotation time,
+ IdentifiedAnnotation event) {
+
+ // get sentence as a list of tokens
+ List<String> tokens = new ArrayList<>();
+ for(BaseToken baseToken : JCasUtil.selectCovered(jCas, BaseToken.class, sent)) {
+ tokens.add(baseToken.getCoveredText());
+ }
+
+ // find the positions of time and event mentions
+ // assume time consists of multipe words; event of one
+
+ int currentPosition = 0; // current token index
+ int timeFirstPosition = -1000; // timex's start index
+ int timeLastPosition = -1000; // timex's end index
+ int eventPosition = -1000; // event's index
+ for(BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, sent)) {
+ if(time.getBegin() == token.getBegin()) {
+ timeFirstPosition = currentPosition; // start of time expression found
+ }
+ if(time.getEnd() == token.getEnd()) {
+ timeLastPosition = currentPosition; // end of time expression found
+ }
+ if(event.getBegin() == token.getBegin()) {
+ eventPosition = currentPosition; // event postion found
+ }
+ currentPosition++;
+ }
+
+ // try to locate events that weren't found
+ // e.g. "this can be re-discussed tomorrow"
+ // "discussed" not found due to incorrect tokenization
+ if(eventPosition == -1000) {
+ currentPosition = 0;
+ for(BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, sent)) {
+ if(token.getCoveredText().contains(event.getCoveredText())) {
+ eventPosition = currentPosition;
+ }
+ currentPosition++;
+ }
+ }
+
+ if(eventPosition == -1000) {
+ System.out.println("event not found: " + event.getCoveredText());
+ System.out.println(sent.getCoveredText());
+ System.out.println();
+ eventPosition = 0; // just set it to zero for now
+ }
+
+ // now need to see if some times weren't found
+ if(timeFirstPosition == -1000 || timeLastPosition == -1000) {
+ System.out.println("time not found: " + time.getCoveredText());
+ System.out.println(sent.getCoveredText());
+ System.out.println();
+ timeFirstPosition = 0; // just set it to zero for now
+ timeLastPosition = 0; // just set it to zero for now
+ }
+
+ List<String> positionsWrtToTime = new ArrayList<>();
+ List<String> positionsWrtToEvent = new ArrayList<>();
+ int tokensInSentence = JCasUtil.selectCovered(jCas, BaseToken.class, sent).size();
+ for(int tokenIndex = 0; tokenIndex < tokensInSentence; tokenIndex++) {
+ if(tokenIndex < timeFirstPosition) {
+ positionsWrtToTime.add(Integer.toString(tokenIndex - timeFirstPosition));
+ } else if(tokenIndex >= timeFirstPosition && tokenIndex <= timeLastPosition) {
+ positionsWrtToTime.add("0");
+ } else {
+ positionsWrtToTime.add(Integer.toString(tokenIndex - timeLastPosition));
+ }
+ positionsWrtToEvent.add(Integer.toString(tokenIndex - eventPosition));
+ }
+
+ String tokensAsString = String.join(" ", tokens).replaceAll("[\r\n]", " ");
+ String distanceToTime = String.join(" ", positionsWrtToTime);
+ String distanceToEvent = String.join(" ", positionsWrtToEvent);
+
+ return tokensAsString + "|" + distanceToTime + "|" + distanceToEvent;
+ }
+
+ /**
+  * Position features for event-event relations.
+  * <p>
+  * Returns three "|"-separated fields, each a space-separated sequence with one entry
+  * per token of the sentence: the token texts, each token's offset relative to event1,
+  * and each token's offset relative to event2. An event is located by matching its begin
+  * offset against the token begin offsets. An event that matches no token keeps the
+  * -1000 sentinel, so its offsets come out as {@code tokenIndex + 1000}.
+  *
+  * @param jCas the CAS holding the sentence's tokens
+  * @param sent the sentence both events are taken from
+  * @param event1 the event the second field is relative to
+  * @param event2 the event the third field is relative to
+  * @return {@code tokens|offsets-to-event1|offsets-to-event2}
+  */
+ public static String getEventEventPositionContext(
+     JCas jCas,
+     Sentence sent,
+     IdentifiedAnnotation event1,
+     IdentifiedAnnotation event2) {
+
+   // query the token index once and reuse the result below
+   List<BaseToken> sentenceTokens = JCasUtil.selectCovered(jCas, BaseToken.class, sent);
+   int tokensInSentence = sentenceTokens.size();
+
+   // find the positions of event mentions
+   // assume both events consist of just head words
+   List<String> tokens = new ArrayList<>();
+   int event1Position = -1000; // event1's index
+   int event2Position = -1000; // event2's index
+   int currentPosition = 0; // current token index
+   for (BaseToken token : sentenceTokens) {
+     tokens.add(token.getCoveredText());
+     if (event1.getBegin() == token.getBegin()) {
+       event1Position = currentPosition; // event1 position found
+     }
+     if (event2.getBegin() == token.getBegin()) {
+       event2Position = currentPosition; // event2 position found
+     }
+     currentPosition++;
+   }
+
+   List<String> positionsWrtToEvent1 = new ArrayList<>();
+   List<String> positionsWrtToEvent2 = new ArrayList<>();
+   for (int tokenIndex = 0; tokenIndex < tokensInSentence; tokenIndex++) {
+     positionsWrtToEvent1.add(Integer.toString(tokenIndex - event1Position));
+     positionsWrtToEvent2.add(Integer.toString(tokenIndex - event2Position));
+   }
+
+   String tokensAsString = String.join(" ", tokens).replaceAll("[\r\n]", " ");
+   String distanceToEvent1 = String.join(" ", positionsWrtToEvent1);
+   String distanceToEvent2 = String.join(" ", positionsWrtToEvent2);
+
+   return tokensAsString + "|" + distanceToEvent1 + "|" + distanceToEvent2;
+ }
+
/**
* Return tokens between arg1 and arg2 as string
* @param contextSize number of tokens to include on the left of arg1 and on the right of arg2
*/
public static String getRegions(JCas jCas, Sentence sent, Annotation left, Annotation right, int contextSize) {
-
+
// tokens to the left from the left argument
List<String> leftTokens = new ArrayList<>();
for(BaseToken baseToken : JCasUtil.selectPreceding(jCas, BaseToken.class, left, contextSize)) {
@@ -25,28 +158,28 @@ public class ArgContextProvider {
}
}
String leftAsString = String.join(" ", leftTokens).replaceAll("[\r\n]", " ");
-
+
// left arg tokens
List<String> arg1Tokens = new ArrayList<>();
for(BaseToken baseToken : JCasUtil.selectCovered(jCas, BaseToken.class, left)) {
arg1Tokens.add(baseToken.getCoveredText());
}
String arg1AsString = String.join(" ", arg1Tokens).replaceAll("[\r\n]", " ");
-
+
// tokens between the arguments
List<String> betweenTokens = new ArrayList<>();
for(BaseToken baseToken : JCasUtil.selectBetween(jCas, BaseToken.class, left, right)) {
betweenTokens.add(baseToken.getCoveredText());
}
String betweenAsString = String.join(" ", betweenTokens).replaceAll("[\r\n]", " ");
-
+
// right arg tokens
List<String> arg2Tokens = new ArrayList<>();
for(BaseToken baseToken : JCasUtil.selectCovered(jCas, BaseToken.class, right)) {
arg2Tokens.add(baseToken.getCoveredText());
}
String arg2AsString = String.join(" ", arg2Tokens).replaceAll("[\r\n]", " ");
-
+
// tokens to the right from the right argument
List<String> rightTokens = new ArrayList<>();
for(BaseToken baseToken : JCasUtil.selectFollowing(jCas, BaseToken.class, right, contextSize)) {
@@ -55,10 +188,10 @@ public class ArgContextProvider {
}
}
String rightAsString = String.join(" ", rightTokens).replaceAll("[\r\n]", " ");
-
+
return leftAsString + "|" + arg1AsString + "|" + betweenAsString + "|" + arg2AsString + "|" + rightAsString;
}
-
+
/**
* Print words from left to right.
* @param contextSize number of tokens to include on the left of arg1 and on the right of arg2
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/data/EventEventRelPositionPrinter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/data/EventEventRelPositionPrinter.java?rev=1774030&r1=1774029&r2=1774030&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/data/EventEventRelPositionPrinter.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/data/EventEventRelPositionPrinter.java Tue Dec 13 15:06:57 2016
@@ -33,7 +33,6 @@ import org.apache.ctakes.temporal.durati
import org.apache.ctakes.temporal.eval.CommandLine;
import org.apache.ctakes.temporal.eval.THYMEData;
import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
-import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.uima.analysis_engine.AnalysisEngine;
@@ -194,8 +193,7 @@ public class EventEventRelPositionPrinte
System.out.println();
}
- String context = getPositionContext(systemView, sentence, mention1, mention2);
- // String context = ArgContextProvider.getRegions(systemView, sentence, mention1, mention2, 2);
+ String context = ArgContextProvider.getEventEventPositionContext(systemView, sentence, mention1, mention2);
String text = String.format("%s|%s", label, context);
eventEventRelationsInSentence.add(text.toLowerCase());
@@ -210,54 +208,4 @@ public class EventEventRelPositionPrinte
}
}
}
-
- /**
- * Print indices
- * @param contextSize number of tokens to include on the left of arg1 and on the right of arg2
- */
- public static String getPositionContext(
- JCas jCas,
- Sentence sent,
- EventMention event1,
- EventMention event2) {
-
- // get sentence as a list of tokens
- List<String> tokens = new ArrayList<>();
- for(BaseToken baseToken : JCasUtil.selectCovered(jCas, BaseToken.class, sent)) {
- tokens.add(baseToken.getCoveredText());
- }
-
- // find the positions of event mentions
- // assume both events consists of just head words
-
- int currentPosition = 0; // current token index
- int event1Position = -1000; // event1's index
- int event2Position = -1000; // event2's index
-
- for(BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, sent)) {
- if(event1.getBegin() == token.getBegin()) {
- event1Position = currentPosition; // event1 position found
- }
- if(event2.getBegin() == token.getBegin()) {
- event2Position = currentPosition; // event2 postion found
- }
- currentPosition++;
- }
-
- List<String> positionsWrtToEvent1 = new ArrayList<>();
- List<String> positionsWrtToEvent2 = new ArrayList<>();
- int tokensInSentence = JCasUtil.selectCovered(jCas, BaseToken.class, sent).size();
-
- for(int tokenIndex = 0; tokenIndex < tokensInSentence; tokenIndex++) {
-
- positionsWrtToEvent1.add(Integer.toString(tokenIndex - event1Position));
- positionsWrtToEvent2.add(Integer.toString(tokenIndex - event2Position));
- }
-
- String tokensAsString = String.join(" ", tokens).replaceAll("[\r\n]", " ");
- String distanceToTime = String.join(" ", positionsWrtToEvent1);
- String distanceToEvent = String.join(" ", positionsWrtToEvent2);
-
- return tokensAsString + "|" + distanceToTime + "|" + distanceToEvent;
- }
}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/data/EventTimeRelPositionPrinter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/data/EventTimeRelPositionPrinter.java?rev=1774030&r1=1774029&r2=1774030&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/data/EventTimeRelPositionPrinter.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/data/EventTimeRelPositionPrinter.java Tue Dec 13 15:06:57 2016
@@ -35,7 +35,6 @@ import org.apache.ctakes.temporal.eval.C
import org.apache.ctakes.temporal.eval.THYMEData;
import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
import org.apache.ctakes.typesystem.type.relation.TemporalTextRelation;
-import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textsem.TimeMention;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
@@ -216,7 +215,7 @@ public class EventTimeRelPositionPrinter
}
}
- String context = getPositionContext(systemView, sentence, time, event);
+ String context = ArgContextProvider.getEventTimePositionContext(systemView, sentence, time, event);
String text = String.format("%s|%s", label, context);
eventTimeRelationsInSentence.add(text.toLowerCase());
}
@@ -230,89 +229,4 @@ public class EventTimeRelPositionPrinter
}
}
}
-
- /**
- * Print indices
- * @param contextSize number of tokens to include on the left of arg1 and on the right of arg2
- */
- public static String getPositionContext(
- JCas jCas,
- Sentence sent,
- TimeMention time,
- EventMention event) {
-
- // get sentence as a list of tokens
- List<String> tokens = new ArrayList<>();
- for(BaseToken baseToken : JCasUtil.selectCovered(jCas, BaseToken.class, sent)) {
- tokens.add(baseToken.getCoveredText());
- }
-
- // find the positions of time and event mentions
- // assume time consists of multipe words; event of one
- int currentPosition = 0; // current token index
- int timeFirstPosition = -1000; // timex's start index
- int timeLastPosition = -1000; // timex's end index
- int eventPosition = -1000; // event's index
- for(BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, sent)) {
- if(time.getBegin() == token.getBegin()) {
- timeFirstPosition = currentPosition; // start of time expression found
- }
- if(time.getEnd() == token.getEnd()) {
- timeLastPosition = currentPosition; // end of time expression found
- }
- if(event.getBegin() == token.getBegin()) {
- eventPosition = currentPosition; // event postion found
- }
- currentPosition++;
- }
-
- // try to locate events that weren't found
- // e.g. "this can be re-discussed tomorrow"
- // "discussed" not found due to incorrect tokenization
- if(eventPosition == -1000) {
- currentPosition = 0;
- for(BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, sent)) {
- if(token.getCoveredText().contains(event.getCoveredText())) {
- eventPosition = currentPosition;
- }
- currentPosition++;
- }
- }
-
- if(eventPosition == -1000) {
- System.out.println("event not found: " + event.getCoveredText());
- System.out.println(sent.getCoveredText());
- System.out.println();
- eventPosition = 0; // just set it to zero for now
- }
-
- // now need to see if some times weren't found
- if(timeFirstPosition == -1000 || timeLastPosition == -1000) {
- System.out.println("time not found: " + time.getCoveredText());
- System.out.println(sent.getCoveredText());
- System.out.println();
- timeFirstPosition = 0; // just set it to zero for now
- timeLastPosition = 0; // just set it to zero for now
- }
-
- List<String> positionsWrtToTime = new ArrayList<>();
- List<String> positionsWrtToEvent = new ArrayList<>();
- int tokensInSentence = JCasUtil.selectCovered(jCas, BaseToken.class, sent).size();
- for(int tokenIndex = 0; tokenIndex < tokensInSentence; tokenIndex++) {
- if(tokenIndex < timeFirstPosition) {
- positionsWrtToTime.add(Integer.toString(tokenIndex - timeFirstPosition));
- } else if(tokenIndex >= timeFirstPosition && tokenIndex <= timeLastPosition) {
- positionsWrtToTime.add("0");
- } else {
- positionsWrtToTime.add(Integer.toString(tokenIndex - timeLastPosition));
- }
- positionsWrtToEvent.add(Integer.toString(tokenIndex - eventPosition));
- }
-
- String tokensAsString = String.join(" ", tokens).replaceAll("[\r\n]", " ");
- String distanceToTime = String.join(" ", positionsWrtToTime);
- String distanceToEvent = String.join(" ", positionsWrtToEvent);
-
- return tokensAsString + "|" + distanceToTime + "|" + distanceToEvent;
- }
}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/eval/EventEventNeuralEvaluation.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/eval/EventEventNeuralEvaluation.java?rev=1774030&r1=1774029&r2=1774030&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/eval/EventEventNeuralEvaluation.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/eval/EventEventNeuralEvaluation.java Tue Dec 13 15:06:57 2016
@@ -282,7 +282,7 @@ EvaluationOfTemporalRelations_ImplBase{
aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(PreserveEventEventRelations.class));
if (this.useClosure) {
- // aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(AddClosure.class));//aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(AddTransitiveContainsRelations.class));
+ // aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(AddClosure.class));//aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(AddTransitiveContainsRelations.class));
// aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(AddContain2Overlap.class));
// aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(AddTransitiveBeforeAndOnRelations.class));
}