You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by cl...@apache.org on 2014/11/11 15:26:06 UTC
svn commit: r1638119 - in
/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae:
./ feature/
Author: clin
Date: Tue Nov 11 14:26:05 2014
New Revision: 1638119
URL: http://svn.apache.org/r1638119
Log:
Modify event-discharge/admission Time annotators for i2b2 data (section parsing added).
Modify event-time, event-event annotators for i2b2 data (reverse relation, features).
Added:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventPositionFeatureExtractor.java
Modified:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAdmissionTimeAnnotator.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventDischargeTimeAnnotator.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventI2B2RelationAnnotator.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeI2B2RelationAnnotator.java
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAdmissionTimeAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAdmissionTimeAnnotator.java?rev=1638119&r1=1638118&r2=1638119&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAdmissionTimeAnnotator.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAdmissionTimeAnnotator.java Tue Nov 11 14:26:05 2014
@@ -20,6 +20,7 @@ package org.apache.ctakes.temporal.ae;
import java.io.File;
import java.util.Arrays;
+import java.util.Collection;
import java.util.HashMap;
//import java.io.IOException;
import java.util.List;
@@ -29,6 +30,7 @@ import java.util.Map;
import org.apache.ctakes.temporal.ae.feature.ClosestVerbExtractor;
//import org.apache.ctakes.temporal.ae.feature.CoveredTextToValuesExtractor;
import org.apache.ctakes.temporal.ae.feature.DateAndMeasurementExtractor;
+import org.apache.ctakes.temporal.ae.feature.EventPositionFeatureExtractor;
import org.apache.ctakes.temporal.ae.feature.EventPropertyExtractor;
import org.apache.ctakes.temporal.ae.feature.NearbyVerbTenseXExtractor;
import org.apache.ctakes.temporal.ae.feature.SectionHeaderExtractor;
@@ -40,6 +42,8 @@ import org.apache.ctakes.typesystem.type
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -63,6 +67,8 @@ import org.cleartk.ml.jar.DefaultDataWri
import org.cleartk.ml.jar.DirectoryDataWriterFactory;
import org.cleartk.ml.jar.GenericJarClassifierFactory;
+import com.google.common.collect.Lists;
+
//import com.google.common.base.Charsets;
public class EventAdmissionTimeAnnotator extends CleartkAnnotator<String> {
@@ -79,7 +85,7 @@ public class EventAdmissionTimeAnnotator
DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
outputDirectory);
}
-
+
public static AnalysisEngineDescription createAnnotatorDescription(String modelPath)
throws ResourceInitializationException {
return AnalysisEngineFactory.createEngineDescription(
@@ -90,11 +96,11 @@ public class EventAdmissionTimeAnnotator
modelPath);
}
- /**
- * @deprecated use String path instead of File.
- * ClearTK will automatically Resolve the String to an InputStream.
- * This will allow resources to be read within from a jar as well as File.
- */
+ /**
+ * @deprecated use String path instead of File.
+ * ClearTK will automatically Resolve the String to an InputStream.
+ * This will allow resources to be read within from a jar as well as File.
+ */
public static AnalysisEngineDescription createAnnotatorDescription(File modelDirectory)
throws ResourceInitializationException {
return AnalysisEngineFactory.createEngineDescription(
@@ -107,7 +113,8 @@ public class EventAdmissionTimeAnnotator
private CleartkExtractor<EventMention, BaseToken> contextExtractor;
private NearbyVerbTenseXExtractor verbTensePatternExtractor;
- private SectionHeaderExtractor sectionIDExtractor;
+ // private SectionHeaderExtractor sectionIDExtractor;
+ private EventPositionFeatureExtractor eventPositionExtractor;
private ClosestVerbExtractor closestVerbExtractor;
private TimeXExtractor timeXExtractor;
private EventPropertyExtractor genericExtractor;
@@ -129,7 +136,8 @@ public class EventAdmissionTimeAnnotator
new Covered(),
new Following(3));
this.verbTensePatternExtractor = new NearbyVerbTenseXExtractor();
- this.sectionIDExtractor = new SectionHeaderExtractor();
+ // this.sectionIDExtractor = new SectionHeaderExtractor();
+ this.eventPositionExtractor = new EventPositionFeatureExtractor();
this.closestVerbExtractor = new ClosestVerbExtractor();
this.timeXExtractor = new TimeXExtractor();
this.genericExtractor = new EventPropertyExtractor();
@@ -149,82 +157,97 @@ public class EventAdmissionTimeAnnotator
break;
}
}
+
+ //2. identify the HOPI section:
+ List<Segment> histories = Lists.newArrayList();
+ Collection<Segment> segments = JCasUtil.select(jCas, Segment.class);
+ for(Segment seg: segments){
+ if (seg.getId().equals("history")){//find the right segment
+ if(JCasUtil.selectCovered(jCas,Sentence.class,seg).size()>0){//ignore empty section
+ histories.add(seg);
+ }
+ }
+ }
+
if (admissionTime != null){
//get event-time1 relations:
Map<List<Annotation>, TemporalTextRelation> dischargeTimeRelationLookup;
- dischargeTimeRelationLookup = new HashMap<>();
- if (this.isTraining()) {
- dischargeTimeRelationLookup = new HashMap<>();
- for (TemporalTextRelation relation : JCasUtil.select(jCas, TemporalTextRelation.class)) {
- Annotation arg1 = relation.getArg1().getArgument();
- Annotation arg2 = relation.getArg2().getArgument();
- // The key is a list of args so we can do bi-directional lookup
- if(arg1 instanceof TimeMention && arg2 instanceof EventMention ){
- if( arg1==admissionTime){
- dischargeTimeRelationLookup.put(Arrays.asList(arg1, arg2), relation);
- continue;
- }
- }else if(arg1 instanceof EventMention && arg2 instanceof TimeMention){
- if( arg2==admissionTime ){
- dischargeTimeRelationLookup.put(Arrays.asList(arg1, arg2), relation);
- continue;
- }
- }
-
- }
- }
-
- for (EventMention eventMention : JCasUtil.select(jCas, EventMention.class)) {
- if (eventMention.getEvent() != null) {
- List<Feature> features = this.contextExtractor.extract(jCas, eventMention);
- features.addAll(this.verbTensePatternExtractor.extract(jCas, eventMention));//add nearby verb POS pattern feature
- features.addAll(this.sectionIDExtractor.extract(jCas, eventMention)); //add section heading
- features.addAll(this.closestVerbExtractor.extract(jCas, eventMention)); //add closest verb
- features.addAll(this.timeXExtractor.extract(jCas, eventMention)); //add the closest time expression types
- features.addAll(this.genericExtractor.extract(jCas, eventMention)); //add the closest time expression types
- features.addAll(this.dateExtractor.extract(jCas, eventMention)); //add the closest NE type
- features.addAll(this.umlsExtractor.extract(jCas, eventMention)); //add umls features
- // features.addAll(this.durationExtractor.extract(jCas, eventMention)); //add duration feature
- // features.addAll(this.disSemExtractor.extract(jCas, eventMention)); //add distributional semantic features
- if (this.isTraining()) {
- TemporalTextRelation relation = dischargeTimeRelationLookup.get(Arrays.asList(eventMention, admissionTime));
- String category = null;
- if (relation != null) {
- category = relation.getCategory();
- } else {
- relation = dischargeTimeRelationLookup.get(Arrays.asList(admissionTime, eventMention));
+ dischargeTimeRelationLookup = new HashMap<>();
+ if (this.isTraining()) {
+ dischargeTimeRelationLookup = new HashMap<>();
+ for (TemporalTextRelation relation : JCasUtil.select(jCas, TemporalTextRelation.class)) {
+ Annotation arg1 = relation.getArg1().getArgument();
+ Annotation arg2 = relation.getArg2().getArgument();
+ // The key is a list of args so we can do bi-directional lookup
+ if(arg1 instanceof TimeMention && arg2 instanceof EventMention ){
+ if( arg1==admissionTime){
+ dischargeTimeRelationLookup.put(Arrays.asList(arg1, arg2), relation);
+ continue;
+ }
+ }else if(arg1 instanceof EventMention && arg2 instanceof TimeMention){
+ if( arg2==admissionTime ){
+ dischargeTimeRelationLookup.put(Arrays.asList(arg1, arg2), relation);
+ continue;
+ }
+ }
+
+ }
+ }
+
+ for (Segment historyOfPresentIll : histories){
+ for (EventMention eventMention : JCasUtil.selectCovered(jCas, EventMention.class, historyOfPresentIll)) {
+ if (eventMention.getClass().equals(EventMention.class)) {//for every gold event
+ List<Feature> features = this.contextExtractor.extract(jCas, eventMention);
+ features.addAll(this.verbTensePatternExtractor.extract(jCas, eventMention));//add nearby verb POS pattern feature
+ // features.addAll(this.sectionIDExtractor.extract(jCas, eventMention)); //add section heading
+ features.addAll(this.eventPositionExtractor.extract(jCas, eventMention));
+ features.addAll(this.closestVerbExtractor.extract(jCas, eventMention)); //add closest verb
+ features.addAll(this.timeXExtractor.extract(jCas, eventMention)); //add the closest time expression types
+ features.addAll(this.genericExtractor.extract(jCas, eventMention)); //add the closest time expression types
+ features.addAll(this.dateExtractor.extract(jCas, eventMention)); //add the closest NE type
+ features.addAll(this.umlsExtractor.extract(jCas, eventMention)); //add umls features
+ // features.addAll(this.durationExtractor.extract(jCas, eventMention)); //add duration feature
+ // features.addAll(this.disSemExtractor.extract(jCas, eventMention)); //add distributional semantic features
+ if (this.isTraining()) {
+ TemporalTextRelation relation = dischargeTimeRelationLookup.get(Arrays.asList(eventMention, admissionTime));
+ String category = null;
if (relation != null) {
- if(relation.getCategory().equals("OVERLAP")){
- category = relation.getCategory();
- }else if (relation.getCategory().equals("BEFORE")){
- category = "AFTER";
- }else if (relation.getCategory().equals("AFTER")){
- category = "BEFORE";
+ category = relation.getCategory();
+ } else {
+ relation = dischargeTimeRelationLookup.get(Arrays.asList(admissionTime, eventMention));
+ if (relation != null) {
+ if(relation.getCategory().equals("OVERLAP")){
+ category = relation.getCategory();
+ }else if (relation.getCategory().equals("BEFORE")){
+ category = "AFTER";
+ }else if (relation.getCategory().equals("AFTER")){
+ category = "BEFORE";
+ }
}
}
- }
- if(category!=null){
- this.dataWriter.write(new Instance<>(category, features));
- }
- } else {
- String outcome = this.classifier.classify(features);
- if(outcome!=null){
- // add the relation to the CAS
- RelationArgument relArg1 = new RelationArgument(jCas);
- relArg1.setArgument(eventMention);
- relArg1.setRole("Argument");
- relArg1.addToIndexes();
- RelationArgument relArg2 = new RelationArgument(jCas);
- relArg2.setArgument(admissionTime);
- relArg2.setRole("Related_to");
- relArg2.addToIndexes();
- TemporalTextRelation relation = new TemporalTextRelation(jCas);
- relation.setArg1(relArg1);
- relation.setArg2(relArg2);
- relation.setCategory(outcome);
- relation.addToIndexes();
- }else{
- System.out.println("cannot classify "+ eventMention.getCoveredText()+" and " + admissionTime.getCoveredText());
+ if(category!=null){
+ this.dataWriter.write(new Instance<>(category, features));
+ }
+ } else {
+ String outcome = this.classifier.classify(features);
+ if(outcome!=null){
+ // add the relation to the CAS
+ RelationArgument relArg1 = new RelationArgument(jCas);
+ relArg1.setArgument(eventMention);
+ relArg1.setRole("Argument");
+ relArg1.addToIndexes();
+ RelationArgument relArg2 = new RelationArgument(jCas);
+ relArg2.setArgument(admissionTime);
+ relArg2.setRole("Related_to");
+ relArg2.addToIndexes();
+ TemporalTextRelation relation = new TemporalTextRelation(jCas);
+ relation.setArg1(relArg1);
+ relation.setArg2(relArg2);
+ relation.setCategory(outcome);
+ relation.addToIndexes();
+ }else{
+ System.out.println("cannot classify "+ eventMention.getCoveredText()+" and " + admissionTime.getCoveredText());
+ }
}
}
}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventDischargeTimeAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventDischargeTimeAnnotator.java?rev=1638119&r1=1638118&r2=1638119&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventDischargeTimeAnnotator.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventDischargeTimeAnnotator.java Tue Nov 11 14:26:05 2014
@@ -20,6 +20,7 @@ package org.apache.ctakes.temporal.ae;
import java.io.File;
import java.util.Arrays;
+import java.util.Collection;
import java.util.HashMap;
//import java.io.IOException;
import java.util.List;
@@ -31,7 +32,8 @@ import org.apache.ctakes.temporal.ae.fea
import org.apache.ctakes.temporal.ae.feature.DateAndMeasurementExtractor;
import org.apache.ctakes.temporal.ae.feature.EventPropertyExtractor;
import org.apache.ctakes.temporal.ae.feature.NearbyVerbTenseXExtractor;
-import org.apache.ctakes.temporal.ae.feature.SectionHeaderExtractor;
+//import org.apache.ctakes.temporal.ae.feature.SectionHeaderExtractor;
+import org.apache.ctakes.temporal.ae.feature.EventPositionFeatureExtractor;
import org.apache.ctakes.temporal.ae.feature.TimeXExtractor;
import org.apache.ctakes.temporal.ae.feature.UmlsSingleFeatureExtractor;
import org.apache.ctakes.typesystem.type.relation.RelationArgument;
@@ -40,6 +42,8 @@ import org.apache.ctakes.typesystem.type
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -63,6 +67,8 @@ import org.cleartk.ml.jar.DefaultDataWri
import org.cleartk.ml.jar.DirectoryDataWriterFactory;
import org.cleartk.ml.jar.GenericJarClassifierFactory;
+import com.google.common.collect.Lists;
+
//import com.google.common.base.Charsets;
public class EventDischargeTimeAnnotator extends CleartkAnnotator<String> {
@@ -89,11 +95,11 @@ public class EventDischargeTimeAnnotator
GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
modelPath);
}
- /**
- * @deprecated use String path instead of File.
- * ClearTK will automatically Resolve the String to an InputStream.
- * This will allow resources to be read within from a jar as well as File.
- */
+ /**
+ * @deprecated use String path instead of File.
+ * ClearTK will automatically Resolve the String to an InputStream.
+ * This will allow resources to be read within from a jar as well as File.
+ */
public static AnalysisEngineDescription createAnnotatorDescription(File modelDirectory)
throws ResourceInitializationException {
return AnalysisEngineFactory.createEngineDescription(
@@ -106,7 +112,8 @@ public class EventDischargeTimeAnnotator
private CleartkExtractor<EventMention, BaseToken> contextExtractor;
private NearbyVerbTenseXExtractor verbTensePatternExtractor;
- private SectionHeaderExtractor sectionIDExtractor;
+ // private SectionHeaderExtractor sectionIDExtractor;
+ private EventPositionFeatureExtractor eventPositionExtractor;
private ClosestVerbExtractor closestVerbExtractor;
private TimeXExtractor timeXExtractor;
private EventPropertyExtractor genericExtractor;
@@ -128,7 +135,8 @@ public class EventDischargeTimeAnnotator
new Covered(),
new Following(3));
this.verbTensePatternExtractor = new NearbyVerbTenseXExtractor();
- this.sectionIDExtractor = new SectionHeaderExtractor();
+ // this.sectionIDExtractor = new SectionHeaderExtractor();
+ this.eventPositionExtractor = new EventPositionFeatureExtractor();
this.closestVerbExtractor = new ClosestVerbExtractor();
this.timeXExtractor = new TimeXExtractor();
this.genericExtractor = new EventPropertyExtractor();
@@ -142,42 +150,59 @@ public class EventDischargeTimeAnnotator
TimeMention dischargeTime = null;
//TODO
//may need better way to identify Discharge Time other than relative span information:
- for (TimeMention time : JCasUtil.selectCovered(jCas, TimeMention.class, 40, 60)) {
- if(time.getTimeClass().equals("DATE")){
- dischargeTime = time;
- break;
+ findDischarge:
+ for (Sentence sent : JCasUtil.selectCovered(jCas, Sentence.class, 40, 60)) {
+ for(TimeMention time: JCasUtil.selectCovered(jCas, TimeMention.class, sent)){
+ if(time.getTimeClass().equals("DATE")){
+ dischargeTime = time;
+ break findDischarge;
+ }
+ }
+ }
+
+ //2. identify the Hospital Course section:
+ List<Segment> courses = Lists.newArrayList();
+ Collection<Segment> segments = JCasUtil.select(jCas, Segment.class);
+ for(Segment seg: segments){
+ if (seg.getId().equals("course")){//find the right segment
+ if(JCasUtil.selectCovered(jCas,Sentence.class,seg).size()>0){//ignore empty section
+ courses.add(seg);
+ }
}
}
- if (dischargeTime != null){
- //get event-time1 relations:
- Map<List<Annotation>, TemporalTextRelation> dischargeTimeRelationLookup;
- dischargeTimeRelationLookup = new HashMap<>();
- if (this.isTraining()) {
- dischargeTimeRelationLookup = new HashMap<>();
- for (TemporalTextRelation relation : JCasUtil.select(jCas, TemporalTextRelation.class)) {
- Annotation arg1 = relation.getArg1().getArgument();
- Annotation arg2 = relation.getArg2().getArgument();
- // The key is a list of args so we can do bi-directional lookup
- if(arg1 instanceof TimeMention && arg2 instanceof EventMention ){
- if( arg1==dischargeTime){
- dischargeTimeRelationLookup.put(Arrays.asList(arg1, arg2), relation);
- continue;
- }
- }else if(arg1 instanceof EventMention && arg2 instanceof TimeMention){
- if( arg2==dischargeTime ){
- dischargeTimeRelationLookup.put(Arrays.asList(arg1, arg2), relation);
- continue;
- }
- }
-
- }
- }
-
- for (EventMention eventMention : JCasUtil.select(jCas, EventMention.class)) {
- if (eventMention.getEvent() != null) {
+
+
+ //get event-time1 relations:
+ Map<List<Annotation>, TemporalTextRelation> dischargeTimeRelationLookup;
+ dischargeTimeRelationLookup = new HashMap<>();
+ if (this.isTraining()) {
+ dischargeTimeRelationLookup = new HashMap<>();
+ for (TemporalTextRelation relation : JCasUtil.select(jCas, TemporalTextRelation.class)) {
+ Annotation arg1 = relation.getArg1().getArgument();
+ Annotation arg2 = relation.getArg2().getArgument();
+ // The key is a list of args so we can do bi-directional lookup
+ if(arg1 instanceof TimeMention && arg2!=null && arg2 instanceof EventMention ){
+ if( arg1==dischargeTime){
+ dischargeTimeRelationLookup.put(Arrays.asList(arg1, arg2), relation);
+ continue;
+ }
+ }else if(arg1 instanceof EventMention && ( arg2 == null || arg2 instanceof TimeMention)){
+ if( arg2==dischargeTime ){
+ dischargeTimeRelationLookup.put(Arrays.asList(arg1, arg2), relation);
+ continue;
+ }
+ }
+
+ }
+ }
+
+ for(Segment course: courses){
+ for (EventMention eventMention : JCasUtil.selectCovered(jCas, EventMention.class, course)) {
+ if (eventMention.getClass().equals(EventMention.class)) {//for every gold event
List<Feature> features = this.contextExtractor.extract(jCas, eventMention);
features.addAll(this.verbTensePatternExtractor.extract(jCas, eventMention));//add nearby verb POS pattern feature
- features.addAll(this.sectionIDExtractor.extract(jCas, eventMention)); //add section heading
+ // features.addAll(this.sectionIDExtractor.extract(jCas, eventMention)); //add section heading
+ features.addAll(this.eventPositionExtractor.extract(jCas, eventMention));
features.addAll(this.closestVerbExtractor.extract(jCas, eventMention)); //add closest verb
features.addAll(this.timeXExtractor.extract(jCas, eventMention)); //add the closest time expression types
features.addAll(this.genericExtractor.extract(jCas, eventMention)); //add the closest time expression types
@@ -209,21 +234,22 @@ public class EventDischargeTimeAnnotator
String outcome = this.classifier.classify(features);
if(outcome!=null){
// add the relation to the CAS
- RelationArgument relArg1 = new RelationArgument(jCas);
- relArg1.setArgument(eventMention);
- relArg1.setRole("Argument");
- relArg1.addToIndexes();
- RelationArgument relArg2 = new RelationArgument(jCas);
- relArg2.setArgument(dischargeTime);
- relArg2.setRole("Related_to");
- relArg2.addToIndexes();
- TemporalTextRelation relation = new TemporalTextRelation(jCas);
- relation.setArg1(relArg1);
- relation.setArg2(relArg2);
- relation.setCategory(outcome);
- relation.addToIndexes();
+ RelationArgument relArg1 = new RelationArgument(jCas);
+ relArg1.setArgument(eventMention);
+ relArg1.setRole("Argument");
+ relArg1.addToIndexes();
+ RelationArgument relArg2 = new RelationArgument(jCas);
+ relArg2.setArgument(dischargeTime);
+ relArg2.setRole("Related_to");
+ relArg2.addToIndexes();
+ TemporalTextRelation relation = new TemporalTextRelation(jCas);
+ relation.setArg1(relArg1);
+ relation.setArg2(relArg2);
+ relation.setCategory(outcome);
+ relation.addToIndexes();
}else{
- System.out.println("cannot classify "+ eventMention.getCoveredText()+" and " + dischargeTime.getCoveredText());
+ if (dischargeTime!=null)
+ System.out.println("cannot classify "+ eventMention.getCoveredText()+" and " + dischargeTime.getCoveredText());
}
}
}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventI2B2RelationAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventI2B2RelationAnnotator.java?rev=1638119&r1=1638118&r2=1638119&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventI2B2RelationAnnotator.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventI2B2RelationAnnotator.java Tue Nov 11 14:26:05 2014
@@ -35,6 +35,7 @@ import org.apache.ctakes.temporal.ae.fea
import org.apache.ctakes.temporal.ae.feature.DependencyPathFeaturesExtractor;
import org.apache.ctakes.temporal.ae.feature.CoordinateFeaturesExtractor;
import org.apache.ctakes.temporal.ae.feature.DependingVerbsFeatureExtractor;
+import org.apache.ctakes.temporal.ae.feature.DependencyParseUtils;
//import org.apache.ctakes.temporal.ae.feature.EventInBetweenPropertyExtractor;
//import org.apache.ctakes.temporal.ae.feature.EventOutsidePropertyExtractor;
import org.apache.ctakes.temporal.ae.feature.SpecialAnnotationRelationExtractor;
@@ -58,6 +59,7 @@ import org.apache.ctakes.temporal.ae.fea
import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
import org.apache.ctakes.typesystem.type.relation.RelationArgument;
import org.apache.ctakes.typesystem.type.relation.TemporalTextRelation;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
//import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
@@ -139,10 +141,10 @@ public class EventEventI2B2RelationAnnot
, new OverlappedHeadFeaturesExtractor()
, new SRLRelationFeaturesExtractor()
, new NumberOfEventsInTheSameSentenceExtractor()
- , new EventPositionRelationFeaturesExtractor() //not helpful
- , new TimeXRelationFeaturesExtractor() //not helpful
+ // , new EventPositionRelationFeaturesExtractor() //not helpful
+ // , new TimeXRelationFeaturesExtractor() //not helpful
, new ConjunctionRelationFeaturesExtractor()
- , new DeterminerRelationFeaturesExtractor()
+ // , new DeterminerRelationFeaturesExtractor()
, new EventTimeRelationFeatureExtractor()
, new TokenPropertyFeaturesExtractor()
, new DependingVerbsFeatureExtractor()
@@ -179,34 +181,35 @@ public class EventEventI2B2RelationAnnot
int eventNum = events.size();
for (int i = 0; i < eventNum-1; i++){
+ EventMention eventB = events.get(i);
for(int j = i+1; j < eventNum; j++){
EventMention eventA = events.get(j);
- EventMention eventB = events.get(i);
- if(this.isTraining()){
- //pairing covering system events:
- for (EventMention event1 : coveringMap.get(eventA)){
+ if(j-i==1 || j-i==eventNum-1||ifDependent(jCas, eventA, eventB)){
+ if(this.isTraining()){
+ //pairing covering system events:
+ for (EventMention event1 : coveringMap.get(eventA)){
+ for(EventMention event2 : coveringMap.get(eventB)){
+ pairs.add(new IdentifiedAnnotationPair(event1, event2));
+ }
+ pairs.add(new IdentifiedAnnotationPair(event1, eventB));
+ }
for(EventMention event2 : coveringMap.get(eventB)){
- pairs.add(new IdentifiedAnnotationPair(event1, event2));
+ pairs.add(new IdentifiedAnnotationPair(eventA, event2));
+ }
+ //pairing covered system events:
+ for(EventMention event1 : JCasUtil.selectCovered(jCas, EventMention.class, eventA)){
+ for(EventMention event2 : JCasUtil.selectCovered(jCas, EventMention.class, eventB)){
+ pairs.add(new IdentifiedAnnotationPair(event1, event2));
+ }
+ pairs.add(new IdentifiedAnnotationPair(event1, eventB));
}
- pairs.add(new IdentifiedAnnotationPair(event1, eventB));
- }
- for(EventMention event2 : coveringMap.get(eventB)){
- pairs.add(new IdentifiedAnnotationPair(eventA, event2));
- }
- //pairing covered system events:
- for(EventMention event1 : JCasUtil.selectCovered(jCas, EventMention.class, eventA)){
for(EventMention event2 : JCasUtil.selectCovered(jCas, EventMention.class, eventB)){
- pairs.add(new IdentifiedAnnotationPair(event1, event2));
+ pairs.add(new IdentifiedAnnotationPair(eventA, event2));
}
- pairs.add(new IdentifiedAnnotationPair(event1, eventB));
- }
- for(EventMention event2 : JCasUtil.selectCovered(jCas, EventMention.class, eventB)){
- pairs.add(new IdentifiedAnnotationPair(eventA, event2));
}
+ pairs.add(new IdentifiedAnnotationPair(eventA, eventB));
}
- pairs.add(new IdentifiedAnnotationPair(eventA, eventB));
-
}
}
@@ -229,6 +232,24 @@ public class EventEventI2B2RelationAnnot
return pairs;
}
+
+ private static boolean ifDependent(JCas jCas, EventMention ev1, EventMention ev2) {//true when, for some noun/verb node pair drawn from the two events, the common ancestor returned by DependencyParseUtils is one of the two nodes
+ for (ConllDependencyNode firstNode : JCasUtil.selectCovered(jCas, ConllDependencyNode.class, ev1)) {//iterate over the conll nodes covered by the first event
+ String pos = firstNode.getPostag();//NOTE(review): a null POS tag would throw at startsWith below - confirm getPostag() is always set
+ if(pos.startsWith("NN")||pos.startsWith("VB")){//only noun/verb nodes are considered as candidate heads
+ for(ConllDependencyNode nextNode : JCasUtil.selectCovered(jCas, ConllDependencyNode.class, ev2)){//iterate over the conll nodes covered by the second event
+ pos = nextNode.getPostag();
+ if(pos.startsWith("NN")||pos.startsWith("VB")){//only noun/verb nodes are considered as candidate heads
+ ConllDependencyNode ancestor = DependencyParseUtils.getCommonAncestor(firstNode, nextNode);
+ if(ancestor==firstNode || ancestor==nextNode){//the common ancestor is one of the two nodes itself; presumably this means one node dominates the other in the dependency tree - confirm against getCommonAncestor
+ return true;
+ }
+ }
+ }
+ }
+ }
+ return false;//no qualifying node pair found
+ }
@Override
protected void createRelation(JCas jCas, IdentifiedAnnotation arg1,
@@ -262,13 +283,10 @@ public class EventEventI2B2RelationAnnot
if (relation != null && relation instanceof TemporalTextRelation) {
if(relation.getCategory().equals("OVERLAP")){
category = relation.getCategory();
- // }else if (relation.getCategory().equals("BEFORE")){
- // category = "AFTER";
- // }else if (relation.getCategory().equals("AFTER")){
- // category = "BEFORE";
- // }
- }else{
- category = relation.getCategory() + "-1";
+ }else if (relation.getCategory().equals("BEFORE")){
+ category = "AFTER";
+ }else if (relation.getCategory().equals("AFTER")){
+ category = "BEFORE";
}
}
}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeI2B2RelationAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeI2B2RelationAnnotator.java?rev=1638119&r1=1638118&r2=1638119&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeI2B2RelationAnnotator.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeI2B2RelationAnnotator.java Tue Nov 11 14:26:05 2014
@@ -225,13 +225,10 @@ public class EventTimeI2B2RelationAnnota
if (relation != null) {
if(relation.getCategory().equals("OVERLAP")){
category = relation.getCategory();
- // }else if (relation.getCategory().equals("BEFORE")){
- // category = "AFTER";
- // }else if (relation.getCategory().equals("AFTER")){
- // category = "BEFORE";
- // }
- }else{
- category = relation.getCategory() + "-1";
+ }else if (relation.getCategory().equals("BEFORE")){
+ category = "AFTER";
+ }else if (relation.getCategory().equals("AFTER")){
+ category = "BEFORE";
}
}
}
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventPositionFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventPositionFeatureExtractor.java?rev=1638119&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventPositionFeatureExtractor.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventPositionFeatureExtractor.java Tue Nov 11 14:26:05 2014
@@ -0,0 +1,145 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+//import java.util.logging.Logger;
+
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+import org.cleartk.ml.feature.extractor.FeatureExtractor1;
+import org.apache.uima.fit.util.JCasUtil;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Event position feature extractor. Emits a feature when the target annotation:
+ * 1. lies within the span of the first/last 5 sentences of the view
+ * 2. lies within the span of the first/last 3 events of a covering section (segments with id SIMPLE_SEGMENT are skipped)
+ * 3. lies within the span of the first/last 5 events of the view
+ * Only events whose class is exactly EventMention are counted; instances of subclasses are filtered out.
+ * @author CH151862
+ */
+public class EventPositionFeatureExtractor implements FeatureExtractor1 {
+
+ private String name; //feature name shared by every Feature this extractor emits
+
+ // private Logger logger = Logger.getLogger(this.getClass().getName());
+ /** Creates the extractor; every emitted feature is named "EventPosition". */
+ public EventPositionFeatureExtractor() {
+ super();
+ this.name = "EventPosition";
+
+ }
+ /** Returns the position features that apply to {@code annotation} (possibly none); {@code annotation} is cast to EventMention below. */
+ @Override
+ public List<Feature> extract(JCas view, Annotation annotation) throws CleartkExtractorException {
+ List<Feature> features = new ArrayList<>();
+
+ //check whether the annotation lies within the span of the first/last 5 sentences
+ Collection<Sentence> sentences = JCasUtil.select(view, Sentence.class);
+ List<Sentence> sentList = Lists.newArrayList();
+ sentList.addAll(sentences);
+ int sentSize = sentList.size();
+ if( sentSize >= 5){//views with fewer than 5 sentences get no sentence-position feature
+ if( containEvent(sentList.get(0), sentList.get(4), annotation)){
+ Feature feature = new Feature(this.name, "Within_Top_5_sentences");
+ features.add(feature);
+ }
+ if( containEvent(sentList.get(sentSize-5), sentList.get(sentSize-1), annotation)){
+ Feature feature = new Feature(this.name, "Within_Last_5_sentences");
+ features.add(feature);
+ }
+ }
+
+ //check whether the annotation lies within the span of the first/last 5 events
+ Collection<EventMention> events = JCasUtil.select(view, EventMention.class);
+ List<EventMention> eventList = Lists.newArrayList();
+ //filter events
+ for(EventMention event : events){
+ // filter out ctakes events: keep only instances whose class is exactly EventMention
+ if(event.getClass().equals(EventMention.class)){
+ eventList.add(event);
+ }
+ }
+ int eventSize = eventList.size();
+ if(eventSize >= 5){//views with fewer than 5 retained events get no event-position feature
+ if( containEvent(eventList.get(0), eventList.get(4), annotation)){
+ Feature feature = new Feature(this.name, "Within_Top_5_events");
+ features.add(feature);
+ }
+ if( containEvent(eventList.get(eventSize-5), eventList.get(eventSize-1), annotation)){
+ Feature feature = new Feature(this.name, "Within_Last_5_events");
+ features.add(feature);
+ }
+ }
+
+ //get the segments covering the target event; the index is rebuilt over the whole view on every call
+ Map<EventMention, Collection<Segment>> coveringMap =
+ JCasUtil.indexCovering(view, EventMention.class, Segment.class);
+ EventMention targetTokenAnnotation = (EventMention)annotation;//NOTE(review): throws ClassCastException if annotation is not an EventMention
+ Collection<Segment> segList = coveringMap.get(targetTokenAnnotation);//NOTE(review): assumes a non-null collection is returned for events not covered by any Segment - confirm
+
+ //check whether the annotation lies within the span of the first/last 3 events of each covering section
+ for(Segment seg : segList) {
+ String segname = seg.getId();
+ if (!segname.equals("SIMPLE_SEGMENT")){//skip segments whose id is SIMPLE_SEGMENT
+ List<EventMention> segEvents = JCasUtil.selectCovered(view, EventMention.class, seg);
+ List<EventMention> realEvents = new ArrayList<>();
+ //filtering events
+ for(EventMention event : segEvents){
+ // filter out ctakes events: keep only instances whose class is exactly EventMention
+ if(event.getClass().equals(EventMention.class)){
+ realEvents.add(event);
+ }
+ }
+ segEvents = realEvents;
+ int segEventSize = segEvents.size();
+ if(segEventSize >= 3){//sections with fewer than 3 retained events get no section-position feature
+ if( containEvent(segEvents.get(0), segEvents.get(2), annotation)){
+ Feature feature = new Feature(this.name, "Within_Top_3_events_of"+segname);//segment id is appended directly, without a separator
+ features.add(feature);
+ }
+ if( containEvent(segEvents.get(segEventSize-3), segEvents.get(segEventSize-1), annotation)){
+ Feature feature = new Feature(this.name, "Within_Last_3_events_of"+segname);//segment id is appended directly, without a separator
+ features.add(feature);
+ }
+ }
+ }
+ }
+
+ return features;
+ }
+ /** Returns true when {@code target} begins at or after the begin of {@code arg1} and ends at or before the end of {@code arg2}. */
+ private static boolean containEvent(Annotation arg1, Annotation arg2,
+ Annotation target) {
+ if(target.getBegin()>=arg1.getBegin() && target.getEnd()<= arg2.getEnd())
+ return true;
+ return false;
+ }
+
+}