You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by cl...@apache.org on 2014/11/18 16:02:39 UTC
svn commit: r1640360 - in
/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal: ae/
eval/
Author: clin
Date: Tue Nov 18 15:02:38 2014
New Revision: 1640360
URL: http://svn.apache.org/r1640360
Log:
Check in the best-performing i2b2 settings so far.
Applied system-generated events to the within-sentence event-event (ee) and event-time (et) relations and to event-sectionTime.
TLINK:
Precision : 0.6973
Recall : 0.6673
Average P&R : 0.6857
F measure : 0.6820
Modified:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventEventRelationAnnotator.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventTimeRelationAnnotator.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAdmissionTimeAnnotator.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventDischargeTimeAnnotator.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfI2B2TemporalRelations.java
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventEventRelationAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventEventRelationAnnotator.java?rev=1640360&r1=1640359&r2=1640360&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventEventRelationAnnotator.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventEventRelationAnnotator.java Tue Nov 18 15:02:38 2014
@@ -8,6 +8,7 @@ import java.util.List;
import java.util.Map;
import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
+import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator.IdentifiedAnnotationPair;
import org.apache.ctakes.relationextractor.ae.features.PartOfSpeechFeaturesExtractor;
import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
import org.apache.ctakes.temporal.ae.feature.CheckSpecialWordRelationExtractor;
@@ -45,6 +46,7 @@ import org.cleartk.ml.jar.GenericJarClas
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.util.JCasUtil;
+import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
public class ConsecutiveSentencesEventEventRelationAnnotator extends RelationExtractorAnnotator {
@@ -192,6 +194,25 @@ public class ConsecutiveSentencesEventEv
}
}
}
+
+ //add system generated events:
+// if(this.isTraining()){
+// List<IdentifiedAnnotationPair> eventPairs = ImmutableList.copyOf(pairs);
+// for(IdentifiedAnnotationPair epair: eventPairs){
+// EventMention eventA = (EventMention) epair.getArg1();
+// EventMention eventB = (EventMention) epair.getArg2();
+// //pairing covered system events:
+// for(EventMention event1 : JCasUtil.selectCovered(jCas, EventMention.class, eventA)){
+// for(EventMention event2 : JCasUtil.selectCovered(jCas, EventMention.class, eventB)){
+// pairs.add(new IdentifiedAnnotationPair(event1, event2));
+// }
+// pairs.add(new IdentifiedAnnotationPair(event1, eventB));
+// }
+// for(EventMention event2 : JCasUtil.selectCovered(jCas, EventMention.class, eventB)){
+// pairs.add(new IdentifiedAnnotationPair(eventA, event2));
+// }
+// }
+// }
return pairs;
}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventTimeRelationAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventTimeRelationAnnotator.java?rev=1640360&r1=1640359&r2=1640360&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventTimeRelationAnnotator.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventTimeRelationAnnotator.java Tue Nov 18 15:02:38 2014
@@ -8,6 +8,7 @@ import java.util.List;
import java.util.Map;
import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
+import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator.IdentifiedAnnotationPair;
import org.apache.ctakes.relationextractor.ae.features.PartOfSpeechFeaturesExtractor;
import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
import org.apache.ctakes.temporal.ae.feature.DeterminerRelationFeaturesExtractor;
@@ -43,6 +44,7 @@ import org.cleartk.ml.jar.GenericJarClas
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.util.JCasUtil;
+import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
public class ConsecutiveSentencesEventTimeRelationAnnotator extends RelationExtractorAnnotator {
@@ -78,7 +80,7 @@ public class ConsecutiveSentencesEventTi
protected List<RelationFeaturesExtractor> getFeatureExtractors() {
return Lists.newArrayList(
new UnexpandedTokenFeaturesExtractor() //use unexpanded version for i2b2 data
-// , new OverlappedHeadFeaturesExtractor()
+ // , new OverlappedHeadFeaturesExtractor()
, new EventArgumentPropertyExtractor()
, new PartOfSpeechFeaturesExtractor()
, new NumberOfEventTimeBetweenCandidatesExtractor()
@@ -87,7 +89,7 @@ public class ConsecutiveSentencesEventTi
, new SectionHeaderRelationExtractor()
, new TimeXRelationFeaturesExtractor()
, new EventPositionRelationFeaturesExtractor()
-// , new DeterminerRelationFeaturesExtractor()
+ // , new DeterminerRelationFeaturesExtractor()
);
}
@@ -143,10 +145,22 @@ public class ConsecutiveSentencesEventTi
}
}
}
-
+
}
}
+ //add system generated events:
+// if(this.isTraining()){
+// List<IdentifiedAnnotationPair> eventPairs = ImmutableList.copyOf(pairs);
+// for(IdentifiedAnnotationPair epair: eventPairs){
+// EventMention eventA = (EventMention) epair.getArg1();
+// TimeMention time = (TimeMention) epair.getArg2();
+// //pairing covered system events:
+// for(EventMention event : JCasUtil.selectCovered(jCas, EventMention.class, eventA)){
+// pairs.add(new IdentifiedAnnotationPair(event, time));
+// }
+// }
+// }
return pairs;
}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAdmissionTimeAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAdmissionTimeAnnotator.java?rev=1640360&r1=1640359&r2=1640360&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAdmissionTimeAnnotator.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAdmissionTimeAnnotator.java Tue Nov 18 15:02:38 2014
@@ -60,6 +60,7 @@ import org.cleartk.ml.feature.extractor.
import org.cleartk.ml.feature.extractor.CleartkExtractor.Covered;
import org.cleartk.ml.feature.extractor.CleartkExtractor.Following;
import org.cleartk.ml.feature.extractor.CleartkExtractor.Preceding;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
import org.cleartk.ml.feature.extractor.CombinedExtractor1;
import org.cleartk.ml.feature.extractor.CoveredTextExtractor;
import org.cleartk.ml.feature.extractor.TypePathExtractor;
@@ -158,13 +159,13 @@ public class EventAdmissionTimeAnnotator
}
}
- //2. identify the HOPI section:
- List<Segment> histories = Lists.newArrayList();
+ //2. identify the Hospital Course section:
+ List<Segment> courses = Lists.newArrayList();
Collection<Segment> segments = JCasUtil.select(jCas, Segment.class);
for(Segment seg: segments){
- if (seg.getId().equals("history")){//find the right segment
+ if (seg.getId().equals("course")){//find the right segment
if(JCasUtil.selectCovered(jCas,Sentence.class,seg).size()>0){//ignore empty section
- histories.add(seg);
+ courses.add(seg);
}
}
}
@@ -194,64 +195,104 @@ public class EventAdmissionTimeAnnotator
}
}
- for (Segment historyOfPresentIll : histories){
- for (EventMention eventMention : JCasUtil.selectCovered(jCas, EventMention.class, historyOfPresentIll)) {
- if (eventMention.getClass().equals(EventMention.class)) {//for every gold event
- List<Feature> features = this.contextExtractor.extract(jCas, eventMention);
- features.addAll(this.verbTensePatternExtractor.extract(jCas, eventMention));//add nearby verb POS pattern feature
- // features.addAll(this.sectionIDExtractor.extract(jCas, eventMention)); //add section heading
- features.addAll(this.eventPositionExtractor.extract(jCas, eventMention));
- features.addAll(this.closestVerbExtractor.extract(jCas, eventMention)); //add closest verb
- features.addAll(this.timeXExtractor.extract(jCas, eventMention)); //add the closest time expression types
- features.addAll(this.genericExtractor.extract(jCas, eventMention)); //add the closest time expression types
- features.addAll(this.dateExtractor.extract(jCas, eventMention)); //add the closest NE type
- features.addAll(this.umlsExtractor.extract(jCas, eventMention)); //add umls features
- // features.addAll(this.durationExtractor.extract(jCas, eventMention)); //add duration feature
- // features.addAll(this.disSemExtractor.extract(jCas, eventMention)); //add distributional semantic features
- if (this.isTraining()) {
- TemporalTextRelation relation = dischargeTimeRelationLookup.get(Arrays.asList(eventMention, admissionTime));
- String category = null;
+ Map<EventMention, Collection<EventMention>> coveringMap =
+ JCasUtil.indexCovering(jCas, EventMention.class, EventMention.class);
+ for (EventMention eventMention : JCasUtil.select(jCas, EventMention.class)) {
+ if (eventMention.getClass().equals(EventMention.class) && !isDischarge(eventMention) && !isAdmission(eventMention) && !inCourseSection(eventMention,courses)) {//for every gold event, not discharge, not admission, not in course section
+ List<Feature> features = extractFeatures(jCas,eventMention);
+ if (this.isTraining()) {
+ TemporalTextRelation relation = dischargeTimeRelationLookup.get(Arrays.asList(eventMention, admissionTime));
+ String category = null;
+ if (relation != null) {
+ category = relation.getCategory();
+ } else {
+ relation = dischargeTimeRelationLookup.get(Arrays.asList(admissionTime, eventMention));
if (relation != null) {
- category = relation.getCategory();
- } else {
- relation = dischargeTimeRelationLookup.get(Arrays.asList(admissionTime, eventMention));
- if (relation != null) {
- if(relation.getCategory().equals("OVERLAP")){
- category = relation.getCategory();
- }else if (relation.getCategory().equals("BEFORE")){
- category = "AFTER";
- }else if (relation.getCategory().equals("AFTER")){
- category = "BEFORE";
- }
+ if(relation.getCategory().equals("OVERLAP")){
+ category = relation.getCategory();
+ }else if (relation.getCategory().equals("BEFORE")){
+ category = "AFTER";
+ }else if (relation.getCategory().equals("AFTER")){
+ category = "BEFORE";
}
}
- if(category!=null){
- this.dataWriter.write(new Instance<>(category, features));
+ }
+ if(category!=null){
+ this.dataWriter.write(new Instance<>(category, features));
+ //add nearby system-generated events as additional instances
+ Collection<EventMention> eventList = coveringMap.get(eventMention);
+ for(EventMention covEvent : eventList){
+ if(!covEvent.getClass().equals(EventMention.class)){
+ List<Feature> covEvfeatures = extractFeatures(jCas,covEvent);
+ this.dataWriter.write(new Instance<>(category, covEvfeatures));
+ }
}
- } else {
- String outcome = this.classifier.classify(features);
- if(outcome!=null){
- // add the relation to the CAS
- RelationArgument relArg1 = new RelationArgument(jCas);
- relArg1.setArgument(eventMention);
- relArg1.setRole("Argument");
- relArg1.addToIndexes();
- RelationArgument relArg2 = new RelationArgument(jCas);
- relArg2.setArgument(admissionTime);
- relArg2.setRole("Related_to");
- relArg2.addToIndexes();
- TemporalTextRelation relation = new TemporalTextRelation(jCas);
- relation.setArg1(relArg1);
- relation.setArg2(relArg2);
- relation.setCategory(outcome);
- relation.addToIndexes();
- }else{
- System.out.println("cannot classify "+ eventMention.getCoveredText()+" and " + admissionTime.getCoveredText());
+ for(EventMention covedEvent : JCasUtil.selectCovered(jCas, EventMention.class, eventMention)){//select covered events
+ List<Feature> covedEvfeatures = extractFeatures(jCas,covedEvent);
+ this.dataWriter.write(new Instance<>(category, covedEvfeatures));
}
}
+ } else {
+ String outcome = this.classifier.classify(features);
+ if(outcome!=null){
+ // add the relation to the CAS
+ RelationArgument relArg1 = new RelationArgument(jCas);
+ relArg1.setArgument(eventMention);
+ relArg1.setRole("Argument");
+ relArg1.addToIndexes();
+ RelationArgument relArg2 = new RelationArgument(jCas);
+ relArg2.setArgument(admissionTime);
+ relArg2.setRole("Related_to");
+ relArg2.addToIndexes();
+ TemporalTextRelation relation = new TemporalTextRelation(jCas);
+ relation.setArg1(relArg1);
+ relation.setArg2(relArg2);
+ relation.setCategory(outcome);
+ relation.addToIndexes();
+ }else{
+ System.out.println("cannot classify "+ eventMention.getCoveredText()+" and " + admissionTime.getCoveredText());
+ }
}
}
}
}
+
+ }
+
+	private static boolean inCourseSection(EventMention event, List<Segment> courses) {
+		// True as soon as one of the given segments spans the whole event (begin and end inclusive).
+		for(Segment section: courses){
+			if(event.getBegin() >= section.getBegin() && event.getEnd() <= section.getEnd()){
+				return true;
+			}
+		}
+		return false;
+	}
+
+	private static boolean isAdmission(EventMention event) {
+		// Matches only an event ending at offset 15 or earlier whose text is "admission" (case-insensitive).
+		return event.getEnd()<=15 && event.getCoveredText().equalsIgnoreCase("admission");
+	}
+ }
+
+	private static boolean isDischarge(EventMention event) {
+		// Matches only an event ending at offset 40 or earlier whose text is "discharge" (case-insensitive).
+		return event.getEnd()<=40 && event.getCoveredText().equalsIgnoreCase("discharge");
+	}
+ }
+
+ private List<Feature> extractFeatures(JCas jCas, EventMention eventMention) throws CleartkExtractorException { //builds one event's feature list by concatenating the output of every enabled extractor
+ List<Feature> features = this.contextExtractor.extract(jCas, eventMention); //the list returned here is the one all later extractors append to
+ features.addAll(this.verbTensePatternExtractor.extract(jCas, eventMention));//add nearby verb POS pattern feature
+ // features.addAll(this.sectionIDExtractor.extract(jCas, eventMention)); //add section heading
+ features.addAll(this.eventPositionExtractor.extract(jCas, eventMention)); //add event position features
+ features.addAll(this.closestVerbExtractor.extract(jCas, eventMention)); //add closest verb
+ features.addAll(this.timeXExtractor.extract(jCas, eventMention)); //add the closest time expression types
+ features.addAll(this.genericExtractor.extract(jCas, eventMention)); //NOTE(review): comment here was a copy of the timeXExtractor one; presumably adds generic-event features - confirm
+ features.addAll(this.dateExtractor.extract(jCas, eventMention)); //add the closest NE type
+ features.addAll(this.umlsExtractor.extract(jCas, eventMention)); //add umls features
+ // features.addAll(this.durationExtractor.extract(jCas, eventMention)); //add duration feature
+ // features.addAll(this.disSemExtractor.extract(jCas, eventMention)); //add distributional semantic features
+ return features;
}
}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventDischargeTimeAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventDischargeTimeAnnotator.java?rev=1640360&r1=1640359&r2=1640360&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventDischargeTimeAnnotator.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventDischargeTimeAnnotator.java Tue Nov 18 15:02:38 2014
@@ -60,6 +60,7 @@ import org.cleartk.ml.feature.extractor.
import org.cleartk.ml.feature.extractor.CleartkExtractor.Covered;
import org.cleartk.ml.feature.extractor.CleartkExtractor.Following;
import org.cleartk.ml.feature.extractor.CleartkExtractor.Preceding;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
import org.cleartk.ml.feature.extractor.CombinedExtractor1;
import org.cleartk.ml.feature.extractor.CoveredTextExtractor;
import org.cleartk.ml.feature.extractor.TypePathExtractor;
@@ -193,20 +194,12 @@ public class EventDischargeTimeAnnotator
}
}
+ Map<EventMention, Collection<EventMention>> coveringMap =
+ JCasUtil.indexCovering(jCas, EventMention.class, EventMention.class);
for(Segment course: courses){
for (EventMention eventMention : JCasUtil.selectCovered(jCas, EventMention.class, course)) {
if (eventMention.getClass().equals(EventMention.class)) {//for every gold event
- List<Feature> features = this.contextExtractor.extract(jCas, eventMention);
- features.addAll(this.verbTensePatternExtractor.extract(jCas, eventMention));//add nearby verb POS pattern feature
- // features.addAll(this.sectionIDExtractor.extract(jCas, eventMention)); //add section heading
- features.addAll(this.eventPositionExtractor.extract(jCas, eventMention));
- features.addAll(this.closestVerbExtractor.extract(jCas, eventMention)); //add closest verb
- features.addAll(this.timeXExtractor.extract(jCas, eventMention)); //add the closest time expression types
- features.addAll(this.genericExtractor.extract(jCas, eventMention)); //add the closest time expression types
- features.addAll(this.dateExtractor.extract(jCas, eventMention)); //add the closest NE type
- features.addAll(this.umlsExtractor.extract(jCas, eventMention)); //add umls features
- // features.addAll(this.durationExtractor.extract(jCas, eventMention)); //add duration feature
- // features.addAll(this.disSemExtractor.extract(jCas, eventMention)); //add distributional semantic features
+ List<Feature> features = extractFeatures(jCas,eventMention);
if (this.isTraining()) {
TemporalTextRelation relation = dischargeTimeRelationLookup.get(Arrays.asList(eventMention, dischargeTime));
String category = null;
@@ -226,6 +219,18 @@ public class EventDischargeTimeAnnotator
}
if(category!=null){
this.dataWriter.write(new Instance<>(category, features));
+ //add nearby system-generated events as additional instances
+ Collection<EventMention> eventList = coveringMap.get(eventMention);
+ for(EventMention covEvent : eventList){
+ if(!covEvent.getClass().equals(EventMention.class)){
+ List<Feature> covEvfeatures = extractFeatures(jCas,covEvent);
+ this.dataWriter.write(new Instance<>(category, covEvfeatures));
+ }
+ }
+ for(EventMention covedEvent : JCasUtil.selectCovered(jCas, EventMention.class, eventMention)){//select covered events
+ List<Feature> covedEvfeatures = extractFeatures(jCas,covedEvent);
+ this.dataWriter.write(new Instance<>(category, covedEvfeatures));
+ }
}
} else {
String outcome = this.classifier.classify(features);
@@ -253,4 +258,19 @@ public class EventDischargeTimeAnnotator
}
}
}
+
+ private List<Feature> extractFeatures(JCas jCas, EventMention eventMention) throws CleartkExtractorException { //builds one event's feature list by concatenating the output of every enabled extractor
+ List<Feature> features = this.contextExtractor.extract(jCas, eventMention); //the list returned here is the one all later extractors append to
+ features.addAll(this.verbTensePatternExtractor.extract(jCas, eventMention));//add nearby verb POS pattern feature
+ // features.addAll(this.sectionIDExtractor.extract(jCas, eventMention)); //add section heading
+ features.addAll(this.eventPositionExtractor.extract(jCas, eventMention)); //add event position features
+ features.addAll(this.closestVerbExtractor.extract(jCas, eventMention)); //add closest verb
+ features.addAll(this.timeXExtractor.extract(jCas, eventMention)); //add the closest time expression types
+ features.addAll(this.genericExtractor.extract(jCas, eventMention)); //NOTE(review): comment here was a copy of the timeXExtractor one; presumably adds generic-event features - confirm
+ features.addAll(this.dateExtractor.extract(jCas, eventMention)); //add the closest NE type
+ features.addAll(this.umlsExtractor.extract(jCas, eventMention)); //add umls features
+ // features.addAll(this.durationExtractor.extract(jCas, eventMention)); //add duration feature
+ // features.addAll(this.disSemExtractor.extract(jCas, eventMention)); //add distributional semantic features
+ return features;
+ }
}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfI2B2TemporalRelations.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfI2B2TemporalRelations.java?rev=1640360&r1=1640359&r2=1640360&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfI2B2TemporalRelations.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfI2B2TemporalRelations.java Tue Nov 18 15:02:38 2014
@@ -362,8 +362,8 @@ EvaluationOfTemporalRelations_ImplBase{
// HideOutput hider = new HideOutput();
JarClassifierBuilder.trainAndPackage(new File(directory,EVENT_TIME), "-c", "0.0002", "-w2","0.5","-w3","5","-w4","8");//"-h","0","-c", "1000");//optArray);//"-c", "0.05");//
JarClassifierBuilder.trainAndPackage(new File(directory,EVENT_EVENT), "-c", "0.0002","-w2","0.5","-w3","4","-w4","3");
- JarClassifierBuilder.trainAndPackage(new File(directory,EVENT_DISCHARGE), "-h","0","-c", "1000","-w2","23","-w3","24");
- JarClassifierBuilder.trainAndPackage(new File(directory,EVENT_ADMISSION), "-h","0","-c", "1000","-w2","22","-w3","5");
+ JarClassifierBuilder.trainAndPackage(new File(directory,EVENT_DISCHARGE), "-h","0","-c", "1000");//,"-w2","23","-w3","24");
+ JarClassifierBuilder.trainAndPackage(new File(directory,EVENT_ADMISSION), "-h","0","-c", "1000");//,"-w2","22","-w3","5");
// JarClassifierBuilder.trainAndPackage(new File(directory,TIME_ADMISSION), "-h","0","-c", "1000");
// JarClassifierBuilder.trainAndPackage(new File(directory,TIME_DISCHARGE), "-h","0","-c", "1000");
JarClassifierBuilder.trainAndPackage(new File(directory,TEMP_CROSSSENT), "-h","0","-c", "1000","-w2","3","-w3","0.1");