You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by cl...@apache.org on 2014/11/04 17:30:37 UTC
svn commit: r1636640 - in
/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae:
./ feature/
Author: clin
Date: Tue Nov 4 16:30:36 2014
New Revision: 1636640
URL: http://svn.apache.org/r1636640
Log:
add several annotators used for i2b2 data
Added:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventEventRelationAnnotator.java (with props)
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventTimeRelationAnnotator.java (with props)
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalRelationRuleAnnotator.java (with props)
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimexAdmissionTimeAnnotator.java (with props)
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimexDischargeTimeAnnotator.java (with props)
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/UnexpandedTokenFeaturesExtractor.java (with props)
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventEventRelationAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventEventRelationAnnotator.java?rev=1636640&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventEventRelationAnnotator.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventEventRelationAnnotator.java Tue Nov 4 16:30:36 2014
@@ -0,0 +1,268 @@
+package org.apache.ctakes.temporal.ae;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
+import org.apache.ctakes.relationextractor.ae.features.PartOfSpeechFeaturesExtractor;
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.CheckSpecialWordRelationExtractor;
+//import org.apache.ctakes.temporal.ae.feature.DeterminerRelationFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.EventArgumentPropertyExtractor;
+import org.apache.ctakes.temporal.ae.feature.EventPositionRelationFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.EventTimeRelationFeatureExtractor;
+import org.apache.ctakes.temporal.ae.feature.NumberOfEventTimeBetweenCandidatesExtractor;
+import org.apache.ctakes.temporal.ae.feature.OverlappedHeadFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.SRLRelationFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.SectionHeaderRelationExtractor;
+import org.apache.ctakes.temporal.ae.feature.TimeXRelationFeaturesExtractor;
+//import org.apache.ctakes.temporal.ae.feature.TemporalAttributeForMixEventTimeExtractor;
+import org.apache.ctakes.temporal.ae.feature.UmlsFeatureExtractor;
+import org.apache.ctakes.temporal.ae.feature.UnexpandedTokenFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.relation.TemporalTextRelation;
+//import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+//import org.apache.ctakes.typesystem.type.textspan.Paragraph;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.jcas.tcas.DocumentAnnotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.ml.CleartkAnnotator;
+import org.cleartk.ml.DataWriter;
+import org.cleartk.ml.jar.DefaultDataWriterFactory;
+import org.cleartk.ml.jar.DirectoryDataWriterFactory;
+import org.cleartk.ml.jar.GenericJarClassifierFactory;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.util.JCasUtil;
+
+import com.google.common.collect.Lists;
+
+public class ConsecutiveSentencesEventEventRelationAnnotator extends RelationExtractorAnnotator {
+
+ /**
+  * Builds the description used to run this annotator in training mode, writing
+  * classifier training data to a directory.
+  *
+  * @param dataWriterClass the ClearTK data writer implementation to use
+  * @param outputDirectory directory that receives the training data
+  * @param probabilityOfKeepingANegativeExample value passed on as
+  *        {@code PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE}
+  * @return the analysis engine description for training
+  * @throws ResourceInitializationException if the description cannot be created
+  */
+ public static AnalysisEngineDescription createDataWriterDescription(
+     Class<? extends DataWriter<String>> dataWriterClass,
+     File outputDirectory,
+     double probabilityOfKeepingANegativeExample) throws ResourceInitializationException {
+   // not sure why this has to be cast; something funny going on in uimaFIT maybe?
+   float keepNegativeProbability = (float) probabilityOfKeepingANegativeExample;
+   return AnalysisEngineFactory.createPrimitiveDescription(
+       ConsecutiveSentencesEventEventRelationAnnotator.class,
+       CleartkAnnotator.PARAM_IS_TRAINING, true,
+       DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, dataWriterClass,
+       DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, outputDirectory,
+       RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE, keepNegativeProbability);
+ }
+
+ /**
+  * Builds the description used to run this annotator in classification mode.
+  *
+  * @param modelDirectory directory that contains the trained {@code model.jar}
+  * @return the analysis engine description for classification
+  * @throws ResourceInitializationException if the description cannot be created
+  */
+ public static AnalysisEngineDescription createAnnotatorDescription(File modelDirectory)
+     throws ResourceInitializationException {
+   File classifierJar = new File(modelDirectory, "model.jar");
+   return AnalysisEngineFactory.createPrimitiveDescription(
+       ConsecutiveSentencesEventEventRelationAnnotator.class,
+       CleartkAnnotator.PARAM_IS_TRAINING, false,
+       GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, classifierJar);
+ }
+
+ /**
+  * Lists the feature extractors applied to every candidate event-event pair.
+  *
+  * @return a new mutable list of extractors, in the order they are applied
+  */
+ @Override
+ protected List<RelationFeaturesExtractor> getFeatureExtractors() {
+   List<RelationFeaturesExtractor> extractors = new ArrayList<>();
+   extractors.add(new UnexpandedTokenFeaturesExtractor()); //use unexpanded version for i2b2 data
+   extractors.add(new OverlappedHeadFeaturesExtractor());
+   extractors.add(new EventArgumentPropertyExtractor());
+   extractors.add(new PartOfSpeechFeaturesExtractor());
+   extractors.add(new NumberOfEventTimeBetweenCandidatesExtractor());
+   extractors.add(new UmlsFeatureExtractor());
+   extractors.add(new SRLRelationFeaturesExtractor());
+   extractors.add(new SectionHeaderRelationExtractor());
+   extractors.add(new TimeXRelationFeaturesExtractor());
+   extractors.add(new EventPositionRelationFeaturesExtractor());
+   extractors.add(new CheckSpecialWordRelationExtractor());
+   extractors.add(new EventTimeRelationFeatureExtractor());
+//   extractors.add(new DeterminerRelationFeaturesExtractor());
+   return extractors;
+ }
+
+ /**
+  * Returns {@link DocumentAnnotation} as the covering annotation type.
+  * NOTE(review): presumably the base class generates candidate pairs once per
+  * covering annotation, which would make candidate generation document-wide
+  * here - confirm against RelationExtractorAnnotator.
+  */
+ @Override
+ protected Class<? extends Annotation> getCoveringClass() {
+ return DocumentAnnotation.class;
+ }
+
+ @SuppressWarnings("null")
+ @Override
+ public List<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(
+ JCas jCas,
+ Annotation document) {
+
+ List<IdentifiedAnnotationPair> pairs = Lists.newArrayList();
+
+ Collection<Segment> segments = JCasUtil.select(jCas, Segment.class);
+ List<Segment> segList = Lists.newArrayList();
+ for(Segment seg: segments){
+ if (!seg.getId().equals("SIMPLE_SEGMENT")){//remove simple segment
+ segList.add(seg);
+ }
+ }
+
+ for(Segment segment : segList){
+ List<Sentence> sentList = JCasUtil.selectCovered(jCas, Sentence.class, segment);
+ int sentListLength = sentList.size();
+ if( sentListLength >=2){
+ for (int i=0; i<sentListLength-1; i++ ) {
+ Sentence currentSent = sentList.get(i);
+ Sentence nextSent = sentList.get(i+1);
+ List<EventMention> currentEvents = JCasUtil.selectCovered(jCas, EventMention.class, currentSent);
+ List<EventMention> nextEvents = JCasUtil.selectCovered(jCas, EventMention.class, nextSent);
+
+ //filtering events
+ List<EventMention> realEvents = new ArrayList<>();
+ //filtering events
+ for(EventMention event : currentEvents){
+ // filter out ctakes events
+ if(event.getClass().equals(EventMention.class)){
+ realEvents.add(event);
+ }
+ }
+ currentEvents = realEvents;
+ realEvents = new ArrayList<>();
+ //filtering events
+ for(EventMention event : nextEvents){
+ // filter out ctakes events
+ if(event.getClass().equals(EventMention.class)){
+ realEvents.add(event);
+ }
+ }
+ nextEvents = realEvents;
+
+ //scheme2 : pairing major events + time
+ int currentSize = currentEvents == null ? 0 : currentEvents.size();
+ int nextSize = nextEvents == null ? 0 : nextEvents.size();
+ if( currentSize == 0 || nextSize ==0){
+ continue;
+ }
+
+ EventMention currentFirst = null;
+ EventMention currentLast = null;
+ EventMention nextFirst = null;
+ EventMention nextLast = null;
+
+ if( currentSize ==1 ){
+ currentFirst = currentEvents.get(0);
+ }else if(currentSize > 1){
+ currentFirst = currentEvents.get(0);
+ currentLast = currentEvents.get(currentSize-1);
+ }
+
+ if( nextSize == 1){
+ nextFirst = nextEvents.get(0);
+ }else if( nextSize > 1 ){
+ nextFirst = nextEvents.get(0);
+ nextLast = nextEvents.get(nextSize-1);
+ }
+
+ //pair them up
+ if(currentFirst != null){
+ if(nextFirst != null){
+ pairs.add(new IdentifiedAnnotationPair(nextFirst, currentFirst));
+ }
+ if( nextLast != null){
+ pairs.add(new IdentifiedAnnotationPair(nextLast, currentFirst));
+ }
+ }
+ if( currentLast != null ){
+ if(nextFirst != null){
+ pairs.add(new IdentifiedAnnotationPair(nextFirst, currentLast));
+ }
+ if( nextLast != null){
+ pairs.add(new IdentifiedAnnotationPair(nextLast, currentLast));
+ }
+ }
+ }
+ }
+ }
+
+ return pairs;
+ }
+
+
+ // private static boolean hasOverlapTokens(JCas jCas, EventMention event1, EventMention event2) {
+ // List<WordToken> currentTokens = JCasUtil.selectCovered(jCas, WordToken.class, event1);
+ // int tokenSize1 = currentTokens.size();
+ // List<WordToken> nextTokens = JCasUtil.selectCovered(jCas, WordToken.class, event2);
+ // int tokenSize2 = nextTokens.size();
+ // int tokenSize = Math.min(tokenSize1, tokenSize2);
+ // int matches = 0;
+ // for(WordToken t1: currentTokens){
+ // for(WordToken t2: nextTokens){
+ // if(t1.getCoveredText().toLowerCase().equals(t2.getCoveredText().toLowerCase())){
+ // matches++;
+ // }
+ // }
+ // }
+ // float matchRatio = (float)matches/tokenSize;
+ // if( matchRatio >= 0.5)
+ // return true;
+ // return false;
+ // }
+
+ /**
+  * Stores a predicted relation in the CAS as a {@link TemporalTextRelation}
+  * whose two arguments carry the roles "Arg1" and "Arg2".
+  *
+  * @param jCas the CAS that receives the new annotations
+  * @param arg1 annotation wrapped as the first argument
+  * @param arg2 annotation wrapped as the second argument
+  * @param predictedCategory category set on the new relation
+  */
+ @Override
+ protected void createRelation(JCas jCas, IdentifiedAnnotation arg1,
+     IdentifiedAnnotation arg2, String predictedCategory) {
+   // Each wrapper is fully populated before it is added to the indexes.
+   RelationArgument firstArgument = new RelationArgument(jCas);
+   firstArgument.setRole("Arg1");
+   firstArgument.setArgument(arg1);
+   firstArgument.addToIndexes();
+
+   RelationArgument secondArgument = new RelationArgument(jCas);
+   secondArgument.setRole("Arg2");
+   secondArgument.setArgument(arg2);
+   secondArgument.addToIndexes();
+
+   TemporalTextRelation temporalRelation = new TemporalTextRelation(jCas);
+   temporalRelation.setCategory(predictedCategory);
+   temporalRelation.setArg1(firstArgument);
+   temporalRelation.setArg2(secondArgument);
+   temporalRelation.addToIndexes();
+ }
+
+
+ /**
+  * Looks up the gold category for a candidate pair. The pair is first looked up
+  * in the given order; if absent, the reversed pair is looked up and its category
+  * is inverted (BEFORE and AFTER swap, OVERLAP stays). A reversed relation with
+  * any other category, or without a category, yields no category. Pairs without
+  * a category become {@code NO_RELATION_CATEGORY} only when the random draw from
+  * {@code coin} is at most {@code probabilityOfKeepingANegativeExample}.
+  *
+  * @param relationLookup gold relations keyed by their (arg1, arg2) annotation list
+  * @param arg1 first candidate argument
+  * @param arg2 second candidate argument
+  * @return the category, or {@code null} when none applies
+  */
+ @Override
+ protected String getRelationCategory(
+     Map<List<Annotation>, BinaryTextRelation> relationLookup,
+     IdentifiedAnnotation arg1,
+     IdentifiedAnnotation arg2) {
+   String category = null;
+   BinaryTextRelation relation = relationLookup.get(Arrays.asList(arg1, arg2));
+   if (relation != null) {
+     category = relation.getCategory();
+   } else {
+     relation = relationLookup.get(Arrays.asList(arg2, arg1));
+     if (relation != null) {
+       // Constant-first comparisons: a relation without a category must not
+       // raise a NullPointerException.
+       String reversedCategory = relation.getCategory();
+       if ("OVERLAP".equals(reversedCategory)) {
+         category = reversedCategory;
+       } else if ("BEFORE".equals(reversedCategory)) {
+         category = "AFTER";
+       } else if ("AFTER".equals(reversedCategory)) {
+         category = "BEFORE";
+       }
+       // else{
+       // category = relation.getCategory() + "-1";
+       // }
+     }
+   }
+   if (category == null && coin.nextDouble() <= this.probabilityOfKeepingANegativeExample) {
+     category = NO_RELATION_CATEGORY;
+   }
+   return category;
+ }
+}
Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventEventRelationAnnotator.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventTimeRelationAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventTimeRelationAnnotator.java?rev=1636640&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventTimeRelationAnnotator.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventTimeRelationAnnotator.java Tue Nov 4 16:30:36 2014
@@ -0,0 +1,205 @@
+package org.apache.ctakes.temporal.ae;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
+import org.apache.ctakes.relationextractor.ae.features.PartOfSpeechFeaturesExtractor;
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.DeterminerRelationFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.EventArgumentPropertyExtractor;
+import org.apache.ctakes.temporal.ae.feature.EventPositionRelationFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.NumberOfEventTimeBetweenCandidatesExtractor;
+import org.apache.ctakes.temporal.ae.feature.SRLRelationFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.SectionHeaderRelationExtractor;
+import org.apache.ctakes.temporal.ae.feature.TimeXRelationFeaturesExtractor;
+//import org.apache.ctakes.temporal.ae.feature.TemporalAttributeForMixEventTimeExtractor;
+import org.apache.ctakes.temporal.ae.feature.UmlsFeatureExtractor;
+import org.apache.ctakes.temporal.ae.feature.UnexpandedTokenFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.relation.TemporalTextRelation;
+//import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+//import org.apache.ctakes.typesystem.type.textspan.Paragraph;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.jcas.tcas.DocumentAnnotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.ml.CleartkAnnotator;
+import org.cleartk.ml.DataWriter;
+import org.cleartk.ml.jar.DefaultDataWriterFactory;
+import org.cleartk.ml.jar.DirectoryDataWriterFactory;
+import org.cleartk.ml.jar.GenericJarClassifierFactory;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.util.JCasUtil;
+
+import com.google.common.collect.Lists;
+
+public class ConsecutiveSentencesEventTimeRelationAnnotator extends RelationExtractorAnnotator {
+
+ /**
+  * Builds the description used to run this annotator in training mode, writing
+  * classifier training data to a directory.
+  *
+  * @param dataWriterClass the ClearTK data writer implementation to use
+  * @param outputDirectory directory that receives the training data
+  * @param probabilityOfKeepingANegativeExample value passed on as
+  *        {@code PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE}
+  * @return the analysis engine description for training
+  * @throws ResourceInitializationException if the description cannot be created
+  */
+ public static AnalysisEngineDescription createDataWriterDescription(
+     Class<? extends DataWriter<String>> dataWriterClass,
+     File outputDirectory,
+     double probabilityOfKeepingANegativeExample) throws ResourceInitializationException {
+   // not sure why this has to be cast; something funny going on in uimaFIT maybe?
+   float keepNegativeProbability = (float) probabilityOfKeepingANegativeExample;
+   return AnalysisEngineFactory.createPrimitiveDescription(
+       ConsecutiveSentencesEventTimeRelationAnnotator.class,
+       CleartkAnnotator.PARAM_IS_TRAINING, true,
+       DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, dataWriterClass,
+       DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, outputDirectory,
+       RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE, keepNegativeProbability);
+ }
+
+ /**
+  * Builds the description used to run this annotator in classification mode.
+  *
+  * @param modelDirectory directory that contains the trained {@code model.jar}
+  * @return the analysis engine description for classification
+  * @throws ResourceInitializationException if the description cannot be created
+  */
+ public static AnalysisEngineDescription createAnnotatorDescription(File modelDirectory)
+     throws ResourceInitializationException {
+   File classifierJar = new File(modelDirectory, "model.jar");
+   return AnalysisEngineFactory.createPrimitiveDescription(
+       ConsecutiveSentencesEventTimeRelationAnnotator.class,
+       CleartkAnnotator.PARAM_IS_TRAINING, false,
+       GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, classifierJar);
+ }
+
+ /**
+  * Lists the feature extractors applied to every candidate event-time pair.
+  *
+  * @return a new mutable list of extractors, in the order they are applied
+  */
+ @Override
+ protected List<RelationFeaturesExtractor> getFeatureExtractors() {
+   List<RelationFeaturesExtractor> extractors = new ArrayList<>();
+   extractors.add(new UnexpandedTokenFeaturesExtractor()); //use unexpanded version for i2b2 data
+//   extractors.add(new OverlappedHeadFeaturesExtractor());
+   extractors.add(new EventArgumentPropertyExtractor());
+   extractors.add(new PartOfSpeechFeaturesExtractor());
+   extractors.add(new NumberOfEventTimeBetweenCandidatesExtractor());
+   extractors.add(new UmlsFeatureExtractor());
+   extractors.add(new SRLRelationFeaturesExtractor());
+   extractors.add(new SectionHeaderRelationExtractor());
+   extractors.add(new TimeXRelationFeaturesExtractor());
+   extractors.add(new EventPositionRelationFeaturesExtractor());
+//   extractors.add(new DeterminerRelationFeaturesExtractor());
+   return extractors;
+ }
+
+ /**
+  * Returns {@link DocumentAnnotation} as the covering annotation type.
+  * NOTE(review): presumably the base class generates candidate pairs once per
+  * covering annotation, which would make candidate generation document-wide
+  * here - confirm against RelationExtractorAnnotator.
+  */
+ @Override
+ protected Class<? extends Annotation> getCoveringClass() {
+ return DocumentAnnotation.class;
+ }
+
+ /**
+  * Generates candidate event-time pairs across sentence boundaries. For every
+  * time expression in a segment, the one sentence selected before it and the one
+  * selected after it are inspected (each only if it lies inside the same
+  * segment), and the first and last plain {@link EventMention} of those
+  * sentences are paired with the time expression.
+  *
+  * @param jCas the CAS to read segments, sentences, events and times from
+  * @param document the covering annotation; not used by this implementation
+  * @return the candidate (event, time) pairs, possibly empty, never {@code null}
+  */
+ @Override
+ public List<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(
+     JCas jCas,
+     Annotation document) {
+
+   List<IdentifiedAnnotationPair> pairs = Lists.newArrayList();
+
+   for (Segment segment : JCasUtil.select(jCas, Segment.class)) {
+     // Constant-first comparison: a segment whose id was never set must not
+     // raise a NullPointerException; it is simply not the simple segment.
+     if ("SIMPLE_SEGMENT".equals(segment.getId())) {
+       continue; //remove simple segment
+     }
+
+     for (TimeMention time : JCasUtil.selectCovered(jCas, TimeMention.class, segment)) {
+       List<Sentence> consecutiveSents = Lists.newArrayList();
+       //get the sentence before this timex
+       List<Sentence> sents = JCasUtil.selectPreceding(jCas, Sentence.class, time, 1);
+       if (sameSegment(sents, segment)) {
+         consecutiveSents.addAll(sents);
+       }
+       //get the sentence after this timex
+       sents = JCasUtil.selectFollowing(jCas, Sentence.class, time, 1);
+       if (sameSegment(sents, segment)) {
+         consecutiveSents.addAll(sents);
+       }
+
+       for (Sentence sent : consecutiveSents) {
+         //filter events: keep only instances whose class is exactly EventMention
+         List<EventMention> events = Lists.newArrayList();
+         for (EventMention event : JCasUtil.selectCovered(jCas, EventMention.class, sent)) {
+           if (event.getClass().equals(EventMention.class)) {
+             events.add(event);
+           }
+         }
+         if (events.isEmpty()) {
+           continue;
+         }
+         pairs.add(new IdentifiedAnnotationPair(events.get(0), time));//pair up the first event and the time
+         if (events.size() > 1) {
+           pairs.add(new IdentifiedAnnotationPair(events.get(events.size() - 1), time));//pair up the last event and the time
+         }
+       }
+     }
+   }
+
+   return pairs;
+ }
+
+ /**
+  * Tells whether the first sentence of a list lies inside a segment.
+  *
+  * @param sents candidate sentences; only the first element is examined
+  * @param segment the segment whose span must contain that sentence
+  * @return {@code false} for a null or empty list, otherwise whether the
+  *         segment span covers the first sentence's span
+  */
+ private static boolean sameSegment(List<Sentence> sents, Segment segment) {
+   if (sents == null || sents.isEmpty()) {
+     return false;
+   }
+   Sentence first = sents.get(0);
+   return segment.getBegin() <= first.getBegin() && first.getEnd() <= segment.getEnd();
+ }
+
+ /**
+  * Stores a predicted relation in the CAS as a {@link TemporalTextRelation}
+  * whose two arguments carry the roles "Arg1" and "Arg2".
+  *
+  * @param jCas the CAS that receives the new annotations
+  * @param arg1 annotation wrapped as the first argument
+  * @param arg2 annotation wrapped as the second argument
+  * @param predictedCategory category set on the new relation
+  */
+ @Override
+ protected void createRelation(JCas jCas, IdentifiedAnnotation arg1,
+     IdentifiedAnnotation arg2, String predictedCategory) {
+   // Each wrapper is fully populated before it is added to the indexes.
+   RelationArgument firstArgument = new RelationArgument(jCas);
+   firstArgument.setRole("Arg1");
+   firstArgument.setArgument(arg1);
+   firstArgument.addToIndexes();
+
+   RelationArgument secondArgument = new RelationArgument(jCas);
+   secondArgument.setRole("Arg2");
+   secondArgument.setArgument(arg2);
+   secondArgument.addToIndexes();
+
+   TemporalTextRelation temporalRelation = new TemporalTextRelation(jCas);
+   temporalRelation.setCategory(predictedCategory);
+   temporalRelation.setArg1(firstArgument);
+   temporalRelation.setArg2(secondArgument);
+   temporalRelation.addToIndexes();
+ }
+
+
+ /**
+  * Looks up the gold category for a candidate pair. The pair is first looked up
+  * in the given order; if absent, the reversed pair is looked up and its category
+  * is inverted (BEFORE and AFTER swap, OVERLAP stays). A reversed relation with
+  * any other category, or without a category, yields no category. Pairs without
+  * a category become {@code NO_RELATION_CATEGORY} only when the random draw from
+  * {@code coin} is at most {@code probabilityOfKeepingANegativeExample}.
+  *
+  * @param relationLookup gold relations keyed by their (arg1, arg2) annotation list
+  * @param arg1 first candidate argument
+  * @param arg2 second candidate argument
+  * @return the category, or {@code null} when none applies
+  */
+ @Override
+ protected String getRelationCategory(
+     Map<List<Annotation>, BinaryTextRelation> relationLookup,
+     IdentifiedAnnotation arg1,
+     IdentifiedAnnotation arg2) {
+   String category = null;
+   BinaryTextRelation relation = relationLookup.get(Arrays.asList(arg1, arg2));
+   if (relation != null) {
+     category = relation.getCategory();
+   } else {
+     relation = relationLookup.get(Arrays.asList(arg2, arg1));
+     if (relation != null) {
+       // Constant-first comparisons: a relation without a category must not
+       // raise a NullPointerException.
+       String reversedCategory = relation.getCategory();
+       if ("OVERLAP".equals(reversedCategory)) {
+         category = reversedCategory;
+       } else if ("BEFORE".equals(reversedCategory)) {
+         category = "AFTER";
+       } else if ("AFTER".equals(reversedCategory)) {
+         category = "BEFORE";
+       }
+     }
+   }
+   if (category == null && coin.nextDouble() <= this.probabilityOfKeepingANegativeExample) {
+     category = NO_RELATION_CATEGORY;
+   }
+   return category;
+ }
+}
Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConsecutiveSentencesEventTimeRelationAnnotator.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalRelationRuleAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalRelationRuleAnnotator.java?rev=1636640&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalRelationRuleAnnotator.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalRelationRuleAnnotator.java Tue Nov 4 16:30:36 2014
@@ -0,0 +1,244 @@
+package org.apache.ctakes.temporal.ae;
+
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.ctakes.temporal.ae.feature.DependencyParseUtils;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.relation.TemporalTextRelation;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+//import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.util.JCasUtil;
+
+import com.google.common.collect.Lists;
+
+public class TemporalRelationRuleAnnotator extends JCasAnnotator_ImplBase {
+
+ @SuppressWarnings("null")
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+
+ //1: linking E0-T0, E1-T1:
+ Collection<Sentence> sents = JCasUtil.select(jCas, Sentence.class);
+ List<Sentence> sentList = Lists.newArrayList();
+ sentList.addAll(sents);
+ EventMention admission = null;
+ // EventMention discharge = null;
+ // TimeMention admissionDate = null;
+ // TimeMention dischargeDate = null;
+ int sentListLength = sentList.size();
+ if( sentListLength >=4 ){//the first 4 sentences are discharge date and admission date
+ for (int i=0; i<4; i+=2){
+ Sentence currentSent = sentList.get(i);
+ Sentence nextSent = sentList.get(i+1);
+ List<EventMention> currentEvents = JCasUtil.selectCovered(jCas, EventMention.class, currentSent);
+ List<TimeMention> nextTimes = JCasUtil.selectCovered(jCas, TimeMention.class, nextSent);
+
+ int currentSize = currentEvents == null ? 0 : currentEvents.size();
+ int nextTimeSize = nextTimes == null? 0 : nextTimes.size();
+
+ if(currentSize==0 || nextTimeSize ==0) continue;
+
+ EventMention currentEvent = currentEvents.get(0);
+ TimeMention nextTime = nextTimes.get(0);
+
+ if(i == 0){
+ admission = currentEvent;
+ // admissionDate = nextTime;
+ // }else{
+ // discharge = currentEvent;
+ // dischargeDate = nextTime;
+ }
+
+ createRelation(jCas, currentEvent, nextTime, "OVERLAP");
+ }
+ }
+
+ //rule 3: link Timexes with the same strings
+ Collection<TimeMention> times = JCasUtil.select(jCas, TimeMention.class);
+ List<TimeMention> allTimes = Lists.newArrayList();
+ allTimes.addAll(times);
+ int timeNum = allTimes.size();
+ if(timeNum > 2){
+ for(int i=0; i<timeNum-1; i++){
+ TimeMention firstTime = allTimes.get(i);
+ for(int j=i+1;j<timeNum; j++){
+ TimeMention secondTime = allTimes.get(j);
+ if(sameTime(jCas, firstTime, secondTime)){
+ createRelation(jCas, secondTime, firstTime, "OVERLAP");
+ }
+ }
+ }
+ }
+
+ //2: linking coreferent event pairs, lift section restriction
+ Collection<Sentence> sentences = JCasUtil.select(jCas, Sentence.class);
+
+ Collection<EventMention> allEvents = JCasUtil.select(jCas, EventMention.class);
+ List<EventMention> realEvents = new ArrayList<>();
+ //filtering events
+ for(EventMention event : allEvents){
+ // filter out ctakes events
+ if(event.getClass().equals(EventMention.class)){
+ realEvents.add(event);
+ }
+ }
+ allEvents = realEvents;
+
+ for(Sentence sent : sentences){
+ List<EventMention> currentEvents = JCasUtil.selectCovered(jCas, EventMention.class, sent);
+ //filter out ctakes events
+ realEvents = new ArrayList<>();
+ for(EventMention event : currentEvents){
+ // filter out ctakes events
+ if(event.getClass().equals(EventMention.class)){
+ realEvents.add(event);
+ }
+ }
+ currentEvents = realEvents;
+
+ //get dependent pairs:
+// int eventNum = currentEvents.size();
+// if(eventNum >= 4){
+// EventMention first = currentEvents.get(0);
+// //find dependent pairs between first and the rest
+// for (ConllDependencyNode firstNode : JCasUtil.selectCovered(jCas, ConllDependencyNode.class, first)) {//get the covered conll nodes within the first event
+// String pos = firstNode.getPostag();
+// if(pos.startsWith("NN")||pos.startsWith("VB")){//get the head node
+// for(int j=1;j<eventNum;j++){
+// EventMention nextEvent = currentEvents.get(j);
+// for(ConllDependencyNode nextNode : JCasUtil.selectCovered(jCas, ConllDependencyNode.class, nextEvent)){//get the covered conll nodes within the next event
+// pos = nextNode.getPostag();
+// if(pos.startsWith("NN")||pos.startsWith("VB")){//get the head node
+// ConllDependencyNode ancestor = DependencyParseUtils.getCommonAncestor(firstNode, nextNode);
+// if(ancestor==firstNode || ancestor==nextNode){
+// createRelation(jCas, nextEvent, first, "OVERLAP");
+// break;
+// }
+// }
+// }
+// }
+// }
+//
+//
+// }
+// }
+
+ //remove current Events from allEvents:
+ for(EventMention event:currentEvents){
+ allEvents.remove(event);
+ //check if current event is the admission event
+ if(admission != null && event!=admission && event.getCoveredText().toLowerCase().startsWith("admitted")){
+ createRelation(jCas, event, admission, "OVERLAP");
+ }
+ }
+
+ for(EventMention arg1: currentEvents){
+ for(EventMention arg2: allEvents){
+ if(hasOverlapNNs(jCas, arg1, arg2)){//hasSameSemanticType(jCas, arg1, arg2) &&
+ createRelation(jCas, arg2, arg1, "OVERLAP");
+ }
+ }
+ }
+
+ }
+ }
+
+ // private static boolean hasSameSemanticType(JCas jCas, EventMention arg1,
+ // EventMention arg2) {
+ // List<EventMention> arg1Events = JCasUtil.selectCovered(jCas, EventMention.class, arg1);
+ // List<EventMention> arg2Events = JCasUtil.selectCovered(jCas, EventMention.class, arg2);
+ // for (EventMention event1 : arg1Events){
+ // if(!event1.getClass().equals(EventMention.class)){//&& event1.getBegin()==arg1.getBegin() && event1.getEnd()==arg1.getEnd()){
+ // for (EventMention event2 : arg2Events){
+ // if(!event2.getClass().equals(EventMention.class)){// && event2.getBegin()==arg2.getBegin() && event2.getEnd()==arg2.getEnd()){
+ // if(event1.getClass().equals(event2.getClass())){
+ // return true;
+ // }
+ // }
+ // }
+ // }
+ // }
+ // return false;
+ // }
+
+ /**
+  * Tells whether two time expressions cover the same sequence of token texts.
+  *
+  * @param jCas the CAS to read the covered {@link BaseToken}s from
+  * @param firstTime first time expression
+  * @param secondTime second time expression
+  * @return {@code true} when both cover the same number of tokens and the
+  *         token texts are equal position by position (case-sensitive)
+  */
+ private static boolean sameTime(JCas jCas, TimeMention firstTime,
+     TimeMention secondTime) {
+   List<BaseToken> firstTokens = JCasUtil.selectCovered(jCas, BaseToken.class, firstTime);
+   List<BaseToken> secondTokens = JCasUtil.selectCovered(jCas, BaseToken.class, secondTime);
+   int count = firstTokens.size();
+   if (count != secondTokens.size()) {
+     return false;
+   }
+   // Advance while the texts agree; reaching the end means every position matched.
+   int index = 0;
+   while (index < count
+       && firstTokens.get(index).getCoveredText().equals(secondTokens.get(index).getCoveredText())) {
+     index++;
+   }
+   return index == count;
+ }
+
+ /**
+  * Stores a relation in the CAS as a {@link TemporalTextRelation} whose two
+  * arguments carry the roles "Arg1" and "Arg2".
+  *
+  * @param jCas the CAS that receives the new annotations
+  * @param arg1 annotation wrapped as the first argument
+  * @param arg2 annotation wrapped as the second argument
+  * @param category category set on the new relation
+  */
+ private static void createRelation(JCas jCas, IdentifiedAnnotation arg1,
+     IdentifiedAnnotation arg2, String category) {
+   RelationArgument relArg1 = new RelationArgument(jCas);
+   relArg1.setArgument(arg1);
+   relArg1.setRole("Arg1");
+   relArg1.addToIndexes();
+   RelationArgument relArg2 = new RelationArgument(jCas);
+   relArg2.setArgument(arg2);
+   relArg2.setRole("Arg2");
+   relArg2.addToIndexes();
+   TemporalTextRelation relation = new TemporalTextRelation(jCas);
+   relation.setArg1(relArg1);
+   relation.setArg2(relArg2);
+   relation.setCategory(category);
+   relation.addToIndexes();
+ }
+
+ /**
+ * Method for checking if two arguments share some common NNs ob VBs.
+ * @param jCas
+ * @param event1
+ * @param event2
+ * @return
+ */
+ private static boolean hasOverlapNNs(JCas jCas, EventMention event1, EventMention event2) {
+ List<WordToken> currentTokens = JCasUtil.selectCovered(jCas, WordToken.class, event1);
+ List<WordToken> nextTokens = JCasUtil.selectCovered(jCas, WordToken.class, event2);
+
+ int NNSize1 = 0;
+ int NNSize2 = 0;
+ int matches = 0;
+ for(WordToken t1: currentTokens){
+ if(t1.getPartOfSpeech().startsWith("NN")||t1.getPartOfSpeech().startsWith("VB")){
+ NNSize1 ++;
+ for(WordToken t2: nextTokens){
+ if(t2.getPartOfSpeech().startsWith("NN")||t2.getPartOfSpeech().startsWith("VB")){
+ NNSize2 ++;
+ if(t1.getCanonicalForm().equals(t2.getCanonicalForm())){
+ matches++;
+ }
+ }
+ }
+
+ }
+ }
+ int NNSize = Math.min(NNSize1, NNSize2);
+ if (NNSize == 0) return false;
+ float matchRatio = (float)matches/NNSize;
+ if( matchRatio == 1)
+ return true;
+ return false;
+ }
+}
Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalRelationRuleAnnotator.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimexAdmissionTimeAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimexAdmissionTimeAnnotator.java?rev=1636640&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimexAdmissionTimeAnnotator.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimexAdmissionTimeAnnotator.java Tue Nov 4 16:30:36 2014
@@ -0,0 +1,258 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae;
+
+import java.io.File;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+//import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+//import java.util.Map;
+
+import org.apache.ctakes.temporal.ae.feature.ClosestVerbExtractor;
+//import org.apache.ctakes.temporal.ae.feature.CoveredTextToValuesExtractor;
+import org.apache.ctakes.temporal.ae.feature.DateAndMeasurementExtractor;
+//import org.apache.ctakes.temporal.ae.feature.EventPositionFeatureExtractor;
+//import org.apache.ctakes.temporal.ae.feature.EventPropertyExtractor;
+import org.apache.ctakes.temporal.ae.feature.NearbyVerbTenseXExtractor;
+//import org.apache.ctakes.temporal.ae.feature.SectionHeaderExtractor;
+import org.apache.ctakes.temporal.ae.feature.TimeXExtractor;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.relation.TemporalTextRelation;
+//import org.apache.ctakes.temporal.ae.feature.duration.DurationExpectationFeatureExtractor;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+//import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.ml.CleartkAnnotator;
+import org.cleartk.ml.DataWriter;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.Instance;
+import org.cleartk.ml.feature.extractor.CleartkExtractor;
+import org.cleartk.ml.feature.extractor.CleartkExtractor.Covered;
+import org.cleartk.ml.feature.extractor.CleartkExtractor.Following;
+import org.cleartk.ml.feature.extractor.CleartkExtractor.Preceding;
+import org.cleartk.ml.feature.extractor.CombinedExtractor1;
+import org.cleartk.ml.feature.extractor.CoveredTextExtractor;
+import org.cleartk.ml.feature.extractor.TypePathExtractor;
+import org.cleartk.ml.jar.DefaultDataWriterFactory;
+import org.cleartk.ml.jar.DirectoryDataWriterFactory;
+import org.cleartk.ml.jar.GenericJarClassifierFactory;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.util.JCasUtil;
+
+import com.google.common.collect.Lists;
+
+//import com.google.common.base.Charsets;
+
+public class TimexAdmissionTimeAnnotator extends CleartkAnnotator<String> {
+
+ public static AnalysisEngineDescription createDataWriterDescription(
+ Class<? extends DataWriter<String>> dataWriterClass,
+ File outputDirectory) throws ResourceInitializationException {
+ return AnalysisEngineFactory.createPrimitiveDescription(
+ TimexAdmissionTimeAnnotator.class,
+ CleartkAnnotator.PARAM_IS_TRAINING,
+ true,
+ DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
+ dataWriterClass,
+ DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+ outputDirectory);
+ }
+
+ public static AnalysisEngineDescription createAnnotatorDescription(String modelPath)
+ throws ResourceInitializationException {
+ return AnalysisEngineFactory.createPrimitiveDescription(
+ TimexAdmissionTimeAnnotator.class,
+ CleartkAnnotator.PARAM_IS_TRAINING,
+ false,
+ GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+ modelPath);
+ }
+
+ /**
+ * @deprecated use String path instead of File.
+ * ClearTK will automatically Resolve the String to an InputStream.
+ * This will allow resources to be read within from a jar as well as File.
+ */
+ @SuppressWarnings("dep-ann")
+ public static AnalysisEngineDescription createAnnotatorDescription(File modelDirectory)
+ throws ResourceInitializationException {
+ return AnalysisEngineFactory.createPrimitiveDescription(
+ TimexAdmissionTimeAnnotator.class,
+ CleartkAnnotator.PARAM_IS_TRAINING,
+ false,
+ GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+ new File(modelDirectory, "model.jar"));
+ }
+
+ private CleartkExtractor contextExtractor;
+ private NearbyVerbTenseXExtractor verbTensePatternExtractor;
+ // private SectionHeaderExtractor sectionIDExtractor;
+ // private EventPositionFeatureExtractor eventPositionExtractor;
+ private ClosestVerbExtractor closestVerbExtractor;
+ private TimeXExtractor timeXExtractor;
+ // private EventPropertyExtractor genericExtractor;
+ private DateAndMeasurementExtractor dateExtractor;
+// private UmlsSingleFeatureExtractor umlsExtractor;
+ // private CoveredTextToValuesExtractor disSemExtractor;
+ // private DurationExpectationFeatureExtractor durationExtractor;
+
+ @Override
+ public void initialize(UimaContext context) throws ResourceInitializationException {
+ super.initialize(context);
+ CombinedExtractor1 baseExtractor = new CombinedExtractor1(
+ new CoveredTextExtractor(),
+ new TypePathExtractor(BaseToken.class, "partOfSpeech"));
+ this.contextExtractor = new CleartkExtractor(
+ BaseToken.class,
+ baseExtractor,
+ new Preceding(3),
+ new Covered(),
+ new Following(3));
+ this.verbTensePatternExtractor = new NearbyVerbTenseXExtractor();
+ // this.sectionIDExtractor = new SectionHeaderExtractor();
+ // this.eventPositionExtractor = new EventPositionFeatureExtractor();
+ this.closestVerbExtractor = new ClosestVerbExtractor();
+ this.timeXExtractor = new TimeXExtractor();
+ // this.genericExtractor = new EventPropertyExtractor();
+ this.dateExtractor = new DateAndMeasurementExtractor();
+// this.umlsExtractor = new UmlsSingleFeatureExtractor();
+ }
+
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+ //get discharge Time id: T1:
+ TimeMention admissionTime = null;
+ List<Segment> histories = Lists.newArrayList();
+ //1. identify admissionTime
+ //may need better way to identify Discharge Time other than relative span information:
+ for (TimeMention time : JCasUtil.selectCovered(jCas, TimeMention.class, 15, 30)) {
+ if(time.getTimeClass().equals("DATE")){
+ admissionTime = time;
+ break;
+ }
+ }
+
+ //2. identify the HOPI section:
+ Collection<Segment> segments = JCasUtil.select(jCas, Segment.class);
+ for(Segment seg: segments){
+ if (seg.getId().equals("history")){//find the right segment
+ if(JCasUtil.selectCovered(jCas,Sentence.class,seg).size()>0){//ignore empty section
+ histories.add(seg);
+ }
+ }
+ }
+
+ //get event-time1 relations:
+ Map<List<Annotation>, TemporalTextRelation> admissionTimeRelationLookup;
+ admissionTimeRelationLookup = new HashMap<>();
+ if (this.isTraining()) {
+ // admissionTimeRelationLookup = new HashMap<>();
+ for (TemporalTextRelation relation : JCasUtil.select(jCas, TemporalTextRelation.class)) {
+ Annotation arg1 = relation.getArg1().getArgument();
+ Annotation arg2 = relation.getArg2().getArgument();
+ // The key is a list of args so we can do bi-directional lookup
+ if(arg1 instanceof TimeMention && arg2 instanceof TimeMention ){
+ if( arg1==admissionTime){
+ admissionTimeRelationLookup.put(Arrays.asList(arg1, arg2), relation);
+ continue;
+ }
+ }else if(arg1 instanceof TimeMention && arg2 instanceof TimeMention){
+ if( arg2==admissionTime ){
+ admissionTimeRelationLookup.put(Arrays.asList(arg1, arg2), relation);
+ continue;
+ }
+ }
+
+ }
+ }
+
+ for (Segment historyOfPresentIll : histories){
+ for (TimeMention timeMention : JCasUtil.selectCovered(jCas, TimeMention.class, historyOfPresentIll)) {
+ List<Feature> features = this.contextExtractor.extract(jCas, timeMention);
+ features.addAll(this.verbTensePatternExtractor.extract(jCas, timeMention));//add nearby verb POS pattern feature
+ // features.addAll(this.sectionIDExtractor.extract(jCas, eventMention)); //add section heading
+ // features.addAll(this.eventPositionExtractor.extract(jCas, eventMention));
+ features.addAll(this.closestVerbExtractor.extract(jCas, timeMention)); //add closest verb
+ features.addAll(this.timeXExtractor.extract(jCas, timeMention)); //add the closest time expression types
+ // features.addAll(this.genericExtractor.extract(jCas, eventMention)); //add the closest time expression types
+ features.addAll(this.dateExtractor.extract(jCas, timeMention)); //add the closest NE type
+// features.addAll(this.umlsExtractor.extract(jCas, timeMention)); //add umls features
+ // features.addAll(this.durationExtractor.extract(jCas, eventMention)); //add duration feature
+ // features.addAll(this.disSemExtractor.extract(jCas, eventMention)); //add distributional semantic features
+ if (this.isTraining()) {
+ TemporalTextRelation relation = admissionTimeRelationLookup.get(Arrays.asList(timeMention, admissionTime));
+ String category = null;
+ if (relation != null) {
+ category = relation.getCategory();
+ } else {
+ relation = admissionTimeRelationLookup.get(Arrays.asList(admissionTime, timeMention));
+ if (relation != null) {
+ if(relation.getCategory().equals("OVERLAP")){
+ category = relation.getCategory();
+ }else if (relation.getCategory().equals("BEFORE")){
+ category = "AFTER";
+ }else if (relation.getCategory().equals("AFTER")){
+ category = "BEFORE";
+ }
+ }
+ }
+ if(category!=null){
+ this.dataWriter.write(new Instance<>(category, features));
+ }
+ } else {
+ String outcome = this.classifier.classify(features);
+ if(outcome!=null){
+ // add the relation to the CAS
+ RelationArgument relArg1 = new RelationArgument(jCas);
+ relArg1.setArgument(timeMention);
+ relArg1.setRole("Argument");
+ relArg1.addToIndexes();
+ RelationArgument relArg2 = new RelationArgument(jCas);
+ relArg2.setArgument(admissionTime);
+ relArg2.setRole("Related_to");
+ relArg2.addToIndexes();
+ TemporalTextRelation relation = new TemporalTextRelation(jCas);
+ relation.setArg1(relArg1);
+ relation.setArg2(relArg2);
+ relation.setCategory(outcome);
+ relation.addToIndexes();
+ }else{
+ if (admissionTime != null)
+ System.out.println("cannot classify "+ timeMention.getCoveredText()+" and " + admissionTime.getCoveredText());
+ else
+ System.out.println("cannot classify and null admission Date");
+ }
+ }
+
+
+ }
+ }
+ }
+}
Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimexAdmissionTimeAnnotator.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimexDischargeTimeAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimexDischargeTimeAnnotator.java?rev=1636640&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimexDischargeTimeAnnotator.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimexDischargeTimeAnnotator.java Tue Nov 4 16:30:36 2014
@@ -0,0 +1,255 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae;
+
+import java.io.File;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+//import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+//import java.util.Map;
+
+import org.apache.ctakes.temporal.ae.feature.ClosestVerbExtractor;
+//import org.apache.ctakes.temporal.ae.feature.CoveredTextToValuesExtractor;
+import org.apache.ctakes.temporal.ae.feature.DateAndMeasurementExtractor;
+//import org.apache.ctakes.temporal.ae.feature.EventPositionFeatureExtractor;
+//import org.apache.ctakes.temporal.ae.feature.EventPropertyExtractor;
+import org.apache.ctakes.temporal.ae.feature.NearbyVerbTenseXExtractor;
+//import org.apache.ctakes.temporal.ae.feature.SectionHeaderExtractor;
+import org.apache.ctakes.temporal.ae.feature.TimeXExtractor;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.relation.TemporalTextRelation;
+//import org.apache.ctakes.temporal.ae.feature.duration.DurationExpectationFeatureExtractor;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.ml.CleartkAnnotator;
+import org.cleartk.ml.DataWriter;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.Instance;
+import org.cleartk.ml.feature.extractor.CleartkExtractor;
+import org.cleartk.ml.feature.extractor.CleartkExtractor.Covered;
+import org.cleartk.ml.feature.extractor.CleartkExtractor.Following;
+import org.cleartk.ml.feature.extractor.CleartkExtractor.Preceding;
+import org.cleartk.ml.feature.extractor.CombinedExtractor1;
+import org.cleartk.ml.feature.extractor.CoveredTextExtractor;
+import org.cleartk.ml.feature.extractor.TypePathExtractor;
+import org.cleartk.ml.jar.DefaultDataWriterFactory;
+import org.cleartk.ml.jar.DirectoryDataWriterFactory;
+import org.cleartk.ml.jar.GenericJarClassifierFactory;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.util.JCasUtil;
+
+import com.google.common.collect.Lists;
+
+//import com.google.common.base.Charsets;
+
+/**
+ * ClearTK annotator that relates every {@link TimeMention} found in the "course"
+ * segments of a document to the document's discharge date.
+ * <p>
+ * In training mode, gold {@link TemporalTextRelation}s that involve the discharge date
+ * are written out as training instances. Otherwise the classifier outcome is added to
+ * the CAS as a new {@link TemporalTextRelation}.
+ */
+public class TimexDischargeTimeAnnotator extends CleartkAnnotator<String> {
+
+    /** Begin offset passed to {@code JCasUtil.selectCovered} when looking for the discharge date. */
+    private static final int DISCHARGE_DATE_WINDOW_BEGIN = 40;
+
+    /** End offset passed to {@code JCasUtil.selectCovered} when looking for the discharge date. */
+    private static final int DISCHARGE_DATE_WINDOW_END = 60;
+
+    /** Segment id of the sections whose time expressions are processed. */
+    private static final String COURSE_SEGMENT_ID = "course";
+
+    /**
+     * Creates a description of this annotator configured for training.
+     *
+     * @param dataWriterClass the data writer used to write the training instances
+     * @param outputDirectory the directory the training data is written to
+     * @return the analysis engine description
+     * @throws ResourceInitializationException if the description cannot be created
+     */
+    public static AnalysisEngineDescription createDataWriterDescription(
+            Class<? extends DataWriter<String>> dataWriterClass,
+            File outputDirectory) throws ResourceInitializationException {
+        return AnalysisEngineFactory.createPrimitiveDescription(
+                TimexDischargeTimeAnnotator.class,
+                CleartkAnnotator.PARAM_IS_TRAINING,
+                true,
+                DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
+                dataWriterClass,
+                DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+                outputDirectory);
+    }
+
+    /**
+     * Creates a description of this annotator configured for classification.
+     *
+     * @param modelPath the value passed as the classifier jar path
+     * @return the analysis engine description
+     * @throws ResourceInitializationException if the description cannot be created
+     */
+    public static AnalysisEngineDescription createAnnotatorDescription(String modelPath)
+            throws ResourceInitializationException {
+        return AnalysisEngineFactory.createPrimitiveDescription(
+                TimexDischargeTimeAnnotator.class,
+                CleartkAnnotator.PARAM_IS_TRAINING,
+                false,
+                GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+                modelPath);
+    }
+
+    /**
+     * @deprecated use String path instead of File.
+     * ClearTK will automatically Resolve the String to an InputStream.
+     * This will allow resources to be read within from a jar as well as File.
+     */
+    @SuppressWarnings("dep-ann")
+    public static AnalysisEngineDescription createAnnotatorDescription(File modelDirectory)
+            throws ResourceInitializationException {
+        return AnalysisEngineFactory.createPrimitiveDescription(
+                TimexDischargeTimeAnnotator.class,
+                CleartkAnnotator.PARAM_IS_TRAINING,
+                false,
+                GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+                new File(modelDirectory, "model.jar"));
+    }
+
+    private CleartkExtractor contextExtractor;
+    private NearbyVerbTenseXExtractor verbTensePatternExtractor;
+    private ClosestVerbExtractor closestVerbExtractor;
+    private TimeXExtractor timeXExtractor;
+    private DateAndMeasurementExtractor dateExtractor;
+
+    @Override
+    public void initialize(UimaContext context) throws ResourceInitializationException {
+        super.initialize(context);
+        CombinedExtractor1 baseExtractor = new CombinedExtractor1(
+                new CoveredTextExtractor(),
+                new TypePathExtractor(BaseToken.class, "partOfSpeech"));
+        // covered text and part-of-speech of the 3 preceding, the covered and the 3 following tokens
+        this.contextExtractor = new CleartkExtractor(
+                BaseToken.class,
+                baseExtractor,
+                new Preceding(3),
+                new Covered(),
+                new Following(3));
+        this.verbTensePatternExtractor = new NearbyVerbTenseXExtractor();
+        this.closestVerbExtractor = new ClosestVerbExtractor();
+        this.timeXExtractor = new TimeXExtractor();
+        this.dateExtractor = new DateAndMeasurementExtractor();
+    }
+
+    /**
+     * Relates each time expression of the "course" segments to the discharge date.
+     * Does nothing when no discharge date is found.
+     *
+     * @param jCas the CAS to process
+     * @throws AnalysisEngineProcessException if feature extraction, writing or classification fails
+     */
+    @Override
+    public void process(JCas jCas) throws AnalysisEngineProcessException {
+        //1. identify the discharge time
+        TimeMention dischargeTime = findDischargeTime(jCas);
+        if (dischargeTime == null) {
+            return;
+        }
+
+        //2. identify the Hospital Course sections
+        List<Segment> courses = findCourseSegments(jCas);
+
+        //3. collect the gold relations that involve the discharge time
+        Map<List<Annotation>, TemporalTextRelation> dischargeTimeRelationLookup = new HashMap<>();
+        if (this.isTraining()) {
+            collectDischargeTimeRelations(jCas, dischargeTime, dischargeTimeRelationLookup);
+        }
+
+        for (Segment course : courses) {
+            for (TimeMention timeMention : JCasUtil.selectCovered(jCas, TimeMention.class, course)) {
+                List<Feature> features = extractFeatures(jCas, timeMention);
+                if (this.isTraining()) {
+                    String category = findGoldCategory(dischargeTimeRelationLookup, timeMention, dischargeTime);
+                    if (category != null) {
+                        this.dataWriter.write(new Instance<>(category, features));
+                    }
+                } else {
+                    String outcome = this.classifier.classify(features);
+                    if (outcome != null) {
+                        addRelation(jCas, timeMention, dischargeTime, outcome);
+                    } else {
+                        System.out.println("cannot classify "+ timeMention.getCoveredText()+" and " + dischargeTime.getCoveredText());
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Returns the first time mention of class "DATE" selected within the discharge date
+     * window, or {@code null} if there is none.
+     */
+    private static TimeMention findDischargeTime(JCas jCas) {
+        //TODO
+        //may need better way to identify Discharge Time other than relative span information:
+        for (TimeMention time : JCasUtil.selectCovered(
+                jCas, TimeMention.class, DISCHARGE_DATE_WINDOW_BEGIN, DISCHARGE_DATE_WINDOW_END)) {
+            if ("DATE".equals(time.getTimeClass())) {
+                return time;
+            }
+        }
+        return null;
+    }
+
+    /** Returns the segments with id "course" that cover at least one sentence. */
+    private static List<Segment> findCourseSegments(JCas jCas) {
+        List<Segment> courses = Lists.newArrayList();
+        for (Segment seg : JCasUtil.select(jCas, Segment.class)) {
+            if (COURSE_SEGMENT_ID.equals(seg.getId())
+                    && JCasUtil.selectCovered(jCas, Sentence.class, seg).size() > 0) {//ignore empty section
+                courses.add(seg);
+            }
+        }
+        return courses;
+    }
+
+    /**
+     * Stores every time-time relation that has the discharge time as either argument.
+     * The key is the (arg1, arg2) list so the relation can be looked up in both directions.
+     */
+    private static void collectDischargeTimeRelations(
+            JCas jCas,
+            TimeMention dischargeTime,
+            Map<List<Annotation>, TemporalTextRelation> lookup) {
+        for (TemporalTextRelation relation : JCasUtil.select(jCas, TemporalTextRelation.class)) {
+            Annotation arg1 = relation.getArg1().getArgument();
+            Annotation arg2 = relation.getArg2().getArgument();
+            if (arg1 instanceof TimeMention && arg2 instanceof TimeMention
+                    && (dischargeTime.equals(arg1) || dischargeTime.equals(arg2))) {
+                lookup.put(Arrays.asList(arg1, arg2), relation);
+            }
+        }
+    }
+
+    /** Runs all feature extractors on the given time mention. */
+    private List<Feature> extractFeatures(JCas jCas, TimeMention timeMention)
+            throws AnalysisEngineProcessException {
+        List<Feature> features = this.contextExtractor.extract(jCas, timeMention);
+        features.addAll(this.verbTensePatternExtractor.extract(jCas, timeMention));//add nearby verb POS pattern feature
+        features.addAll(this.closestVerbExtractor.extract(jCas, timeMention)); //add closest verb
+        features.addAll(this.timeXExtractor.extract(jCas, timeMention)); //add the closest time expression types
+        features.addAll(this.dateExtractor.extract(jCas, timeMention)); //add the closest NE type
+        return features;
+    }
+
+    /**
+     * Returns the gold category of the relation (timeMention, dischargeTime). When only
+     * the reversed relation (dischargeTime, timeMention) exists, its category is inverted:
+     * OVERLAP stays, BEFORE and AFTER are swapped, anything else yields {@code null}.
+     * Returns {@code null} when no gold relation exists.
+     */
+    private static String findGoldCategory(
+            Map<List<Annotation>, TemporalTextRelation> lookup,
+            TimeMention timeMention,
+            TimeMention dischargeTime) {
+        TemporalTextRelation relation = lookup.get(Arrays.asList(timeMention, dischargeTime));
+        if (relation != null) {
+            return relation.getCategory();
+        }
+        relation = lookup.get(Arrays.asList(dischargeTime, timeMention));
+        if (relation == null) {
+            return null;
+        }
+        String reversed = relation.getCategory();
+        if ("OVERLAP".equals(reversed)) {
+            return reversed;
+        }
+        if ("BEFORE".equals(reversed)) {
+            return "AFTER";
+        }
+        if ("AFTER".equals(reversed)) {
+            return "BEFORE";
+        }
+        return null;
+    }
+
+    /** Adds a relation with the given category between the time mention and the discharge time to the CAS. */
+    private static void addRelation(JCas jCas, TimeMention timeMention, TimeMention dischargeTime, String category) {
+        RelationArgument relArg1 = new RelationArgument(jCas);
+        relArg1.setArgument(timeMention);
+        relArg1.setRole("Argument");
+        relArg1.addToIndexes();
+        RelationArgument relArg2 = new RelationArgument(jCas);
+        relArg2.setArgument(dischargeTime);
+        relArg2.setRole("Related_to");
+        relArg2.addToIndexes();
+        TemporalTextRelation relation = new TemporalTextRelation(jCas);
+        relation.setArg1(relArg1);
+        relation.setArg2(relArg2);
+        relation.setCategory(category);
+        relation.addToIndexes();
+    }
+}
Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimexDischargeTimeAnnotator.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/UnexpandedTokenFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/UnexpandedTokenFeaturesExtractor.java?rev=1636640&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/UnexpandedTokenFeaturesExtractor.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/UnexpandedTokenFeaturesExtractor.java Tue Nov 4 16:30:36 2014
@@ -0,0 +1,88 @@
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.relationextractor.ae.features.TokenFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractor;
+import org.cleartk.ml.feature.extractor.CleartkExtractor.Bag;
+import org.cleartk.ml.feature.extractor.CleartkExtractor.Covered;
+import org.cleartk.ml.feature.extractor.CleartkExtractor.FirstCovered;
+import org.cleartk.ml.feature.extractor.CleartkExtractor.Following;
+import org.cleartk.ml.feature.extractor.CleartkExtractor.LastCovered;
+import org.cleartk.ml.feature.extractor.CleartkExtractor.Preceding;
+import org.cleartk.ml.feature.extractor.DistanceExtractor;
+import org.cleartk.ml.feature.extractor.CoveredTextExtractor;
+import org.cleartk.ml.feature.extractor.NamingExtractor1;
+import org.cleartk.ml.feature.extractor.FeatureExtractor1;
+import org.cleartk.ml.feature.extractor.CombinedExtractor1;
+
+/**
+ * Token feature extractor that works on the two mentions exactly as given, without
+ * expanding either of them to a larger span first.
+ */
+public class UnexpandedTokenFeaturesExtractor extends TokenFeaturesExtractor {
+
+    /** Covered text of an annotation; shared by all extractors below. */
+    private final FeatureExtractor1 textExtractor = new CoveredTextExtractor();
+
+    /**
+     * Token context of a mention: its first token, its last token, a bag of all its
+     * tokens, the 3 tokens before it and the 3 tokens after it.
+     */
+    private final FeatureExtractor1 mentionContextExtractor = new CleartkExtractor(
+            BaseToken.class,
+            textExtractor,
+            new FirstCovered(1),
+            new LastCovered(1),
+            new Bag(new Covered()),
+            new Preceding(3),
+            new Following(3));
+
+    /** Text and token context of the first mention, named "mention1". */
+    private final FeatureExtractor1 firstMentionExtractor = new NamingExtractor1(
+            "mention1",
+            new CombinedExtractor1(textExtractor, mentionContextExtractor));
+
+    /** Text and token context of the second mention, named "mention2". */
+    private final FeatureExtractor1 secondMentionExtractor = new NamingExtractor1(
+            "mention2",
+            new CombinedExtractor1(textExtractor, mentionContextExtractor));
+
+    /** First token, last token and bag of all tokens lying between the two mentions. */
+    private final CleartkExtractor betweenTokensExtractor = new CleartkExtractor(
+            BaseToken.class,
+            new NamingExtractor1("BetweenMentions", textExtractor),
+            new FirstCovered(1),
+            new LastCovered(1),
+            new Bag(new Covered()));
+
+    /** Number of tokens between the two mentions. */
+    private final DistanceExtractor tokenDistanceExtractor = new DistanceExtractor(null, BaseToken.class);
+
+    /**
+     * Extracts the features of both mentions, of the tokens between them and the token
+     * distance, in that order. The mentions are passed to the extractors as they are.
+     */
+    @Override
+    public List<Feature> extract(JCas jCas, IdentifiedAnnotation mention1, IdentifiedAnnotation mention2)
+            throws AnalysisEngineProcessException {
+        // typed as plain Annotations, exactly as the extractors receive them
+        Annotation first = mention1;
+        Annotation second = mention2;
+
+        List<Feature> collected = new ArrayList<>();
+        collected.addAll(this.firstMentionExtractor.extract(jCas, first));
+        collected.addAll(this.secondMentionExtractor.extract(jCas, second));
+        collected.addAll(this.betweenTokensExtractor.extractBetween(jCas, first, second));
+        collected.addAll(this.tokenDistanceExtractor.extract(jCas, first, second));
+        return collected;
+    }
+
+}
Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/UnexpandedTokenFeaturesExtractor.java
------------------------------------------------------------------------------
svn:mime-type = text/plain