You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by cl...@apache.org on 2015/07/15 23:22:06 UTC

svn commit: r1691276 - /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationGoldContainerAnnotator.java

Author: clin
Date: Wed Jul 15 21:22:06 2015
New Revision: 1691276

URL: http://svn.apache.org/r1691276
Log:
An event-event relation annotator that makes use of known containers info.

Added:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationGoldContainerAnnotator.java   (with props)

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationGoldContainerAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationGoldContainerAnnotator.java?rev=1691276&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationGoldContainerAnnotator.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationGoldContainerAnnotator.java Wed Jul 15 21:22:06 2015
@@ -0,0 +1,330 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
+import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator.IdentifiedAnnotationPair;
+import org.apache.ctakes.relationextractor.ae.features.PartOfSpeechFeaturesExtractor;
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.relationextractor.ae.features.TokenFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.CheckSpecialWordRelationExtractor;
+import org.apache.ctakes.temporal.ae.feature.ConjunctionRelationFeaturesExtractor;
+//import org.apache.ctakes.temporal.ae.feature.DependencyParseUtils;
+import org.apache.ctakes.temporal.ae.feature.DependencyPathFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.CoordinateFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.DependingVerbsFeatureExtractor;
+import org.apache.ctakes.temporal.ae.feature.EmptyFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.MultiTokenFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.NoOtherETPuntInBetweenFeaturesExtractor;
+//import org.apache.ctakes.temporal.ae.feature.EventInBetweenPropertyExtractor;
+//import org.apache.ctakes.temporal.ae.feature.EventOutsidePropertyExtractor;
+import org.apache.ctakes.temporal.ae.feature.SpecialAnnotationRelationExtractor;
+import org.apache.ctakes.temporal.ae.feature.TemporalPETFlatExtractor;
+import org.apache.ctakes.temporal.ae.feature.TokenPropertyFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.DeterminerRelationFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.EventArgumentPropertyExtractor;
+import org.apache.ctakes.temporal.ae.feature.EventTimeRelationFeatureExtractor;
+import org.apache.ctakes.temporal.ae.feature.EventPositionRelationFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.NumberOfEventsInTheSameSentenceExtractor;
+import org.apache.ctakes.temporal.ae.feature.NearbyVerbTenseRelationExtractor;
+import org.apache.ctakes.temporal.ae.feature.NumberOfEventTimeBetweenCandidatesExtractor;
+import org.apache.ctakes.temporal.ae.feature.OverlappedHeadFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.SRLRelationFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.TimeXRelationFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.SectionHeaderRelationExtractor;
+//import org.apache.ctakes.temporal.ae.feature.TemporalAttributeFeatureExtractor;
+import org.apache.ctakes.temporal.ae.feature.UmlsFeatureExtractor;
+import org.apache.ctakes.temporal.ae.feature.UnexpandedTokenFeaturesExtractor;
+//import org.apache.ctakes.temporal.ae.feature.UnexpandedTokenFeaturesExtractor;
+//import org.apache.ctakes.temporal.ae.feature.treekernel.TemporalPETExtractor;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.relation.TemporalTextRelation;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
+//import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.ml.CleartkAnnotator;
+import org.cleartk.ml.DataWriter;
+import org.cleartk.ml.jar.DefaultDataWriterFactory;
+import org.cleartk.ml.jar.DirectoryDataWriterFactory;
+import org.cleartk.ml.jar.GenericJarClassifierFactory;
+import org.cleartk.util.ViewUriUtil;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.util.JCasUtil;
+
+import com.google.common.collect.Lists;
+
+public class EventEventRelationGoldContainerAnnotator extends RelationExtractorAnnotator {
+
+	public static AnalysisEngineDescription createDataWriterDescription(
+			Class<? extends DataWriter<String>> dataWriterClass,
+					File outputDirectory,
+					double probabilityOfKeepingANegativeExample) throws ResourceInitializationException {
+		return AnalysisEngineFactory.createEngineDescription(
+				EventEventRelationGoldContainerAnnotator.class,
+				CleartkAnnotator.PARAM_IS_TRAINING,
+				true,
+				DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
+				dataWriterClass,
+				DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+				outputDirectory,
+				RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
+				// not sure why this has to be cast; something funny going on in uimaFIT maybe?
+				(float) probabilityOfKeepingANegativeExample);
+	}
+
+	/**
+	 * Builds the classification-mode engine description for this annotator.
+	 *
+	 * @param modelPath value supplied for
+	 *        {@link GenericJarClassifierFactory#PARAM_CLASSIFIER_JAR_PATH}
+	 * @return a description with {@link CleartkAnnotator#PARAM_IS_TRAINING} set to false
+	 * @throws ResourceInitializationException propagated from the uimaFIT factory call
+	 */
+	public static AnalysisEngineDescription createAnnotatorDescription(String modelPath)
+			throws ResourceInitializationException {
+		// name/value pairs, in the order they are handed to uimaFIT
+		final Object[] classificationSettings = {
+				CleartkAnnotator.PARAM_IS_TRAINING, false,
+				GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, modelPath
+		};
+		return AnalysisEngineFactory.createEngineDescription(
+				EventEventRelationGoldContainerAnnotator.class,
+				classificationSettings);
+	}
+
+
+	/**
+	 * Builds the classification-mode engine description from a model directory.
+	 * The classifier jar is expected at {@code model.jar} inside that directory.
+	 *
+	 * @param modelDirectory directory that contains {@code model.jar}
+	 * @return a description with {@link CleartkAnnotator#PARAM_IS_TRAINING} set to false
+	 * @throws ResourceInitializationException propagated from the uimaFIT factory call
+	 * @deprecated use {@link #createAnnotatorDescription(String)} with a String path instead of a File.
+	 * ClearTK will automatically resolve the String to an InputStream.
+	 * This allows resources to be read from within a jar as well as from a File.
+	 */
+	@Deprecated
+	public static AnalysisEngineDescription createAnnotatorDescription(File modelDirectory)
+			throws ResourceInitializationException {
+		return AnalysisEngineFactory.createEngineDescription(
+				EventEventRelationGoldContainerAnnotator.class,
+				CleartkAnnotator.PARAM_IS_TRAINING,
+				false,
+				GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+				new File(modelDirectory, "model.jar"));
+	}
+
+	/**
+	 * Supplies the feature extractors used by this annotator.
+	 *
+	 * @return a new mutable list holding, in this order: unexpanded token, part-of-speech,
+	 *         event argument property, UMLS, dependency path and overlapped head extractors
+	 */
+	@Override
+	protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors() {
+		List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> extractors = new ArrayList<>();
+
+		// An earlier note in this file recommends the unexpanded token extractor for
+		// i2b2 data; TokenFeaturesExtractor, MultiTokenFeaturesExtractor and
+		// EmptyFeaturesExtractor were alternatives left disabled.
+		extractors.add(new UnexpandedTokenFeaturesExtractor());
+		extractors.add(new PartOfSpeechFeaturesExtractor());
+		extractors.add(new EventArgumentPropertyExtractor());
+		extractors.add(new UmlsFeatureExtractor());
+		extractors.add(new DependencyPathFeaturesExtractor());
+		extractors.add(new OverlappedHeadFeaturesExtractor());
+
+		// Left disabled by the original author:
+		//   NoOtherETPuntInBetweenFeaturesExtractor, NumberOfEventTimeBetweenCandidatesExtractor,
+		//   NearbyVerbTenseRelationExtractor, CheckSpecialWordRelationExtractor,
+		//   CoordinateFeaturesExtractor, SRLRelationFeaturesExtractor,
+		//   NumberOfEventsInTheSameSentenceExtractor, ConjunctionRelationFeaturesExtractor,
+		//   EventTimeRelationFeatureExtractor, SectionHeaderRelationExtractor,
+		//   DeterminerRelationFeaturesExtractor, TokenPropertyFeaturesExtractor,
+		//   DependingVerbsFeatureExtractor, TemporalPETFlatExtractor
+		// Marked "not helpful" by the original author:
+		//   EventPositionRelationFeaturesExtractor, TimeXRelationFeaturesExtractor,
+		//   SpecialAnnotationRelationExtractor
+
+		return extractors;
+	}
+
+	/**
+	 * Names {@link Sentence} as the covering annotation type of this annotator.
+	 * Presumably the parent annotator requests candidate pairs once per annotation
+	 * of this type -- confirm against RelationExtractorAnnotator.
+	 *
+	 * @return {@code Sentence.class}
+	 */
+	@Override
+	protected Class<? extends Annotation> getCoveringClass() {
+		return Sentence.class;
+	}
+
+	@Override
+	protected List<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(
+			JCas jCas, Annotation sentence) {
+
+		JCas goldView = null;
+		try {
+			goldView = jCas.getView("GoldView");
+		} catch (CASException e) {
+			new AnalysisEngineProcessException(e).printStackTrace();
+		}
+
+
+		List<IdentifiedAnnotationPair> pairs = Lists.newArrayList();
+		
+		List<EventMention> events = new ArrayList<>(JCasUtil.selectCovered(jCas, EventMention.class, sentence));
+		//filter events:
+		List<EventMention> realEvents = Lists.newArrayList();
+		for( EventMention event : events){
+			if(event.getClass().equals(EventMention.class)){
+				realEvents.add(event);
+			}
+		}
+		events = realEvents;
+
+		int eventNum = events.size();
+
+		if(eventNum == 0){
+			return pairs;
+		}
+
+		//find all container events:
+		HashSet<EventMention> containers = new HashSet<>();
+		for(TemporalTextRelation relation : JCasUtil.select(goldView, TemporalTextRelation.class)){
+			Annotation arg1 = relation.getArg1().getArgument();
+			Annotation arg2 = relation.getArg2().getArgument();
+			String type = relation.getCategory();
+			if( withinSentence(arg1,sentence) && withinSentence(arg2,sentence) ){
+				if(arg1 instanceof EventMention && arg2 instanceof EventMention){
+					//get all gold containers
+					if(type.equals("CONTAINS")){
+						EventMention container = findEvent(arg1,events);
+						if(container != null) {
+							containers.add(container);
+							events.remove(container);
+						}
+					}else if (type.equals("CONTAINS-1")){
+						EventMention container = findEvent(arg2,events);
+						if(container != null ){
+							containers.add(container);
+							events.remove(container);
+						}
+					}
+
+				}
+			}
+		}
+
+		int containerNum = containers.size();
+		if(containerNum==0){
+			return pairs;
+		}
+
+		
+		for(EventMention currentEvent : events){
+			if(containerNum==1){
+				for(EventMention container: containers){
+					pairs.add(new IdentifiedAnnotationPair(container, currentEvent));
+				}
+			}else{//if there are multiple containers
+				for(EventMention container: containers){
+					boolean noContainerInBetween = true;
+					for( EventMention inbetweenPotentialContainer : JCasUtil.selectBetween(jCas, EventMention.class, container, currentEvent)){
+						if(containers.contains(inbetweenPotentialContainer)){
+							noContainerInBetween = false;
+							break;
+						}
+					}
+					if(noContainerInBetween){
+						pairs.add(new IdentifiedAnnotationPair(container, currentEvent));
+					}
+				}
+			}
+		}		
+
+		return pairs;
+	}
+
+	/**
+	 * Looks up the event whose span is identical to that of the given annotation.
+	 *
+	 * @param arg annotation whose begin and end offsets are matched
+	 * @param events candidates, examined in list order
+	 * @return the first event with the same begin and end offsets as {@code arg},
+	 *         or null when no event matches
+	 */
+	private static EventMention findEvent(Annotation arg, List<EventMention> events) {
+		for (int index = 0; index < events.size(); index++) {
+			EventMention candidate = events.get(index);
+			boolean sameSpan = arg.getBegin() == candidate.getBegin()
+					&& arg.getEnd() == candidate.getEnd();
+			if (!sameSpan) {
+				continue;
+			}
+			return candidate;
+		}
+		return null;
+	}
+
+	/**
+	 * Checks that an annotation's span lies inside a sentence's span, bounds included.
+	 *
+	 * @param arg annotation to test
+	 * @param sentence annotation providing the enclosing span
+	 * @return true when {@code arg} begins at or after the sentence begin and ends
+	 *         at or before the sentence end
+	 */
+	private static boolean withinSentence(Annotation arg, Annotation sentence) {
+		return arg.getBegin() >= sentence.getBegin() && arg.getEnd() <= sentence.getEnd();
+	}
+
+	/**
+	 * Creates a TemporalTextRelation between the two annotations and adds it,
+	 * together with its two RelationArguments, to the indexes of the given view.
+	 *
+	 * @param jCas view in which the new feature structures are created
+	 * @param arg1 annotation stored under role "Arg1"
+	 * @param arg2 annotation stored under role "Arg2"
+	 * @param predictedCategory category assigned to the new relation
+	 */
+	@Override
+	protected void createRelation(JCas jCas, IdentifiedAnnotation arg1,
+			IdentifiedAnnotation arg2, String predictedCategory) {
+		// arguments are created and indexed first, in Arg1, Arg2 order
+		RelationArgument first = newIndexedRelationArgument(jCas, arg1, "Arg1");
+		RelationArgument second = newIndexedRelationArgument(jCas, arg2, "Arg2");
+
+		TemporalTextRelation temporalRelation = new TemporalTextRelation(jCas);
+		temporalRelation.setArg1(first);
+		temporalRelation.setArg2(second);
+		temporalRelation.setCategory(predictedCategory);
+		temporalRelation.addToIndexes();
+	}
+
+	/** Creates a RelationArgument for the annotation with the given role and adds it to the indexes. */
+	private static RelationArgument newIndexedRelationArgument(JCas jCas,
+			IdentifiedAnnotation annotation, String role) {
+		RelationArgument relationArgument = new RelationArgument(jCas);
+		relationArgument.setArgument(annotation);
+		relationArgument.setRole(role);
+		relationArgument.addToIndexes();
+		return relationArgument;
+	}
+
+	/**
+	 * Determines the training label for an ordered argument pair.
+	 * <p>
+	 * A TemporalTextRelation stored for (arg1, arg2) yields its own category.
+	 * Otherwise a TemporalTextRelation stored for (arg2, arg1) yields its category
+	 * unchanged when that is "OVERLAP", and with "-1" appended for anything else
+	 * (BEFORE / AFTER are not swapped). When no category results, the pair is
+	 * labelled NO_RELATION_CATEGORY only if a random draw is at most
+	 * probabilityOfKeepingANegativeExample.
+	 *
+	 * @param relationLookup relations keyed by their ordered argument list
+	 * @param arg1 first argument of the candidate pair
+	 * @param arg2 second argument of the candidate pair
+	 * @return the category for the pair, or null when the negative example is dropped
+	 */
+	@Override
+	protected String getRelationCategory(
+			Map<List<Annotation>, BinaryTextRelation> relationLookup,
+			IdentifiedAnnotation arg1,
+			IdentifiedAnnotation arg2) {
+		String category = null;
+
+		BinaryTextRelation forward = relationLookup.get(Arrays.asList(arg1, arg2));
+		if (forward instanceof TemporalTextRelation) {
+			category = forward.getCategory();
+		} else {
+			// nothing usable in the given order: try the relation stored the other way round
+			BinaryTextRelation backward = relationLookup.get(Arrays.asList(arg2, arg1));
+			if (backward instanceof TemporalTextRelation) {
+				String reversedCategory = backward.getCategory();
+				category = reversedCategory.equals("OVERLAP")
+						? reversedCategory
+						: reversedCategory + "-1";
+			}
+		}
+
+		if (category != null) {
+			return category;
+		}
+		// the random draw happens only for pairs without a category
+		if (coin.nextDouble() <= this.probabilityOfKeepingANegativeExample) {
+			return NO_RELATION_CATEGORY;
+		}
+		return null;
+	}
+}

Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationGoldContainerAnnotator.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain