You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by st...@apache.org on 2013/01/29 20:22:52 UTC
svn commit: r1440068 - in /incubator/ctakes/trunk/ctakes-temporal: pom.xml
src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java
src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java
Author: stevenbethard
Date: Tue Jan 29 19:22:51 2013
New Revision: 1440068
URL: http://svn.apache.org/viewvc?rev=1440068&view=rev
Log:
Adds initial draft of event-time temporal relation annotator, based on relation extraction framework
Added:
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java (with props)
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java (with props)
Modified:
incubator/ctakes/trunk/ctakes-temporal/pom.xml
Modified: incubator/ctakes/trunk/ctakes-temporal/pom.xml
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/pom.xml?rev=1440068&r1=1440067&r2=1440068&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/pom.xml (original)
+++ incubator/ctakes/trunk/ctakes-temporal/pom.xml Tue Jan 29 19:22:51 2013
@@ -96,6 +96,10 @@
<artifactId>ctakes-dependency-parser</artifactId>
</dependency>
<dependency>
+ <groupId>org.apache.ctakes</groupId>
+ <artifactId>ctakes-relation-extractor</artifactId>
+ </dependency>
+ <dependency>
<groupId>net.sourceforge.ctakesresources</groupId>
<artifactId>ctakes-resources-umls2011ab</artifactId>
<version>3.1.0</version>
Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java?rev=1440068&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java Tue Jan 29 19:22:51 2013
@@ -0,0 +1,72 @@
+package org.apache.ctakes.temporal.ae;
+
+import java.io.File;
+import java.util.List;
+
+import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
+import org.apache.ctakes.relationextractor.ae.features.PartOfSpeechFeaturesExtractor;
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.relationextractor.ae.features.TokenFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.classifier.CleartkAnnotator;
+import org.cleartk.classifier.DataWriter;
+import org.cleartk.classifier.jar.DefaultDataWriterFactory;
+import org.cleartk.classifier.jar.DirectoryDataWriterFactory;
+import org.cleartk.classifier.jar.GenericJarClassifierFactory;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.util.JCasUtil;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Relation annotator whose candidate relations are all (event, time) pairs that occur within the
+ * same sentence.
+ * <p>
+ * This class only supplies the feature extractors and the candidate argument pairs, together with
+ * factory methods for training-mode and prediction-mode descriptions; everything else is inherited
+ * from {@link RelationExtractorAnnotator}.
+ */
+public class EventTimeRelationAnnotator extends RelationExtractorAnnotator {
+
+  /**
+   * Creates a description of this annotator configured for training (writing classifier data).
+   *
+   * @param dataWriterClass
+   *          the data writer class that will write the training instances
+   * @param outputDirectory
+   *          the directory the data writer writes to
+   * @param probabilityOfKeepingANegativeExample
+   *          value passed as
+   *          {@link RelationExtractorAnnotator#PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE}
+   * @return the analysis engine description, with {@link CleartkAnnotator#PARAM_IS_TRAINING} set
+   *         to {@code true}
+   * @throws ResourceInitializationException
+   *           if the description cannot be created
+   */
+  public static AnalysisEngineDescription createDataWriterDescription(
+      Class<? extends DataWriter<String>> dataWriterClass,
+      File outputDirectory,
+      double probabilityOfKeepingANegativeExample) throws ResourceInitializationException {
+    return AnalysisEngineFactory.createPrimitiveDescription(
+        EventTimeRelationAnnotator.class,
+        CleartkAnnotator.PARAM_IS_TRAINING,
+        true,
+        DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
+        dataWriterClass,
+        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+        outputDirectory,
+        RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
+        // The value must be handed over as a float. NOTE(review): the original author was unsure
+        // why; presumably the underlying UIMA parameter is Float-typed - confirm before removing.
+        (float) probabilityOfKeepingANegativeExample);
+  }
+
+  /**
+   * Creates a description of this annotator configured for prediction (not training).
+   *
+   * @param modelDirectory
+   *          the directory that contains the packaged classifier, {@code model.jar}
+   * @return the analysis engine description, with {@link CleartkAnnotator#PARAM_IS_TRAINING} set
+   *         to {@code false}
+   * @throws ResourceInitializationException
+   *           if the description cannot be created
+   */
+  public static AnalysisEngineDescription createAnnotatorDescription(File modelDirectory)
+      throws ResourceInitializationException {
+    return AnalysisEngineFactory.createPrimitiveDescription(
+        EventTimeRelationAnnotator.class,
+        CleartkAnnotator.PARAM_IS_TRAINING,
+        false,
+        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+        new File(modelDirectory, "model.jar"));
+  }
+
+  /**
+   * Returns the feature extractors applied to each candidate pair: token features and
+   * part-of-speech features.
+   */
+  @Override
+  protected List<RelationFeaturesExtractor> getFeatureExtractors() {
+    return Lists.newArrayList(new TokenFeaturesExtractor(), new PartOfSpeechFeaturesExtractor());
+  }
+
+  /**
+   * Pairs every {@link EventMention} covered by the sentence with every {@link TimeMention}
+   * covered by the same sentence. The event is always the first argument and the time the second.
+   *
+   * @param jCas
+   *          the CAS to read the mentions from
+   * @param sentence
+   *          the sentence whose covered mentions are paired
+   * @return one pair per (event, time) combination, grouped by event; empty if the sentence
+   *         covers no event or no time
+   */
+  @Override
+  public List<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(
+      JCas jCas,
+      Sentence sentence) {
+    // The time mentions of the sentence do not depend on the event, so look them up only once
+    // instead of once per event.
+    List<TimeMention> times = JCasUtil.selectCovered(jCas, TimeMention.class, sentence);
+    List<IdentifiedAnnotationPair> pairs = Lists.newArrayList();
+    for (EventMention event : JCasUtil.selectCovered(jCas, EventMention.class, sentence)) {
+      for (TimeMention time : times) {
+        pairs.add(new IdentifiedAnnotationPair(event, time));
+      }
+    }
+    return pairs;
+  }
+}
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java?rev=1440068&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java Tue Jan 29 19:22:51 2013
@@ -0,0 +1,159 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.eval;
+
+import java.io.File;
+import java.util.Collection;
+import java.util.EnumSet;
+import java.util.List;
+
+import org.apache.ctakes.relationextractor.eval.RelationExtractorEvaluation.HashableArguments;
+import org.apache.ctakes.temporal.ae.EventTimeRelationAnnotator;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.TOP;
+import org.cleartk.classifier.jar.JarClassifierBuilder;
+import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter;
+import org.cleartk.eval.AnnotationStatistics;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.pipeline.JCasIterable;
+import org.uimafit.pipeline.SimplePipeline;
+import org.uimafit.util.JCasUtil;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Lists;
+import com.lexicalscope.jewel.cli.CliFactory;
+
+/**
+ * Trains and evaluates the {@link EventTimeRelationAnnotator}.
+ * <p>
+ * Training writes LIBSVM data for the relations whose category starts with {@code "TLINK"} and
+ * packages a model; testing runs the packaged model and compares the relations in the system view
+ * against the TLINK relations in the gold view.
+ */
+public class EvaluationOfTemporalRelations extends
+    Evaluation_ImplBase<AnnotationStatistics<String>> {
+
+  /**
+   * Command-line entry point: trains on the training patient sets, tests on the development
+   * patient sets, and prints the resulting statistics to standard error.
+   *
+   * @param args
+   *          command-line arguments, parsed into {@code Options}
+   * @throws Exception
+   *           if argument parsing, training or testing fails
+   */
+  public static void main(String[] args) throws Exception {
+    Options options = CliFactory.parseArguments(Options.class, args);
+    List<Integer> patientSets = options.getPatients().getList();
+    List<Integer> trainItems = THYMEData.getTrainPatientSets(patientSets);
+    List<Integer> devItems = THYMEData.getDevPatientSets(patientSets);
+    EvaluationOfTemporalRelations evaluation = new EvaluationOfTemporalRelations(
+        new File("target/eval/temporal-relations"),
+        options.getRawTextDirectory(),
+        options.getKnowtatorXMLDirectory());
+    AnnotationStatistics<String> stats = evaluation.trainAndTest(trainItems, devItems);
+    System.err.println(stats);
+  }
+
+  /**
+   * Creates an evaluation that requests part-of-speech tags from the preprocessing pipeline.
+   *
+   * @param baseDirectory
+   *          passed to the superclass as the base directory
+   * @param rawTextDirectory
+   *          passed to the superclass as the raw text directory
+   * @param knowtatorXMLDirectory
+   *          passed to the superclass as the Knowtator XML directory
+   */
+  public EvaluationOfTemporalRelations(
+      File baseDirectory,
+      File rawTextDirectory,
+      File knowtatorXMLDirectory) {
+    super(
+        baseDirectory,
+        rawTextDirectory,
+        knowtatorXMLDirectory,
+        EnumSet.of(AnnotatorType.PART_OF_SPEECH_TAGS));
+  }
+
+  /**
+   * Adds {@link EventMention} and {@link TimeMention} to the annotation classes reported by the
+   * superclass, so that gold events and times are used when testing.
+   */
+  @Override
+  protected List<Class<? extends TOP>> getAnnotationClassesThatShouldBeGoldAtTestTime() {
+    // NOTE(review): this appends to the list returned by the superclass; assumes that list is
+    // mutable and not shared between calls - confirm against Evaluation_ImplBase.
+    List<Class<? extends TOP>> result = super.getAnnotationClassesThatShouldBeGoldAtTestTime();
+    result.add(EventMention.class);
+    result.add(TimeMention.class);
+    return result;
+  }
+
+  /**
+   * Writes training data for the TLINK relations of the given documents and packages a classifier
+   * in {@code directory}.
+   *
+   * @param collectionReader
+   *          reader over the training documents
+   * @param directory
+   *          directory that receives the training data and the packaged model
+   * @throws Exception
+   *           if the pipeline or the classifier training fails
+   */
+  @Override
+  protected void train(CollectionReader collectionReader, File directory) throws Exception {
+    AggregateBuilder aggregateBuilder = new AggregateBuilder();
+    aggregateBuilder.add(this.getPreprocessorTrainDescription());
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveNonTLINKRelations.class));
+    // 1.0 == keep every negative example
+    aggregateBuilder.add(EventTimeRelationAnnotator.createDataWriterDescription(
+        LIBSVMStringOutcomeDataWriter.class,
+        directory,
+        1.0));
+    SimplePipeline.runPipeline(collectionReader, aggregateBuilder.createAggregate());
+    // "-c 1000" is handed to the classifier trainer (presumably the LIBSVM cost parameter)
+    JarClassifierBuilder.trainAndPackage(directory, "-c", "1000");
+  }
+
+  /**
+   * Runs the packaged classifier over the given documents and scores the predicted relations
+   * against the gold TLINK relations. Relations are matched on their arguments and compared on
+   * their {@code category} feature.
+   *
+   * @param collectionReader
+   *          reader over the test documents
+   * @param directory
+   *          directory that contains the packaged model
+   * @return the accumulated statistics over all documents
+   * @throws Exception
+   *           if the pipeline fails
+   */
+  @Override
+  protected AnnotationStatistics<String> test(CollectionReader collectionReader, File directory)
+      throws Exception {
+    AggregateBuilder aggregateBuilder = new AggregateBuilder();
+    aggregateBuilder.add(this.getPreprocessorTestDescription());
+    // restrict the gold view to TLINK relations (default sofa is mapped onto the gold view)
+    aggregateBuilder.add(
+        AnalysisEngineFactory.createPrimitiveDescription(RemoveNonTLINKRelations.class),
+        CAS.NAME_DEFAULT_SOFA,
+        GOLD_VIEW_NAME);
+    // drop every relation from the default view before the annotator adds its predictions
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveRelations.class));
+    aggregateBuilder.add(EventTimeRelationAnnotator.createAnnotatorDescription(directory));
+
+    Function<BinaryTextRelation, ?> getSpan = new Function<BinaryTextRelation, HashableArguments>() {
+      public HashableArguments apply(BinaryTextRelation relation) {
+        return new HashableArguments(relation);
+      }
+    };
+    Function<BinaryTextRelation, String> getOutcome = AnnotationStatistics.annotationToFeatureValue("category");
+
+    AnnotationStatistics<String> stats = new AnnotationStatistics<String>();
+    for (JCas jCas : new JCasIterable(collectionReader, aggregateBuilder.createAggregate())) {
+      JCas goldView = jCas.getView(GOLD_VIEW_NAME);
+      JCas systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
+      Collection<BinaryTextRelation> goldRelations = JCasUtil.select(
+          goldView,
+          BinaryTextRelation.class);
+      Collection<BinaryTextRelation> systemRelations = JCasUtil.select(
+          systemView,
+          BinaryTextRelation.class);
+      stats.add(goldRelations, systemRelations, getSpan, getOutcome);
+    }
+    return stats;
+  }
+
+  /** Returns whether the relation has a category that starts with {@code "TLINK"}. */
+  private static boolean isTLINK(BinaryTextRelation relation) {
+    // a relation without any category cannot be a TLINK; do not fail on it
+    String category = relation.getCategory();
+    return category != null && category.startsWith("TLINK");
+  }
+
+  /** Removes the relation and both of its arguments from the CAS indexes. */
+  private static void removeRelationAndArguments(BinaryTextRelation relation) {
+    relation.getArg1().removeFromIndexes();
+    relation.getArg2().removeFromIndexes();
+    relation.removeFromIndexes();
+  }
+
+  /**
+   * Removes every relation whose category does not start with {@code "TLINK"} (including
+   * relations without a category), together with its two arguments.
+   */
+  public static class RemoveNonTLINKRelations extends JCasAnnotator_ImplBase {
+    @Override
+    public void process(JCas jCas) throws AnalysisEngineProcessException {
+      // iterate over a copy, since relations are removed from the index while looping
+      for (BinaryTextRelation relation : Lists.newArrayList(JCasUtil.select(
+          jCas,
+          BinaryTextRelation.class))) {
+        if (!isTLINK(relation)) {
+          removeRelationAndArguments(relation);
+        }
+      }
+    }
+  }
+
+  /** Removes every relation, together with its two arguments. */
+  public static class RemoveRelations extends JCasAnnotator_ImplBase {
+    @Override
+    public void process(JCas jCas) throws AnalysisEngineProcessException {
+      // iterate over a copy, since relations are removed from the index while looping
+      for (BinaryTextRelation relation : Lists.newArrayList(JCasUtil.select(
+          jCas,
+          BinaryTextRelation.class))) {
+        removeRelationAndArguments(relation);
+      }
+    }
+  }
+}
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java
------------------------------------------------------------------------------
svn:mime-type = text/plain