You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by st...@apache.org on 2013/01/29 20:22:52 UTC

svn commit: r1440068 - in /incubator/ctakes/trunk/ctakes-temporal: pom.xml src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java

Author: stevenbethard
Date: Tue Jan 29 19:22:51 2013
New Revision: 1440068

URL: http://svn.apache.org/viewvc?rev=1440068&view=rev
Log:
Adds initial draft of event-time temporal relation annotator, based on relation extraction framework

Added:
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java   (with props)
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java   (with props)
Modified:
    incubator/ctakes/trunk/ctakes-temporal/pom.xml

Modified: incubator/ctakes/trunk/ctakes-temporal/pom.xml
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/pom.xml?rev=1440068&r1=1440067&r2=1440068&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/pom.xml (original)
+++ incubator/ctakes/trunk/ctakes-temporal/pom.xml Tue Jan 29 19:22:51 2013
@@ -96,6 +96,10 @@
 			<artifactId>ctakes-dependency-parser</artifactId>
 		</dependency>
 		<dependency>
+			<groupId>org.apache.ctakes</groupId>
+			<artifactId>ctakes-relation-extractor</artifactId>
+		</dependency>
+		<dependency>
 			<groupId>net.sourceforge.ctakesresources</groupId>
 			<artifactId>ctakes-resources-umls2011ab</artifactId>
 			<version>3.1.0</version>

Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java?rev=1440068&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java Tue Jan 29 19:22:51 2013
@@ -0,0 +1,118 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae;
+
+import java.io.File;
+import java.util.List;
+
+import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
+import org.apache.ctakes.relationextractor.ae.features.PartOfSpeechFeaturesExtractor;
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.relationextractor.ae.features.TokenFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.classifier.CleartkAnnotator;
+import org.cleartk.classifier.DataWriter;
+import org.cleartk.classifier.jar.DefaultDataWriterFactory;
+import org.cleartk.classifier.jar.DirectoryDataWriterFactory;
+import org.cleartk.classifier.jar.GenericJarClassifierFactory;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.util.JCasUtil;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Relation extraction annotator whose candidate relations are all
+ * (EventMention, TimeMention) pairs covered by the same sentence.
+ */
+public class EventTimeRelationAnnotator extends RelationExtractorAnnotator {
+
+  /**
+   * Builds a description of this annotator with {@code PARAM_IS_TRAINING} set to true.
+   *
+   * @param dataWriterClass the data writer class passed as the data writer class name parameter
+   * @param outputDirectory the directory passed as the data writer output directory
+   * @param probabilityOfKeepingANegativeExample value passed (narrowed to float) as
+   *        {@link RelationExtractorAnnotator#PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE}
+   * @return the analysis engine description
+   * @throws ResourceInitializationException if the description cannot be created
+   */
+  public static AnalysisEngineDescription createDataWriterDescription(
+      Class<? extends DataWriter<String>> dataWriterClass,
+      File outputDirectory,
+      double probabilityOfKeepingANegativeExample) throws ResourceInitializationException {
+    return AnalysisEngineFactory.createPrimitiveDescription(
+        EventTimeRelationAnnotator.class,
+        CleartkAnnotator.PARAM_IS_TRAINING,
+        true,
+        DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
+        dataWriterClass,
+        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+        outputDirectory,
+        RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
+        // not sure why this has to be cast; something funny going on in uimaFIT maybe?
+        (float)probabilityOfKeepingANegativeExample);
+  }
+
+  /**
+   * Builds a description of this annotator with {@code PARAM_IS_TRAINING} set to false.
+   *
+   * @param modelDirectory directory in which the classifier jar {@code model.jar} is looked up
+   * @return the analysis engine description
+   * @throws ResourceInitializationException if the description cannot be created
+   */
+  public static AnalysisEngineDescription createAnnotatorDescription(File modelDirectory)
+      throws ResourceInitializationException {
+    return AnalysisEngineFactory.createPrimitiveDescription(
+        EventTimeRelationAnnotator.class,
+        CleartkAnnotator.PARAM_IS_TRAINING,
+        false,
+        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+        new File(modelDirectory, "model.jar"));
+  }
+
+  /** Returns a token feature extractor and a part-of-speech feature extractor. */
+  @Override
+  protected List<RelationFeaturesExtractor> getFeatureExtractors() {
+    return Lists.newArrayList(new TokenFeaturesExtractor(), new PartOfSpeechFeaturesExtractor());
+  }
+
+  /**
+   * Pairs every event mention covered by the sentence with every time mention
+   * covered by the same sentence, event first.
+   */
+  @Override
+  public List<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(
+      JCas jCas,
+      Sentence sentence) {
+    // select the time mentions once rather than once per event
+    List<TimeMention> times = JCasUtil.selectCovered(jCas, TimeMention.class, sentence);
+    List<IdentifiedAnnotationPair> pairs = Lists.newArrayList();
+    for (EventMention event : JCasUtil.selectCovered(jCas, EventMention.class, sentence)) {
+      for (TimeMention time : times) {
+        pairs.add(new IdentifiedAnnotationPair(event, time));
+      }
+    }
+    return pairs;
+  }
+}

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java?rev=1440068&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java Tue Jan 29 19:22:51 2013
@@ -0,0 +1,192 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.eval;
+
+import java.io.File;
+import java.util.Collection;
+import java.util.EnumSet;
+import java.util.List;
+
+import org.apache.ctakes.relationextractor.eval.RelationExtractorEvaluation.HashableArguments;
+import org.apache.ctakes.temporal.ae.EventTimeRelationAnnotator;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.TOP;
+import org.cleartk.classifier.jar.JarClassifierBuilder;
+import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter;
+import org.cleartk.eval.AnnotationStatistics;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.pipeline.JCasIterable;
+import org.uimafit.pipeline.SimplePipeline;
+import org.uimafit.util.JCasUtil;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Lists;
+import com.lexicalscope.jewel.cli.CliFactory;
+
+/**
+ * Trains {@link EventTimeRelationAnnotator} and scores its output against the
+ * TLINK relations found in the gold view.
+ */
+public class EvaluationOfTemporalRelations extends
+    Evaluation_ImplBase<AnnotationStatistics<String>> {
+
+  /**
+   * Parses the command line options, runs {@code trainAndTest} on the train and
+   * dev patient sets, and prints the resulting statistics to standard error.
+   */
+  public static void main(String[] args) throws Exception {
+    Options options = CliFactory.parseArguments(Options.class, args);
+    List<Integer> patientSets = options.getPatients().getList();
+    List<Integer> trainItems = THYMEData.getTrainPatientSets(patientSets);
+    List<Integer> devItems = THYMEData.getDevPatientSets(patientSets);
+    EvaluationOfTemporalRelations evaluation = new EvaluationOfTemporalRelations(
+        new File("target/eval/temporal-relations"),
+        options.getRawTextDirectory(),
+        options.getKnowtatorXMLDirectory());
+    AnnotationStatistics<String> stats = evaluation.trainAndTest(trainItems, devItems);
+    System.err.println(stats);
+  }
+
+  /**
+   * Passes the directories to the superclass with PART_OF_SPEECH_TAGS as the only annotator type.
+   */
+  public EvaluationOfTemporalRelations(
+      File baseDirectory,
+      File rawTextDirectory,
+      File knowtatorXMLDirectory) {
+    super(
+        baseDirectory,
+        rawTextDirectory,
+        knowtatorXMLDirectory,
+        EnumSet.of(AnnotatorType.PART_OF_SPEECH_TAGS));
+  }
+
+  /** Adds event and time mentions to the annotation classes reported by the superclass. */
+  @Override
+  protected List<Class<? extends TOP>> getAnnotationClassesThatShouldBeGoldAtTestTime() {
+    List<Class<? extends TOP>> result = super.getAnnotationClassesThatShouldBeGoldAtTestTime();
+    result.add(EventMention.class);
+    result.add(TimeMention.class);
+    return result;
+  }
+
+  /**
+   * Writes training data for the event-time relation classifier into
+   * {@code directory}, then trains and packages the model in that directory.
+   */
+  @Override
+  protected void train(CollectionReader collectionReader, File directory) throws Exception {
+    AggregateBuilder aggregateBuilder = new AggregateBuilder();
+    aggregateBuilder.add(this.getPreprocessorTrainDescription());
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveNonTLINKRelations.class));
+    aggregateBuilder.add(EventTimeRelationAnnotator.createDataWriterDescription(
+        LIBSVMStringOutcomeDataWriter.class,
+        directory,
+        1.0));
+    SimplePipeline.runPipeline(collectionReader, aggregateBuilder.createAggregate());
+    JarClassifierBuilder.trainAndPackage(directory, "-c", "1000");
+  }
+
+  /**
+   * Runs the annotator loaded from {@code directory} over the documents and compares
+   * the relations in the default view against the relations left in the gold view.
+   */
+  @Override
+  protected AnnotationStatistics<String> test(CollectionReader collectionReader, File directory)
+      throws Exception {
+    AggregateBuilder aggregateBuilder = new AggregateBuilder();
+    aggregateBuilder.add(this.getPreprocessorTestDescription());
+    // the sofa mapping makes this engine operate on the gold view
+    aggregateBuilder.add(
+        AnalysisEngineFactory.createPrimitiveDescription(RemoveNonTLINKRelations.class),
+        CAS.NAME_DEFAULT_SOFA,
+        GOLD_VIEW_NAME);
+    // added without a sofa mapping, ahead of the relation annotator
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveRelations.class));
+    aggregateBuilder.add(EventTimeRelationAnnotator.createAnnotatorDescription(directory));
+
+    Function<BinaryTextRelation, ?> getSpan = new Function<BinaryTextRelation, HashableArguments>() {
+      public HashableArguments apply(BinaryTextRelation relation) {
+        return new HashableArguments(relation);
+      }
+    };
+    Function<BinaryTextRelation, String> getOutcome = AnnotationStatistics.annotationToFeatureValue("category");
+
+    AnnotationStatistics<String> stats = new AnnotationStatistics<String>();
+    for (JCas jCas : new JCasIterable(collectionReader, aggregateBuilder.createAggregate())) {
+      JCas goldView = jCas.getView(GOLD_VIEW_NAME);
+      JCas systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
+      Collection<BinaryTextRelation> goldRelations = JCasUtil.select(
+          goldView,
+          BinaryTextRelation.class);
+      Collection<BinaryTextRelation> systemRelations = JCasUtil.select(
+          systemView,
+          BinaryTextRelation.class);
+      stats.add(goldRelations, systemRelations, getSpan, getOutcome);
+    }
+    return stats;
+  }
+
+  /** Removes a relation and both of its arguments from the CAS indexes. */
+  private static void removeRelationAndArguments(BinaryTextRelation relation) {
+    relation.getArg1().removeFromIndexes();
+    relation.getArg2().removeFromIndexes();
+    relation.removeFromIndexes();
+  }
+
+  /**
+   * Removes every relation whose category does not start with {@code "TLINK"};
+   * a relation with no category is removed as well.
+   */
+  public static class RemoveNonTLINKRelations extends JCasAnnotator_ImplBase {
+    @Override
+    public void process(JCas jCas) throws AnalysisEngineProcessException {
+      // iterate over a copy because relations are removed from the index in the loop
+      for (BinaryTextRelation relation : Lists.newArrayList(JCasUtil.select(
+          jCas,
+          BinaryTextRelation.class))) {
+        String category = relation.getCategory();
+        if (category == null || !category.startsWith("TLINK")) {
+          removeRelationAndArguments(relation);
+        }
+      }
+    }
+  }
+
+  /** Removes every relation, together with its arguments, from the view it runs on. */
+  public static class RemoveRelations extends JCasAnnotator_ImplBase {
+    @Override
+    public void process(JCas jCas) throws AnalysisEngineProcessException {
+      // iterate over a copy because relations are removed from the index in the loop
+      for (BinaryTextRelation relation : Lists.newArrayList(JCasUtil.select(
+          jCas,
+          BinaryTextRelation.class))) {
+        removeRelationAndArguments(relation);
+      }
+    }
+  }
+}

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain