You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by cl...@apache.org on 2013/08/06 22:00:28 UTC
svn commit: r1511107 - in
/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal: ae/
eval/
Author: clin
Date: Tue Aug 6 20:00:28 2013
New Revision: 1511107
URL: http://svn.apache.org/r1511107
Log:
add evaluation script to compare cleartk's "DocTimeRel" and our "DocTimeRel".
Added:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ClearTKDocTimeRelAnnotator.java (with props)
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ClearTKDocumentCreationTimeAnnotator.java (with props)
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventToClearTKEventAnnotator.java (with props)
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKEventProperties.java (with props)
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ClearTKDocTimeRelAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ClearTKDocTimeRelAnnotator.java?rev=1511107&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ClearTKDocTimeRelAnnotator.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ClearTKDocTimeRelAnnotator.java Tue Aug 6 20:00:28 2013
@@ -0,0 +1,66 @@
+package org.apache.ctakes.temporal.ae;
+
+import org.apache.ctakes.typesystem.type.refsem.Event;
+import org.apache.ctakes.typesystem.type.refsem.EventProperties;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSArray;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.timeml.type.Anchor;
+import org.cleartk.timeml.type.TemporalLink;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.util.JCasUtil;
+
+/**
+ * for every cTAKES EventMention annotation, create a ClearTK Event Annotation
+ * @author Chen Lin
+ *
+ */
+public class ClearTKDocTimeRelAnnotator extends JCasAnnotator_ImplBase {
+
+ public ClearTKDocTimeRelAnnotator() {
+ // TODO Auto-generated constructor stub
+ }
+
+ @Override
+ public void process(JCas jCas)
+ throws AnalysisEngineProcessException {
+ for (TemporalLink tlink : JCasUtil.select(jCas, TemporalLink.class)) {
+ // create a cleartk event object
+ String relation = tlink.getRelationType();
+// System.out.println("__find tlink relation: " + relation + " srouce: " + tlink.getSource().getCoveredText() + " target: " + tlink.getTarget().getType().getName());
+
+ Anchor linksource = tlink.getSource();
+
+ EventMention ement = new EventMention(jCas);
+ ement.setBegin(linksource.getBegin());
+ ement.setEnd(linksource.getEnd());
+ Event event = new Event(jCas);
+
+ EventProperties eventProperties = new EventProperties(jCas);
+ if ("INCLUDES".equals(relation)){
+ eventProperties.setDocTimeRel("OVERLAP");
+ }else{
+ eventProperties.setDocTimeRel(relation);
+ }
+ event.setProperties(eventProperties);
+ event.setMentions(new FSArray(jCas, 1));
+ event.setMentions(0, ement);
+ ement.setEvent(event);
+
+ eventProperties.addToIndexes();
+ event.addToIndexes();
+ ement.addToIndexes();
+
+ }
+
+ }
+
+ public static AnalysisEngineDescription getAnnotatorDescription() throws ResourceInitializationException {
+ return AnalysisEngineFactory.createPrimitiveDescription(ClearTKDocTimeRelAnnotator.class);
+ }
+
+}
Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ClearTKDocTimeRelAnnotator.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ClearTKDocumentCreationTimeAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ClearTKDocumentCreationTimeAnnotator.java?rev=1511107&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ClearTKDocumentCreationTimeAnnotator.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ClearTKDocumentCreationTimeAnnotator.java Tue Aug 6 20:00:28 2013
@@ -0,0 +1,35 @@
+package org.apache.ctakes.temporal.ae;
+
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.timeml.type.DocumentCreationTime;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.factory.AnalysisEngineFactory;
+
+/**
+ * for every cTAKES JCas, create a ClearTK Document Creation Time Annotation, fake span, no attribute.
+ * @author Chen Lin
+ *
+ */
+public class ClearTKDocumentCreationTimeAnnotator extends JCasAnnotator_ImplBase {
+
+ public ClearTKDocumentCreationTimeAnnotator() {
+ // TODO Auto-generated constructor stub
+ }
+
+ @Override
+ public void process(JCas jCas)
+ throws AnalysisEngineProcessException {
+ // create a cleartk dct object
+ DocumentCreationTime dct = new DocumentCreationTime(jCas);
+
+ dct.addToIndexes();
+ }
+
+ public static AnalysisEngineDescription getAnnotatorDescription() throws ResourceInitializationException {
+ return AnalysisEngineFactory.createPrimitiveDescription(ClearTKDocumentCreationTimeAnnotator.class);
+ }
+
+}
Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ClearTKDocumentCreationTimeAnnotator.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventToClearTKEventAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventToClearTKEventAnnotator.java?rev=1511107&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventToClearTKEventAnnotator.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventToClearTKEventAnnotator.java Tue Aug 6 20:00:28 2013
@@ -0,0 +1,42 @@
+package org.apache.ctakes.temporal.ae;
+
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.timeml.type.Event;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.util.JCasUtil;
+
+/**
+ * for every cTAKES EventMention annotation, create a ClearTK Event Annotation
+ * @author Chen Lin
+ *
+ */
+public class EventToClearTKEventAnnotator extends JCasAnnotator_ImplBase {
+
+ public EventToClearTKEventAnnotator() {
+ // TODO Auto-generated constructor stub
+ }
+
+ @Override
+ public void process(JCas jCas)
+ throws AnalysisEngineProcessException {
+ for (EventMention eMention : JCasUtil.select(jCas, EventMention.class)) {
+ // create a cleartk event object
+ Event event = new Event(jCas);
+ event.setBegin(eMention.getBegin());
+ event.setEnd(eMention.getEnd());
+
+ event.addToIndexes();
+ }
+
+ }
+
+ public static AnalysisEngineDescription getAnnotatorDescription() throws ResourceInitializationException {
+ return AnalysisEngineFactory.createPrimitiveDescription(EventToClearTKEventAnnotator.class);
+ }
+
+}
Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventToClearTKEventAnnotator.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKEventProperties.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKEventProperties.java?rev=1511107&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKEventProperties.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKEventProperties.java Tue Aug 6 20:00:28 2013
@@ -0,0 +1,239 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.eval;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.logging.FileHandler;
+import java.util.logging.Formatter;
+import java.util.logging.Level;
+import java.util.logging.LogRecord;
+import java.util.logging.Logger;
+
+import org.apache.ctakes.temporal.ae.EventToClearTKEventAnnotator;
+import org.apache.ctakes.temporal.ae.ClearTKDocumentCreationTimeAnnotator;
+import org.apache.ctakes.temporal.ae.ClearTKDocTimeRelAnnotator;
+import org.apache.ctakes.typesystem.type.refsem.EventProperties;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.eval.AnnotationStatistics;
+import org.cleartk.syntax.opennlp.ParserAnnotator;
+import org.cleartk.syntax.opennlp.PosTaggerAnnotator;
+import org.cleartk.syntax.opennlp.SentenceAnnotator;
+import org.cleartk.timeml.event.EventAspectAnnotator;
+import org.cleartk.timeml.event.EventClassAnnotator;
+import org.cleartk.timeml.event.EventModalityAnnotator;
+import org.cleartk.timeml.event.EventPolarityAnnotator;
+import org.cleartk.timeml.event.EventTenseAnnotator;
+import org.cleartk.timeml.tlink.TemporalLinkEventToDocumentCreationTimeAnnotator;
+import org.cleartk.token.stem.snowball.DefaultSnowballStemmer;
+import org.cleartk.token.tokenizer.TokenAnnotator;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.pipeline.JCasIterable;
+import org.uimafit.pipeline.SimplePipeline;
+import org.uimafit.util.JCasUtil;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Maps;
+import com.lexicalscope.jewel.cli.CliFactory;
+
+public class EvaluationOfClearTKEventProperties extends
+ Evaluation_ImplBase<Map<String, AnnotationStatistics<String>>> {
+
+ private static final String DOC_TIME_REL = "docTimeRel";
+
+ private static final List<String> PROPERTY_NAMES = Arrays.asList(DOC_TIME_REL);
+
+ public static void main(String[] args) throws Exception {
+ Options options = CliFactory.parseArguments(Options.class, args);
+ List<Integer> patientSets = options.getPatients().getList();
+ List<Integer> trainItems = THYMEData.getTrainPatientSets(patientSets);
+ List<Integer> devItems = THYMEData.getDevPatientSets(patientSets);
+ EvaluationOfClearTKEventProperties evaluation = new EvaluationOfClearTKEventProperties(
+ new File("target/eval/event-properties"),
+ options.getRawTextDirectory(),
+ options.getXMLDirectory(),
+ options.getXMLFormat(),
+ options.getXMIDirectory());
+ evaluation.prepareXMIsFor(patientSets);
+ evaluation.logClassificationErrors(new File("target/eval"), "ctakes-event-property-errors");
+ Map<String, AnnotationStatistics<String>> stats = evaluation.trainAndTest(trainItems, devItems);
+ for (String name : PROPERTY_NAMES) {
+ System.err.println("====================");
+ System.err.println(name);
+ System.err.println("--------------------");
+ System.err.println(stats.get(name));
+ }
+ }
+
+ private Map<String, Logger> loggers = Maps.newHashMap();
+
+ public EvaluationOfClearTKEventProperties(
+ File baseDirectory,
+ File rawTextDirectory,
+ File xmlDirectory,
+ XMLFormat xmlFormat,
+ File xmiDirectory) {
+ super(baseDirectory, rawTextDirectory, xmlDirectory, xmlFormat, xmiDirectory, null);
+ for (String name : PROPERTY_NAMES) {
+ this.loggers.put(name, Logger.getLogger(String.format("%s.%s", this.getClass().getName(), name)));
+ }
+ }
+
+ @Override
+ protected void train(CollectionReader collectionReader, File directory) throws Exception {
+ AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
+ aggregateBuilder.add(CopyFromGold.getDescription(EventMention.class));
+ aggregateBuilder.add(CopyFromGold.getDescription(EventProperties.class));
+ SimplePipeline.runPipeline(collectionReader, aggregateBuilder.createAggregate());
+ }
+
+ @Override
+ protected Map<String, AnnotationStatistics<String>> test(
+ CollectionReader collectionReader,
+ File directory) throws Exception {
+ AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
+// aggregateBuilder.add(UriToDocumentTextAnnotator.getDescription());
+ aggregateBuilder.add(SentenceAnnotator.getDescription());
+ aggregateBuilder.add(TokenAnnotator.getDescription());
+ aggregateBuilder.add(PosTaggerAnnotator.getDescription());
+ aggregateBuilder.add(DefaultSnowballStemmer.getDescription("English"));
+ aggregateBuilder.add(ParserAnnotator.getDescription());
+// aggregateBuilder.add(CopyFromGold.getDescription(EventMention.class));
+// aggregateBuilder.add(CopyFromGold.getDescription(TimeMention.class));
+ aggregateBuilder.add(EventToClearTKEventAnnotator.getAnnotatorDescription());//for every cTakes eventMention, create a cleartk event
+ aggregateBuilder.add(ClearTKDocumentCreationTimeAnnotator.getAnnotatorDescription());//for every jCAS create an empty DCT, and add it to index
+ aggregateBuilder.add(EventTenseAnnotator.FACTORY.getAnnotatorDescription("/org/cleartk/timeml/event/eventtenseannotator/model.jar"));
+ aggregateBuilder.add(EventAspectAnnotator.FACTORY.getAnnotatorDescription("/org/cleartk/timeml/event/eventaspectannotator/model.jar"));
+ aggregateBuilder.add(EventClassAnnotator.FACTORY.getAnnotatorDescription("/org/cleartk/timeml/event/eventclassannotator/model.jar"));
+ aggregateBuilder.add(EventPolarityAnnotator.FACTORY.getAnnotatorDescription("/org/cleartk/timeml/event/eventpolarityannotator/model.jar"));
+ aggregateBuilder.add(EventModalityAnnotator.FACTORY.getAnnotatorDescription("/org/cleartk/timeml/event/eventmodalityannotator/model.jar"));
+ aggregateBuilder.add(TemporalLinkEventToDocumentCreationTimeAnnotator.FACTORY.getAnnotatorDescription("/org/cleartk/timeml/tlink/temporallinkeventtodocumentcreationtimeannotator/model.jar"));
+ aggregateBuilder.add(ClearTKDocTimeRelAnnotator.getAnnotatorDescription());// for every tlink, check if it cover and event, add the tlink type to the event's docTimeRel attribute
+
+ Function<EventMention, ?> eventMentionToSpan = AnnotationStatistics.annotationToSpan();
+ Map<String, Function<EventMention, String>> propertyGetters;
+ propertyGetters = new HashMap<String, Function<EventMention, String>>();
+ for (String name : PROPERTY_NAMES) {
+ propertyGetters.put(name, getPropertyGetter(name));
+ }
+
+ Map<String, AnnotationStatistics<String>> statsMap = new HashMap<String, AnnotationStatistics<String>>();
+ statsMap.put(DOC_TIME_REL, new AnnotationStatistics<String>());
+ for (JCas jCas : new JCasIterable(collectionReader, aggregateBuilder.createAggregate())) {
+ JCas goldView = jCas.getView(GOLD_VIEW_NAME);
+ JCas systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
+ String text = goldView.getDocumentText();
+ for (Segment segment : JCasUtil.select(jCas, Segment.class)) {
+ if (!THYMEData.SEGMENTS_TO_SKIP.contains(segment.getId())) {
+ List<EventMention> goldEvents = selectExact(goldView, EventMention.class, segment);
+ List<EventMention> systemEvents = selectExact(systemView, EventMention.class, segment);
+ for (String name : PROPERTY_NAMES) {
+ Function<EventMention, String> getProperty = propertyGetters.get(name);
+ statsMap.get(name).add(
+ goldEvents,
+ systemEvents,
+ eventMentionToSpan,
+ getProperty);
+ for (int i = 0; i < goldEvents.size(); ++i) {
+ String goldOutcome = getProperty.apply(goldEvents.get(i));
+ if ( i == systemEvents.size()){
+ break;
+ }
+ String systemOutcome = getProperty.apply(systemEvents.get(i));
+ if (!goldOutcome.equals(systemOutcome)) {
+ EventMention event = goldEvents.get(i);
+ int begin = event.getBegin();
+ int end = event.getEnd();
+ int windowBegin = Math.max(0, begin - 50);
+ int windowEnd = Math.min(text.length(), end + 50);
+ this.loggers.get(name).fine(String.format(
+ "%s was %s but should be %s, in ...%s[!%s!]%s...",
+ name,
+ systemOutcome,
+ goldOutcome,
+ text.substring(windowBegin, begin).replaceAll("[\r\n]", " "),
+ text.substring(begin, end),
+ text.substring(end, windowEnd).replaceAll("[\r\n]", " ")));
+ }
+ }
+ }
+ }
+ }
+ }
+ return statsMap;
+ }
+
+ public void logClassificationErrors(File outputDir, String outputFilePrefix) throws IOException {
+ if (!outputDir.exists()) {
+ outputDir.mkdirs();
+ }
+ for (String name : PROPERTY_NAMES) {
+ Logger logger = this.loggers.get(name);
+ logger.setLevel(Level.FINE);
+ File outputFile = new File(outputDir, String.format("%s.%s.log", outputFilePrefix, name));
+ FileHandler handler = new FileHandler(outputFile.getPath());
+ handler.setFormatter(new Formatter() {
+ @Override
+ public String format(LogRecord record) {
+ return record.getMessage() + '\n';
+ }
+ });
+ logger.addHandler(handler);
+ }
+ }
+
+ private static Function<EventMention, String> getPropertyGetter(final String propertyName) {
+ return new Function<EventMention, String>() {
+ @Override
+ public String apply(EventMention eventMention) {
+ EventProperties eventProperties = eventMention.getEvent().getProperties();
+ Feature feature = eventProperties.getType().getFeatureByBaseName(propertyName);
+ return eventProperties.getFeatureValueAsString(feature);
+ }
+ };
+ }
+
+ public static class ClearEventProperties extends JCasAnnotator_ImplBase {
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+ for (EventProperties eventProperties : JCasUtil.select(jCas, EventProperties.class)) {
+ eventProperties.setAspect(null);
+ eventProperties.setCategory(null);
+ eventProperties.setContextualAspect(null);
+ eventProperties.setContextualModality(null);
+ eventProperties.setDegree(null);
+ eventProperties.setDocTimeRel(null);
+ eventProperties.setPermanence(null);
+ eventProperties.setPolarity(0);
+ }
+ }
+
+ }
+}
Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKEventProperties.java
------------------------------------------------------------------------------
svn:mime-type = text/plain