You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by st...@apache.org on 2012/10/11 23:33:05 UTC
svn commit: r1397326 [1/2] - in /incubator/ctakes/trunk: ./ ctakes-temporal/
ctakes-temporal/.settings/ ctakes-temporal/src/ ctakes-temporal/src/main/
ctakes-temporal/src/main/java/ ctakes-temporal/src/main/java/org/
ctakes-temporal/src/main/java/org/a...
Author: stevenbethard
Date: Thu Oct 11 21:33:04 2012
New Revision: 1397326
URL: http://svn.apache.org/viewvc?rev=1397326&view=rev
Log:
Adds ctakes-temporal module
Added:
incubator/ctakes/trunk/ctakes-temporal/ (with props)
incubator/ctakes/trunk/ctakes-temporal/.classpath (with props)
incubator/ctakes/trunk/ctakes-temporal/.project (with props)
incubator/ctakes/trunk/ctakes-temporal/.settings/
incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.core.resources.prefs
incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.jdt.core.prefs
incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.ltk.core.refactoring.prefs
incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.m2e.core.prefs
incubator/ctakes/trunk/ctakes-temporal/pom.xml (with props)
incubator/ctakes/trunk/ctakes-temporal/src/
incubator/ctakes/trunk/ctakes-temporal/src/main/
incubator/ctakes/trunk/ctakes-temporal/src/main/java/
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java (with props)
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java (with props)
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMEKnowtatorXMLReader.java (with props)
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java (with props)
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CoveredTextToValuesExtractor.java (with props)
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PhraseExtractor.java (with props)
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java (with props)
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/AnnotationCopier.java (with props)
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/CommandLine.java (with props)
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java (with props)
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKEventSpans.java (with props)
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKTimeSpans.java (with props)
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java (with props)
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java (with props)
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java (with props)
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java (with props)
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/PrintRelations.java (with props)
Modified:
incubator/ctakes/trunk/pom.xml
Propchange: incubator/ctakes/trunk/ctakes-temporal/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Thu Oct 11 21:33:04 2012
@@ -0,0 +1 @@
+target
Added: incubator/ctakes/trunk/ctakes-temporal/.classpath
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/.classpath?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/.classpath (added)
+++ incubator/ctakes/trunk/ctakes-temporal/.classpath Thu Oct 11 21:33:04 2012
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+ <classpathentry kind="src" output="target/classes" path="src/main/java">
+ <attributes>
+ <attribute name="optional" value="true"/>
+ <attribute name="maven.pomderived" value="true"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry kind="src" output="target/test-classes" path="src/test/java">
+ <attributes>
+ <attribute name="optional" value="true"/>
+ <attribute name="maven.pomderived" value="true"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6">
+ <attributes>
+ <attribute name="maven.pomderived" value="true"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
+ <attributes>
+ <attribute name="maven.pomderived" value="true"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry kind="output" path="target/classes"/>
+</classpath>
Propchange: incubator/ctakes/trunk/ctakes-temporal/.classpath
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/ctakes/trunk/ctakes-temporal/.project
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/.project?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/.project (added)
+++ incubator/ctakes/trunk/ctakes-temporal/.project Thu Oct 11 21:33:04 2012
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+ <name>ctakes-temporal</name>
+ <comment></comment>
+ <projects>
+ </projects>
+ <buildSpec>
+ <buildCommand>
+ <name>org.eclipse.jdt.core.javabuilder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ <buildCommand>
+ <name>org.eclipse.m2e.core.maven2Builder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ </buildSpec>
+ <natures>
+ <nature>org.eclipse.jdt.core.javanature</nature>
+ <nature>org.eclipse.m2e.core.maven2Nature</nature>
+ </natures>
+</projectDescription>
Propchange: incubator/ctakes/trunk/ctakes-temporal/.project
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.core.resources.prefs
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.core.resources.prefs?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.core.resources.prefs (added)
+++ incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.core.resources.prefs Thu Oct 11 21:33:04 2012
@@ -0,0 +1,4 @@
+eclipse.preferences.version=1
+encoding//src/main/java=UTF-8
+encoding//src/test/java=UTF-8
+encoding/<project>=UTF-8
Added: incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.jdt.core.prefs
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.jdt.core.prefs?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.jdt.core.prefs (added)
+++ incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.jdt.core.prefs Thu Oct 11 21:33:04 2012
@@ -0,0 +1,5 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
+org.eclipse.jdt.core.compiler.compliance=1.6
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.source=1.6
Added: incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.ltk.core.refactoring.prefs
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.ltk.core.refactoring.prefs?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.ltk.core.refactoring.prefs (added)
+++ incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.ltk.core.refactoring.prefs Thu Oct 11 21:33:04 2012
@@ -0,0 +1,2 @@
+eclipse.preferences.version=1
+org.eclipse.ltk.core.refactoring.enable.project.refactoring.history=false
Added: incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.m2e.core.prefs
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.m2e.core.prefs?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.m2e.core.prefs (added)
+++ incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.m2e.core.prefs Thu Oct 11 21:33:04 2012
@@ -0,0 +1,4 @@
+activeProfiles=
+eclipse.preferences.version=1
+resolveWorkspaceProjects=true
+version=1
Added: incubator/ctakes/trunk/ctakes-temporal/pom.xml
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/pom.xml?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/pom.xml (added)
+++ incubator/ctakes/trunk/ctakes-temporal/pom.xml Thu Oct 11 21:33:04 2012
@@ -0,0 +1,78 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <artifactId>ctakes-temporal</artifactId>
+ <packaging>jar</packaging>
+ <name>Apache cTAKES Temporal Information Extraction</name>
+ <parent>
+ <groupId>org.apache.ctakes</groupId>
+ <artifactId>ctakes</artifactId>
+ <version>3.0.0-SNAPSHOT</version>
+ </parent>
+ <dependencies>
+ <dependency>
+ <groupId>org.jdom</groupId>
+ <artifactId>jdom2</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.lexicalscope.jewelcli</groupId>
+ <artifactId>jewelcli</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.cleartk</groupId>
+ <artifactId>cleartk-util</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.cleartk</groupId>
+ <artifactId>cleartk-ml</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.cleartk</groupId>
+ <artifactId>cleartk-ml-opennlp-maxent</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.cleartk</groupId>
+ <artifactId>cleartk-ml-libsvm</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.cleartk</groupId>
+ <artifactId>cleartk-eval</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.cleartk</groupId>
+ <artifactId>cleartk-timeml</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.ctakes</groupId>
+ <artifactId>ctakes-type-system</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.ctakes</groupId>
+ <artifactId>ctakes-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.ctakes</groupId>
+ <artifactId>ctakes-context-tokenizer</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.ctakes</groupId>
+ <artifactId>ctakes-pos-tagger</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.ctakes</groupId>
+ <artifactId>ctakes-chunker</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.ctakes</groupId>
+ <artifactId>ctakes-dictionary-lookup</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.ctakes</groupId>
+ <artifactId>ctakes-lvg</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.ctakes</groupId>
+ <artifactId>ctakes-dependency-parser</artifactId>
+ </dependency>
+ </dependencies>
+</project>
\ No newline at end of file
Propchange: incubator/ctakes/trunk/ctakes-temporal/pom.xml
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/ctakes/trunk/ctakes-temporal/pom.xml
------------------------------------------------------------------------------
svn:mime-type = text/xml
Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae;
+
+import java.io.File;
+import java.util.List;
+
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.classifier.CleartkAnnotator;
+import org.cleartk.classifier.DataWriter;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.Instance;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor.Covered;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor.Following;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor.Preceding;
+import org.cleartk.classifier.feature.extractor.simple.CombinedExtractor;
+import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
+import org.cleartk.classifier.feature.extractor.simple.TypePathExtractor;
+import org.cleartk.classifier.jar.DefaultDataWriterFactory;
+import org.cleartk.classifier.jar.JarClassifierFactory;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.util.JCasUtil;
+
+public class DocTimeRelAnnotator extends CleartkAnnotator<String> {
+
+ public static AnalysisEngineDescription createDataWriterDescription(
+ Class<? extends DataWriter<String>> dataWriterClass,
+ File outputDirectory) throws ResourceInitializationException {
+ return AnalysisEngineFactory.createPrimitiveDescription(
+ DocTimeRelAnnotator.class,
+ CleartkAnnotator.PARAM_IS_TRAINING,
+ true,
+ DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
+ dataWriterClass,
+ DefaultDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+ outputDirectory);
+ }
+
+ public static AnalysisEngineDescription createAnnotatorDescription(File modelDirectory)
+ throws ResourceInitializationException {
+ return AnalysisEngineFactory.createPrimitiveDescription(
+ DocTimeRelAnnotator.class,
+ CleartkAnnotator.PARAM_IS_TRAINING,
+ false,
+ JarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+ new File(modelDirectory, "model.jar"));
+ }
+
+ private CleartkExtractor contextExtractor;
+
+ @Override
+ public void initialize(UimaContext context) throws ResourceInitializationException {
+ super.initialize(context);
+ CombinedExtractor baseExtractor = new CombinedExtractor(
+ new CoveredTextExtractor(),
+ new TypePathExtractor(BaseToken.class, "partOfSpeech"));
+ this.contextExtractor = new CleartkExtractor(
+ BaseToken.class,
+ baseExtractor,
+ new Preceding(3),
+ new Covered(),
+ new Following(3));
+ }
+
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+ for (EventMention eventMention : JCasUtil.select(jCas, EventMention.class)) {
+ List<Feature> features = this.contextExtractor.extract(jCas, eventMention);
+ if (this.isTraining()) {
+ String outcome = eventMention.getEvent().getProperties().getDocTimeRel();
+ this.dataWriter.write(new Instance<String>(outcome, features));
+ } else {
+ String outcome = this.classifier.classify(features);
+ eventMention.getEvent().getProperties().setDocTimeRel(outcome);
+ }
+ }
+ }
+}
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,271 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.logging.Logger;
+
+import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.classifier.CleartkAnnotator;
+import org.cleartk.classifier.DataWriter;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.Instance;
+import org.cleartk.classifier.chunking.BIOChunking;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor.Following;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor.Preceding;
+import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor;
+import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor.PatternType;
+import org.cleartk.classifier.feature.extractor.simple.CombinedExtractor;
+import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+import org.cleartk.classifier.feature.extractor.simple.TypePathExtractor;
+import org.cleartk.classifier.jar.DefaultDataWriterFactory;
+import org.cleartk.classifier.jar.JarClassifierFactory;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.util.JCasUtil;
+
+import com.google.common.base.Predicate;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import com.google.common.io.LineProcessor;
+
+/**
+ * ClearTK annotator that finds {@link EventMention} spans by assigning a BIO chunk label to
+ * each {@link BaseToken} of every {@link Sentence}.
+ * <p>
+ * Features for a token come from the token itself, from a window of neighbouring tokens
+ * within the sentence, from BIO tags of {@link EntityMention}s of several types, and from the
+ * outcomes of the two preceding tokens. In training mode instances are written to the data
+ * writer; otherwise the predicted labels are converted into event mentions in the CAS.
+ */
+public class EventAnnotator extends CleartkAnnotator<String> {
+
+  /** Entity type IDs whose per-token BIO tags are used as features (order is preserved). */
+  private static final int[] ENTITY_TYPE_IDS = {
+      CONST.NE_TYPE_ID_ANATOMICAL_SITE,
+      CONST.NE_TYPE_ID_DISORDER,
+      CONST.NE_TYPE_ID_DRUG,
+      CONST.NE_TYPE_ID_FINDING,
+      CONST.NE_TYPE_ID_PROCEDURE,
+      CONST.NE_TYPE_ID_UNKNOWN };
+
+  /** Offset bounding the entity-tag range [tokenIndex - window, tokenIndex + window). */
+  private static final int ENTITY_TAG_WINDOW = 2;
+
+  /** Number of preceding token outcomes that are added as features. */
+  private static final int N_PREVIOUS_CLASSIFICATIONS = 2;
+
+  /**
+   * Creates a description that runs this annotator in training mode.
+   *
+   * @param dataWriterClass the data writer implementation that receives the training instances
+   * @param outputDirectory the directory handed to the data writer factory
+   * @return the analysis engine description
+   * @throws ResourceInitializationException if the description cannot be created
+   */
+  public static AnalysisEngineDescription createDataWriterDescription(
+      Class<? extends DataWriter<String>> dataWriterClass,
+      File outputDirectory) throws ResourceInitializationException {
+    return AnalysisEngineFactory.createPrimitiveDescription(
+        EventAnnotator.class,
+        CleartkAnnotator.PARAM_IS_TRAINING,
+        true,
+        DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
+        dataWriterClass,
+        DefaultDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+        outputDirectory);
+  }
+
+  /**
+   * Creates a description that runs this annotator in classification mode.
+   *
+   * @param modelDirectory the directory that contains the classifier's {@code model.jar}
+   * @return the analysis engine description
+   * @throws ResourceInitializationException if the description cannot be created
+   */
+  public static AnalysisEngineDescription createAnnotatorDescription(File modelDirectory)
+      throws ResourceInitializationException {
+    return AnalysisEngineFactory.createPrimitiveDescription(
+        EventAnnotator.class,
+        CleartkAnnotator.PARAM_IS_TRAINING,
+        false,
+        JarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+        new File(modelDirectory, "model.jar"));
+  }
+
+  /** Extractors applied to the token being classified. */
+  protected List<SimpleFeatureExtractor> tokenFeatureExtractors;
+
+  /** Extractors applied to the tokens surrounding the token being classified. */
+  protected List<CleartkExtractor> contextFeatureExtractors;
+
+  /** Converts entity mentions into per-token BIO tags that include the "typeID" feature. */
+  private BIOChunking<BaseToken, EntityMention> entityChunking;
+
+  /** Converts between event mentions and per-token BIO outcomes. */
+  private BIOChunking<BaseToken, EventMention> eventChunking;
+
+  /**
+   * Creates the chunkings and the token-level and context-level feature extractors.
+   */
+  @Override
+  public void initialize(UimaContext context) throws ResourceInitializationException {
+    super.initialize(context);
+
+    // define chunkings
+    this.entityChunking = new BIOChunking<BaseToken, EntityMention>(
+        BaseToken.class,
+        EntityMention.class,
+        "typeID");
+    this.eventChunking = new BIOChunking<BaseToken, EventMention>(
+        BaseToken.class,
+        EventMention.class);
+
+    // add features: word, stem, pos and more
+    this.tokenFeatureExtractors = new ArrayList<SimpleFeatureExtractor>();
+    // try {
+    this.tokenFeatureExtractors.addAll(Arrays.asList(
+        new CoveredTextExtractor(),
+        new CharacterCategoryPatternExtractor(PatternType.ONE_PER_CHAR),
+        new TypePathExtractor(BaseToken.class, "partOfSpeech")));
+    // new SRLExtractor(),
+    // new CoveredTextToValuesExtractor("ACF", parseStringDoublesMap("/word_freq.lst")),
+    // new CoveredTextToValuesExtractor("PCA", parseStringDoublesMap("/word_pca.lst")),
+    // new CoveredTextToValuesExtractor("TimPCA", parseStringDoublesMap("/tim_word_pca.txt")),
+    // new PhraseExtractor()));
+    // } catch (IOException e) {
+    // throw new ResourceInitializationException(e);
+    // }
+
+    // add window of features before and after
+    CombinedExtractor subExtractor = new CombinedExtractor(
+        new CoveredTextExtractor(),
+        new TypePathExtractor(BaseToken.class, "partOfSpeech"));
+    // new SRLExtractor(),
+    // new PhraseExtractor());
+    this.contextFeatureExtractors = new ArrayList<CleartkExtractor>();
+    this.contextFeatureExtractors.add(new CleartkExtractor(
+        BaseToken.class,
+        subExtractor,
+        new Preceding(3),
+        new Following(3)));
+  }
+
+  // private static Map<String, double[]> parseStringDoublesMap(String resourcePath)
+  // throws IOException {
+  // StringToDoublesProcessor processor = new StringToDoublesProcessor();
+  // URL url = EventAnnotator.class.getResource(resourcePath);
+  // return Resources.readLines(url, Charsets.US_ASCII, processor);
+  // }
+
+  /**
+   * Processes each sentence token by token: builds the features of a token, then either
+   * writes a training instance or classifies the token. In classification mode the predicted
+   * chunk labels of a sentence are finally turned into event mentions.
+   */
+  @Override
+  public void process(JCas jCas) throws AnalysisEngineProcessException {
+    // classify tokens within each sentence
+    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
+      List<BaseToken> tokens = JCasUtil.selectCovered(jCas, BaseToken.class, sentence);
+
+      // during training, the list of all outcomes for the tokens
+      List<String> outcomes;
+      if (this.isTraining()) {
+        List<EventMention> events = JCasUtil.selectCovered(jCas, EventMention.class, sentence);
+        outcomes = this.eventChunking.createOutcomes(jCas, tokens, events);
+      }
+      // during prediction, the list of outcomes predicted so far
+      else {
+        outcomes = new ArrayList<String>();
+      }
+
+      // get BIO entity tags for each entity type
+      Map<Integer, List<String>> entityTagsByType = this.createEntityTagsByType(
+          jCas,
+          tokens,
+          sentence);
+
+      // extract features for all tokens
+      int tokenIndex = -1;
+      for (BaseToken token : tokens) {
+        ++tokenIndex;
+
+        List<Feature> features = new ArrayList<Feature>();
+        // features from token attributes
+        for (SimpleFeatureExtractor extractor : this.tokenFeatureExtractors) {
+          features.addAll(extractor.extract(jCas, token));
+        }
+        // features from surrounding tokens
+        for (CleartkExtractor extractor : this.contextFeatureExtractors) {
+          features.addAll(extractor.extractWithin(jCas, token, sentence));
+        }
+        // features from surrounding entities
+        // NOTE(review): the range is [tokenIndex - window, tokenIndex + window), so it stops
+        // one token short on the right, and the feature name is relative to "begin", which
+        // shifts near the start of a sentence. Kept as-is because changing it would alter the
+        // feature space - confirm whether this is intended.
+        for (int typeID : ENTITY_TYPE_IDS) {
+          List<String> tokenEntityTags = entityTagsByType.get(typeID);
+          int begin = Math.max(tokenIndex - ENTITY_TAG_WINDOW, 0);
+          int end = Math.min(tokenIndex + ENTITY_TAG_WINDOW, tokenEntityTags.size());
+          for (int i = begin; i < end; ++i) {
+            String name = String.format("EntityTag_%d_%d", typeID, i - begin);
+            features.add(new Feature(name, tokenEntityTags.get(i)));
+          }
+        }
+        // features from previous classifications ("O" stands in before the sentence start)
+        for (int i = N_PREVIOUS_CLASSIFICATIONS; i > 0; --i) {
+          int index = tokenIndex - i;
+          String previousOutcome = index < 0 ? "O" : outcomes.get(index);
+          features.add(new Feature("PreviousOutcome_" + i, previousOutcome));
+        }
+        // if training, write to data file
+        if (this.isTraining()) {
+          String outcome = outcomes.get(tokenIndex);
+          this.dataWriter.write(new Instance<String>(outcome, features));
+        }
+
+        // if predicting, add prediction to outcomes
+        else {
+          outcomes.add(this.classifier.classify(features));
+        }
+      }
+
+      // during prediction, convert chunk labels to events and add them to the CAS
+      if (!this.isTraining()) {
+        this.eventChunking.createChunks(jCas, tokens, outcomes);
+      }
+    }
+  }
+
+  /**
+   * Computes, for each entity type in {@link #ENTITY_TYPE_IDS}, the BIO tags of the given
+   * tokens with respect to the entity mentions of that type covered by the sentence.
+   */
+  private Map<Integer, List<String>> createEntityTagsByType(
+      JCas jCas,
+      List<BaseToken> tokens,
+      Sentence sentence) throws AnalysisEngineProcessException {
+    List<EntityMention> entities = JCasUtil.selectCovered(jCas, EntityMention.class, sentence);
+    Map<Integer, List<String>> entityTagsByType = new HashMap<Integer, List<String>>();
+    for (int typeID : ENTITY_TYPE_IDS) {
+      Predicate<EntityMention> hasTypeID = hasEntityType(typeID);
+      List<EntityMention> subEntities = Lists.newArrayList(Iterables.filter(entities, hasTypeID));
+      entityTagsByType.put(typeID, this.entityChunking.createOutcomes(jCas, tokens, subEntities));
+    }
+    return entityTagsByType;
+  }
+
+  /** Returns a predicate that accepts entity mentions whose type ID equals {@code typeID}. */
+  private static Predicate<EntityMention> hasEntityType(final int typeID) {
+    return new Predicate<EntityMention>() {
+      @Override
+      public boolean apply(EntityMention mention) {
+        return mention.getTypeID() == typeID;
+      }
+    };
+  }
+
+  /**
+   * Line processor that parses comma-separated lines of the form {@code key,v1,v2,...} into a
+   * map from key to double values. The first line fixes the expected number of parts; later
+   * lines with a different number of parts are logged and skipped. Currently referenced only
+   * by the commented-out resource-loading code above.
+   */
+  private static class StringToDoublesProcessor implements LineProcessor<Map<String, double[]>> {
+    private static final Logger LOGGER = Logger.getLogger(
+        StringToDoublesProcessor.class.getName());
+
+    private Map<String, double[]> result = new HashMap<String, double[]>();
+
+    /** Number of comma-separated parts expected per line; -1 until the first line is seen. */
+    private int length = -1;
+
+    @Override
+    public Map<String, double[]> getResult() {
+      return this.result;
+    }
+
+    /**
+     * Parses one line and stores its values under the line's first part.
+     *
+     * @return always {@code true}, so that processing continues with the next line
+     * @throws NumberFormatException if a value part is not a parsable double
+     */
+    @Override
+    public boolean processLine(String line) throws IOException {
+      String[] parts = line.trim().split(",");
+      String key = parts[0];
+      if (this.length == -1) {
+        this.length = parts.length;
+      } else if (parts.length != this.length) {
+        String message = "expected %d parts, found %d, skipping line '%s'";
+        LOGGER.warning(String.format(message, this.length, parts.length, line));
+        return true;
+      }
+      // everything after the key is a value
+      double[] values = new double[parts.length - 1];
+      for (int i = 0; i < values.length; ++i) {
+        values[i] = Double.parseDouble(parts[i + 1]);
+      }
+      this.result.put(key, values);
+      return true;
+    }
+  }
+}
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMEKnowtatorXMLReader.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMEKnowtatorXMLReader.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMEKnowtatorXMLReader.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMEKnowtatorXMLReader.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae;
+
+import java.io.File;
+import java.net.URI;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.ctakes.core.ae.SHARPKnowtatorXMLReader;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.util.ViewURIUtil;
+import org.uimafit.descriptor.ConfigurationParameter;
+import org.uimafit.factory.AnalysisEngineFactory;
+
+/**
+ * Knowtator XML reader that locates the annotation file of a document inside a separate
+ * Knowtator XML directory.
+ * <p>
+ * A document at {@code .../docNN/NAME} is mapped to
+ * {@code <knowtatorXMLDirectory>/SetNN/NAME.knowtator.xml}, where the set number is
+ * zero-padded to at least two digits.
+ */
+public class THYMEKnowtatorXMLReader extends SHARPKnowtatorXMLReader {
+
+  public static final String PARAM_KNOWTATOR_XML_DIRECTORY = "knowtatorXMLDirectory";
+
+  /** Matches the expected name of a document's parent directory; compiled once and reused. */
+  private static final Pattern DOC_SUBDIRECTORY_PATTERN = Pattern.compile("^doc(\\d+)$");
+
+  /** Root directory under which the {@code SetNN} directories are looked up. */
+  @ConfigurationParameter(name = PARAM_KNOWTATOR_XML_DIRECTORY, mandatory = true)
+  protected File knowtatorXMLDirectory;
+
+  /**
+   * Creates a description of this reader for the given Knowtator XML directory.
+   *
+   * @param knowtatorXMLDirectory the directory that contains the {@code SetNN} subdirectories
+   * @return the analysis engine description
+   * @throws ResourceInitializationException if the description cannot be created
+   */
+  public static AnalysisEngineDescription getDescription(File knowtatorXMLDirectory)
+      throws ResourceInitializationException {
+    return AnalysisEngineFactory.createPrimitiveDescription(
+        THYMEKnowtatorXMLReader.class,
+        THYMEKnowtatorXMLReader.PARAM_KNOWTATOR_XML_DIRECTORY,
+        knowtatorXMLDirectory);
+  }
+
+  /**
+   * Derives the location of the Knowtator XML file from the URI of the document in the CAS.
+   *
+   * @param jCas the CAS whose view URI identifies the document
+   * @return the URI of the corresponding {@code .knowtator.xml} file
+   * @throws IllegalArgumentException if the document has no parent directory or the parent
+   *         directory is not named {@code doc} followed by digits
+   */
+  @Override
+  protected URI getKnowtatorXML(JCas jCas) throws AnalysisEngineProcessException {
+    URI uri = ViewURIUtil.getURI(jCas);
+    File file = new File(uri.getPath());
+    File parent = file.getParentFile();
+    if (parent == null) {
+      // fail with a descriptive message instead of a bare NullPointerException
+      throw new IllegalArgumentException("No parent directory in document URI: " + uri);
+    }
+    String subDir = parent.getName();
+    Matcher matcher = DOC_SUBDIRECTORY_PATTERN.matcher(subDir);
+    if (!matcher.matches()) {
+      throw new IllegalArgumentException("Unrecognized subdirectory naming: " + subDir);
+    }
+    // "docN" on the document side corresponds to "SetNN" on the annotation side
+    subDir = String.format("Set%02d", Integer.parseInt(matcher.group(1)));
+    String fileName = file.getName() + ".knowtator.xml";
+    return new File(new File(this.knowtatorXMLDirectory, subDir), fileName).toURI();
+  }
+
+  /** Returns the annotator names passed to the parent reader for this corpus. */
+  @Override
+  protected String[] getAnnotatorNames() {
+    return new String[] { "consensus set annotator team", "consensus set_rel annotator team" };
+  }
+
+}
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMEKnowtatorXMLReader.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMEKnowtatorXMLReader.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,162 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.classifier.CleartkAnnotator;
+import org.cleartk.classifier.DataWriter;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.Instance;
+import org.cleartk.classifier.chunking.BIOChunking;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor.Following;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor.Preceding;
+import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor;
+import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor.PatternType;
+import org.cleartk.classifier.feature.extractor.simple.CombinedExtractor;
+import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+import org.cleartk.classifier.feature.extractor.simple.TypePathExtractor;
+import org.cleartk.classifier.jar.DefaultDataWriterFactory;
+import org.cleartk.classifier.jar.JarClassifierFactory;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.util.JCasUtil;
+
+ /**
+  * Finds {@link TimeMention}s by assigning a chunk outcome to every {@link BaseToken} of each
+  * {@link Sentence} and letting a {@link BIOChunking} translate between outcomes and mentions.
+  *
+  * <p>In training mode one {@link Instance} per token is written to the data writer, using the
+  * outcomes derived from the TimeMentions already present in the CAS. In prediction mode the
+  * tokens are classified in order and the predicted outcomes are turned into new TimeMentions.
+  */
+ public class TimeAnnotator extends CleartkAnnotator<String> {
+
+   /** How many of the immediately preceding outcomes are used as features for a token. */
+   private static final int N_PREVIOUS_OUTCOMES = 2;
+
+   protected List<SimpleFeatureExtractor> tokenFeatureExtractors;
+
+   protected List<CleartkExtractor> contextFeatureExtractors;
+
+   private BIOChunking<BaseToken, TimeMention> timeChunking;
+
+   /**
+    * Describes this annotator in training mode.
+    *
+    * @param dataWriterClass the data writer implementation that receives the training instances
+    * @param outputDirectory the directory handed to the data writer factory
+    * @return the analysis engine description
+    * @throws ResourceInitializationException if the description cannot be created
+    */
+   public static AnalysisEngineDescription createDataWriterDescription(
+       Class<? extends DataWriter<String>> dataWriterClass,
+       File outputDirectory) throws ResourceInitializationException {
+     return AnalysisEngineFactory.createPrimitiveDescription(
+         TimeAnnotator.class,
+         CleartkAnnotator.PARAM_IS_TRAINING, true,
+         DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, dataWriterClass,
+         DefaultDataWriterFactory.PARAM_OUTPUT_DIRECTORY, outputDirectory);
+   }
+
+   /**
+    * Describes this annotator in prediction mode.
+    *
+    * @param modelDirectory the directory containing {@code model.jar}
+    * @return the analysis engine description
+    * @throws ResourceInitializationException if the description cannot be created
+    */
+   public static AnalysisEngineDescription createAnnotatorDescription(File modelDirectory)
+       throws ResourceInitializationException {
+     File modelJar = new File(modelDirectory, "model.jar");
+     return AnalysisEngineFactory.createPrimitiveDescription(
+         TimeAnnotator.class,
+         CleartkAnnotator.PARAM_IS_TRAINING, false,
+         JarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, modelJar);
+   }
+
+   /**
+    * Sets up the token-level extractors, the context extractors and the chunking.
+    */
+   @Override
+   public void initialize(UimaContext context) throws ResourceInitializationException {
+     super.initialize(context);
+
+     // per-token features: covered text, character-category pattern and part of speech
+     CoveredTextExtractor text = new CoveredTextExtractor();
+     CharacterCategoryPatternExtractor characterPattern =
+         new CharacterCategoryPatternExtractor(PatternType.REPEATS_MERGED);
+     TypePathExtractor partOfSpeech = new TypePathExtractor(BaseToken.class, "partOfSpeech");
+     CombinedExtractor perToken = new CombinedExtractor(text, characterPattern, partOfSpeech);
+
+     this.tokenFeatureExtractors = new ArrayList<SimpleFeatureExtractor>();
+     this.tokenFeatureExtractors.add(perToken);
+
+     // the same features, taken from the 3 tokens before and the 3 tokens after
+     CleartkExtractor window =
+         new CleartkExtractor(BaseToken.class, perToken, new Preceding(3), new Following(3));
+     this.contextFeatureExtractors = new ArrayList<CleartkExtractor>();
+     this.contextFeatureExtractors.add(window);
+
+     // tokens are the units, time mentions are the chunks
+     this.timeChunking = new BIOChunking<BaseToken, TimeMention>(BaseToken.class, TimeMention.class);
+   }
+
+   /**
+    * Processes every sentence of the CAS: writes training instances or predicts time mentions,
+    * depending on {@link #isTraining()}.
+    */
+   @Override
+   public void process(JCas jCas) throws AnalysisEngineProcessException {
+     for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
+       List<BaseToken> tokens = JCasUtil.selectCovered(jCas, BaseToken.class, sentence);
+
+       // training: the outcomes of all tokens; prediction: the outcomes predicted so far
+       List<String> outcomes = this.isTraining()
+           ? this.timeChunking.createOutcomes(
+               jCas,
+               tokens,
+               JCasUtil.selectCovered(jCas, TimeMention.class, sentence))
+           : new ArrayList<String>();
+
+       int position = 0;
+       for (BaseToken token : tokens) {
+         List<Feature> features = this.extractFeatures(jCas, sentence, token, position, outcomes);
+         if (this.isTraining()) {
+           this.dataWriter.write(new Instance<String>(outcomes.get(position), features));
+         } else {
+           outcomes.add(this.classifier.classify(features));
+         }
+         position++;
+       }
+
+       // only predictions need to be converted back into annotations
+       if (!this.isTraining()) {
+         this.timeChunking.createChunks(jCas, tokens, outcomes);
+       }
+     }
+   }
+
+   /**
+    * Collects the features of one token: its own attributes, its context within the sentence, and
+    * the outcomes of the preceding tokens ("O" where the sentence has no such token).
+    */
+   private List<Feature> extractFeatures(
+       JCas jCas,
+       Sentence sentence,
+       BaseToken token,
+       int position,
+       List<String> outcomes) throws AnalysisEngineProcessException {
+     List<Feature> features = new ArrayList<Feature>();
+     for (SimpleFeatureExtractor tokenExtractor : this.tokenFeatureExtractors) {
+       features.addAll(tokenExtractor.extract(jCas, token));
+     }
+     for (CleartkExtractor contextExtractor : this.contextFeatureExtractors) {
+       features.addAll(contextExtractor.extractWithin(jCas, token, sentence));
+     }
+     // farthest outcome first, nearest last
+     for (int distance = N_PREVIOUS_OUTCOMES; distance > 0; --distance) {
+       int earlier = position - distance;
+       String previousOutcome;
+       if (earlier >= 0) {
+         previousOutcome = outcomes.get(earlier);
+       } else {
+         previousOutcome = "O";
+       }
+       features.add(new Feature("PreviousOutcome_" + distance, previousOutcome));
+     }
+     return features;
+   }
+ }
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CoveredTextToValuesExtractor.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CoveredTextToValuesExtractor.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CoveredTextToValuesExtractor.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CoveredTextToValuesExtractor.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+
+ /**
+  * Turns the covered text of an annotation into numeric features by looking it up in a map from
+  * text to value vectors. Text that is not in the map gets the per-position mean of all vectors.
+  */
+ public class CoveredTextToValuesExtractor implements SimpleFeatureExtractor {
+
+   private String name;
+
+   private Map<String, double[]> textDoublesMap;
+
+   private double[] meanValues;
+
+   /**
+    * @param name prefix used when naming the generated features
+    * @param textDoublesMap value vectors keyed by covered text; must not be empty
+    * @throws IllegalArgumentException if {@code textDoublesMap} has no entries
+    */
+   public CoveredTextToValuesExtractor(String name, Map<String, double[]> textDoublesMap) {
+     this.name = name;
+     this.textDoublesMap = textDoublesMap;
+
+     final int entryCount = textDoublesMap.size();
+     if (entryCount == 0) {
+       throw new IllegalArgumentException("textDoublesMap cannot be empty");
+     }
+
+     // the width of the mean vector is taken from the first vector the map iterates
+     double[] means = new double[textDoublesMap.values().iterator().next().length];
+     for (double[] vector : textDoublesMap.values()) {
+       int position = 0;
+       for (double value : vector) {
+         means[position] += value;
+         position++;
+       }
+     }
+     for (int position = 0; position < means.length; position++) {
+       means[position] /= entryCount;
+     }
+     this.meanValues = means;
+   }
+
+   /**
+    * Creates one feature per vector position for the annotation's covered text.
+    *
+    * @param view the CAS view (not consulted)
+    * @param annotation the annotation whose covered text is looked up
+    * @return the features, named from the extractor name and the position index
+    */
+   @Override
+   public List<Feature> extract(JCas view, Annotation annotation) throws CleartkExtractorException {
+     double[] known = this.textDoublesMap.get(annotation.getCoveredText());
+     double[] values = known != null ? known : this.meanValues;
+
+     List<Feature> features = new ArrayList<Feature>();
+     int position = 0;
+     for (double value : values) {
+       String featureName = Feature.createName(this.name, String.valueOf(position));
+       features.add(new Feature(featureName, value));
+       position++;
+     }
+     return features;
+   }
+
+ }
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CoveredTextToValuesExtractor.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CoveredTextToValuesExtractor.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PhraseExtractor.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PhraseExtractor.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PhraseExtractor.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PhraseExtractor.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.ctakes.typesystem.type.syntax.Chunk;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+import org.uimafit.util.JCasUtil;
+
+ /**
+  * Produces a single unnamed feature telling whether a chunk of type "NP" or "VP" is found for
+  * the given annotation; the value is "NP", "VP" or "NotNPVP".
+  */
+ public class PhraseExtractor implements SimpleFeatureExtractor {
+
+   /**
+    * Returns the type of the first NP or VP chunk selected for {@code token}.
+    *
+    * <p>NOTE(review): {@code selectCovered} returns Chunks that lie inside the span of
+    * {@code token}. If the intent is "the phrase that contains this token", a covering lookup may
+    * have been meant - confirm before changing.
+    *
+    * @param jCas the CAS view to search for chunks
+    * @param token the annotation whose span bounds the chunk search
+    * @return a one-element list holding the feature
+    */
+   @Override
+   public List<Feature> extract(JCas jCas, Annotation token) throws CleartkExtractorException {
+     String featureValue = "NotNPVP";
+     for (Chunk chunk : JCasUtil.selectCovered(jCas, Chunk.class, token)) {
+       String chunkType = chunk.getChunkType();
+       // constant-first equals: a Chunk whose chunkType was never set is skipped instead of
+       // raising a NullPointerException
+       if ("NP".equals(chunkType) || "VP".equals(chunkType)) {
+         featureValue = chunkType;
+         break;
+       }
+     }
+     return Collections.singletonList(new Feature(featureValue));
+   }
+
+ }
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PhraseExtractor.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PhraseExtractor.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.Predicate;
+import org.apache.ctakes.typesystem.type.textsem.SemanticArgument;
+import org.apache.ctakes.typesystem.type.textsem.SemanticRoleRelation;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+import org.uimafit.util.JCasUtil;
+
+ /**
+  * Produces a single unnamed feature describing the semantic role of the focus annotation:
+  * "Predicate" if it is a token of a predicate, the argument label if it is a token of one of a
+  * predicate's arguments, and "NoRole" otherwise.
+  */
+ public class SRLExtractor implements SimpleFeatureExtractor {
+
+   /**
+    * Scans the predicates of the CAS in index order and reports the first role that matches.
+    *
+    * @param jCas the CAS view holding predicates, arguments and tokens
+    * @param focusAnnotation the annotation compared (via {@code equals}) with each BaseToken
+    * @return a one-element list holding the role feature
+    */
+   @Override
+   public List<Feature> extract(JCas jCas, Annotation focusAnnotation)
+       throws CleartkExtractorException {
+     // TODO: don't iterate over the entire CAS for each focusAnnotation; use JCasUtil.indexCovering
+     // and cache the results so that we only do this once per CAS
+
+     for (Predicate predicate : JCasUtil.select(jCas, Predicate.class)) {
+
+       // is the focus one of the predicate's own tokens?
+       for (BaseToken predicateToken : JCasUtil.selectCovered(jCas, BaseToken.class, predicate)) {
+         if (predicateToken.equals(focusAnnotation)) {
+           return Collections.singletonList(new Feature("Predicate"));
+         }
+       }
+
+       // is the focus a token of one of the predicate's arguments?
+       for (SemanticRoleRelation relation : JCasUtil.select(
+           predicate.getRelations(),
+           SemanticRoleRelation.class)) {
+         SemanticArgument argument = relation.getArgument();
+         for (BaseToken argumentToken : JCasUtil.selectCovered(jCas, BaseToken.class, argument)) {
+           if (argumentToken.equals(focusAnnotation)) {
+             return Collections.singletonList(new Feature(argument.getLabel()));
+           }
+         }
+       }
+     }
+
+     // neither a predicate token nor an argument token
+     return Collections.singletonList(new Feature("NoRole"));
+   }
+
+ }
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/AnnotationCopier.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/AnnotationCopier.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/AnnotationCopier.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/AnnotationCopier.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.eval;
+
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.TOP;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.CasCopier;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.descriptor.ConfigurationParameter;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.util.JCasUtil;
+
+ /**
+  * Copies every feature structure of a configured type from one CAS view to another and adds the
+  * copies to the target view's indexes.
+  */
+ public class AnnotationCopier extends JCasAnnotator_ImplBase {
+
+   public static final String PARAM_SOURCE_VIEW = "SourceView";
+
+   public static final String PARAM_TARGET_VIEW = "TargetView";
+
+   public static final String PARAM_ANNOTATION_CLASS = "AnnotationClass";
+
+   @ConfigurationParameter(name = PARAM_SOURCE_VIEW, mandatory = true)
+   private String sourceView;
+
+   @ConfigurationParameter(name = PARAM_TARGET_VIEW, mandatory = true)
+   private String targetView;
+
+   @ConfigurationParameter(name = PARAM_ANNOTATION_CLASS, mandatory = true)
+   private Class<? extends TOP> annotationClass;
+
+   /**
+    * Describes a copier for the given views and type.
+    *
+    * @param sourceView name of the view to read from
+    * @param targetView name of the view to write to
+    * @param annotationClass the type of feature structure to copy
+    * @return the analysis engine description
+    * @throws ResourceInitializationException if the description cannot be created
+    */
+   public static AnalysisEngineDescription getDescription(
+       String sourceView,
+       String targetView,
+       Class<? extends TOP> annotationClass) throws ResourceInitializationException {
+     return AnalysisEngineFactory.createPrimitiveDescription(
+         AnnotationCopier.class,
+         PARAM_SOURCE_VIEW, sourceView,
+         PARAM_TARGET_VIEW, targetView,
+         PARAM_ANNOTATION_CLASS, annotationClass);
+   }
+
+   /**
+    * Copies all instances of the configured type from the source view into the target view.
+    *
+    * @throws AnalysisEngineProcessException if either view cannot be obtained
+    */
+   @Override
+   public void process(JCas jCas) throws AnalysisEngineProcessException {
+     JCas source = viewNamed(jCas, this.sourceView);
+     JCas target = viewNamed(jCas, this.targetView);
+
+     CasCopier copier = new CasCopier(source.getCas(), target.getCas());
+     for (TOP original : JCasUtil.select(source, this.annotationClass)) {
+       TOP copy = (TOP) copier.copyFs(original);
+       // CasCopier does not change sofa of annotation; without the code below, you get the error:
+       // the Annotation "..." is over view "GoldView" and cannot be added to indexes associated
+       // with the different view "_InitialView".
+       Feature sofaFeature = copy.getType().getFeatureByBaseName(CAS.FEATURE_BASE_NAME_SOFA);
+       if (sofaFeature != null) {
+         copy.setFeatureValue(sofaFeature, target.getSofa());
+       }
+       copy.addToIndexes();
+     }
+   }
+
+   /** Looks up a view by name, wrapping a CASException in an AnalysisEngineProcessException. */
+   private static JCas viewNamed(JCas jCas, String viewName)
+       throws AnalysisEngineProcessException {
+     try {
+       return jCas.getView(viewName);
+     } catch (CASException e) {
+       throw new AnalysisEngineProcessException(e);
+     }
+   }
+
+ }
\ No newline at end of file
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/AnnotationCopier.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/AnnotationCopier.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/CommandLine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/CommandLine.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/CommandLine.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/CommandLine.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.eval;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+ /**
+  * Holder for value types that are parsed from command-line argument strings.
+  */
+ public class CommandLine {
+
+   /**
+    * A list of integers parsed from a comma-separated string whose parts are either a single
+    * integer ({@code "5"}) or an inclusive range of non-negative integers ({@code "1-3"}).
+    *
+    * <p>For example {@code "1-3, 5"} yields {@code [1, 2, 3, 5]}. A range whose begin is greater
+    * than its end contributes no items.
+    */
+   public static class IntegerRanges {
+
+     /** Matches an inclusive range such as {@code 10-20}; compiled once and shared. */
+     private static final Pattern RANGE = Pattern.compile("(\\d+)-(\\d+)");
+
+     private List<Integer> items = new ArrayList<Integer>();
+
+     /**
+      * @return the parsed integers in the order they were listed (the internal list, not a copy)
+      */
+     public List<Integer> getList() {
+       return this.items;
+     }
+
+     /**
+      * Parses the given specification.
+      *
+      * @param string comma-separated integers and/or {@code begin-end} ranges; whitespace around
+      *        the commas and around the whole string is ignored
+      * @throws NumberFormatException if a part is neither an integer nor a range
+      */
+     public IntegerRanges(String string) {
+       // trim first: the split pattern only absorbs whitespace next to a comma, so leading or
+       // trailing blanks would otherwise reach Integer.parseInt and make it fail
+       for (String part : string.trim().split("\\s*,\\s*")) {
+         Matcher matcher = RANGE.matcher(part);
+         if (matcher.matches()) {
+           int begin = Integer.parseInt(matcher.group(1));
+           int end = Integer.parseInt(matcher.group(2));
+           for (int i = begin; i <= end; ++i) {
+             this.items.add(i);
+           }
+         } else {
+           this.items.add(Integer.parseInt(part));
+         }
+       }
+     }
+   }
+ }
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/CommandLine.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/CommandLine.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,157 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.eval;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.logging.FileHandler;
+import java.util.logging.Formatter;
+import java.util.logging.Level;
+import java.util.logging.LogRecord;
+import java.util.logging.Logger;
+
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.eval.AnnotationStatistics;
+import org.cleartk.util.ViewURIUtil;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.pipeline.JCasIterable;
+import org.uimafit.pipeline.SimplePipeline;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Ordering;
+
+ /**
+  * Base class for evaluations that compare the spans of system annotations against the spans of
+  * gold annotations and accumulate the result in an {@link AnnotationStatistics}.
+  *
+  * <p>Subclasses supply the data writer and annotator descriptions, the model training step, and
+  * the selection of gold and system annotations. Span mismatches are logged at level FINE.
+  */
+ public abstract class EvaluationOfAnnotationSpans_ImplBase extends
+     Evaluation_ImplBase<AnnotationStatistics<String>> {
+
+   private final Logger logger = Logger.getLogger(this.getClass().getName());
+
+   /**
+    * Sends this evaluation's log messages to a file, one bare message per line.
+    *
+    * @param level the level to set on the logger
+    * @param outputFile the log file; missing parent directories are created
+    * @throws IOException if the file handler cannot be opened
+    */
+   public void setLogging(Level level, File outputFile) throws IOException {
+     // a bare file name such as "errors.log" has no parent directory; getParentFile() is then
+     // null and there is nothing to create
+     File parent = outputFile.getParentFile();
+     if (parent != null && !parent.exists()) {
+       parent.mkdirs();
+     }
+     this.logger.setLevel(level);
+     FileHandler handler = new FileHandler(outputFile.getPath());
+     handler.setFormatter(new Formatter() {
+       @Override
+       public String format(LogRecord record) {
+         return record.getMessage() + '\n';
+       }
+     });
+     this.logger.addHandler(handler);
+   }
+
+   public EvaluationOfAnnotationSpans_ImplBase(
+       File baseDirectory,
+       File rawTextDirectory,
+       File knowtatorXMLDirectory,
+       List<Integer> patientSets,
+       Set<AnnotatorType> annotatorFlags) {
+     super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, patientSets, annotatorFlags);
+   }
+
+   /** Returns the engine that is run after the training preprocessor to write training data. */
+   protected abstract AnalysisEngineDescription getDataWriterDescription(File directory)
+       throws ResourceInitializationException;
+
+   /** Called once the training pipeline has finished running over the training documents. */
+   protected abstract void trainAndPackage(File directory) throws Exception;
+
+   /**
+    * Runs the training preprocessor and the data writer over the documents, then hands the
+    * directory to {@link #trainAndPackage(File)}.
+    */
+   @Override
+   protected void train(CollectionReader collectionReader, File directory) throws Exception {
+     AggregateBuilder aggregateBuilder = new AggregateBuilder();
+     aggregateBuilder.add(this.getPreprocessorTrainDescription());
+     aggregateBuilder.add(this.getDataWriterDescription(directory));
+     SimplePipeline.runPipeline(collectionReader, aggregateBuilder.createAggregate());
+     this.trainAndPackage(directory);
+   }
+
+   /** Returns the engine that is run after the test preprocessor to produce system annotations. */
+   protected abstract AnalysisEngineDescription getAnnotatorDescription(File directory)
+       throws ResourceInitializationException;
+
+   /** Selects the reference annotations; called with the gold view of each document. */
+   protected abstract Collection<? extends Annotation> getGoldAnnotations(JCas jCas);
+
+   /** Selects the predicted annotations; called with the default view of each document. */
+   protected abstract Collection<? extends Annotation> getSystemAnnotations(JCas jCas);
+
+   /**
+    * Runs the test preprocessor and the annotator over the documents, adds the gold and system
+    * annotations of each document to the statistics, and logs every span that appears on only
+    * one side together with up to 50 characters of context before and after it.
+    *
+    * @return the statistics accumulated over all documents
+    */
+   @Override
+   protected AnnotationStatistics<String> test(CollectionReader collectionReader, File directory)
+       throws Exception {
+     AggregateBuilder aggregateBuilder = new AggregateBuilder();
+     aggregateBuilder.add(this.getPreprocessorTestDescription());
+     aggregateBuilder.add(this.getAnnotatorDescription(directory));
+
+     AnnotationStatistics<String> stats = new AnnotationStatistics<String>();
+     // orders (and identifies) annotations purely by their (begin, end) offsets
+     Ordering<Annotation> bySpans = Ordering.<Integer> natural().lexicographical().onResultOf(
+         new Function<Annotation, List<Integer>>() {
+           @Override
+           public List<Integer> apply(Annotation annotation) {
+             return Arrays.asList(annotation.getBegin(), annotation.getEnd());
+           }
+         });
+     for (JCas jCas : new JCasIterable(collectionReader, aggregateBuilder.createAggregate())) {
+       JCas goldView = jCas.getView(GOLD_VIEW_NAME);
+       JCas systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
+       Collection<? extends Annotation> goldAnnotations = this.getGoldAnnotations(goldView);
+       Collection<? extends Annotation> systemAnnotations = this.getSystemAnnotations(systemView);
+       stats.add(goldAnnotations, systemAnnotations);
+
+       Set<Annotation> goldSet = new TreeSet<Annotation>(bySpans);
+       goldSet.addAll(goldAnnotations);
+       Set<Annotation> systemSet = new TreeSet<Annotation>(bySpans);
+       systemSet.addAll(systemAnnotations);
+
+       // spans the system missed
+       Set<Annotation> goldOnly = new TreeSet<Annotation>(bySpans);
+       goldOnly.addAll(goldSet);
+       goldOnly.removeAll(systemSet);
+
+       // spans the system invented
+       Set<Annotation> systemOnly = new TreeSet<Annotation>(bySpans);
+       systemOnly.addAll(systemSet);
+       systemOnly.removeAll(goldSet);
+
+       if (!goldOnly.isEmpty() || !systemOnly.isEmpty()) {
+         // line breaks are replaced one-for-one so that annotation offsets stay valid; the text is
+         // only needed (and only prepared) when there is something to report
+         String text = jCas.getDocumentText().replaceAll("[\r\n]", " ");
+         this.logger.fine("Errors in : " + ViewURIUtil.getURI(jCas).toString());
+         Set<Annotation> errors = new TreeSet<Annotation>(bySpans);
+         errors.addAll(goldOnly);
+         errors.addAll(systemOnly);
+         for (Annotation annotation : errors) {
+           int begin = annotation.getBegin();
+           int end = annotation.getEnd();
+           int windowBegin = Math.max(0, begin - 50);
+           int windowEnd = Math.min(text.length(), end + 50);
+           String label = goldOnly.contains(annotation) ? "DROPPED:" : "ADDED: ";
+           this.logger.fine(String.format(
+               "%s ...%s[!%s!]%s...",
+               label,
+               text.substring(windowBegin, begin),
+               text.substring(begin, end),
+               text.substring(end, windowEnd)));
+         }
+       }
+     }
+     return stats;
+   }
+ }
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKEventSpans.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKEventSpans.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKEventSpans.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKEventSpans.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,112 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.eval;
+
+import java.io.File;
+import java.util.Collection;
+import java.util.EnumSet;
+import java.util.List;
+import java.util.logging.Level;
+
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.eval.AnnotationStatistics;
+import org.cleartk.syntax.opennlp.ParserAnnotator;
+import org.cleartk.syntax.opennlp.PosTaggerAnnotator;
+import org.cleartk.syntax.opennlp.SentenceAnnotator;
+import org.cleartk.timeml.event.EventAnnotator;
+import org.cleartk.timeml.type.Event;
+import org.cleartk.token.stem.snowball.DefaultSnowballStemmer;
+import org.cleartk.token.tokenizer.TokenAnnotator;
+import org.uimafit.component.NoOpAnnotator;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.util.JCasUtil;
+
+import com.lexicalscope.jewel.cli.CliFactory;
+
+/** Evaluates the spans produced by ClearTK's TimeML event annotator against EventMention spans. */
+public class EvaluationOfClearTKEventSpans extends EvaluationOfAnnotationSpans_ImplBase {
+
+  /** Runs a 4-fold cross-validation and prints each fold's statistics, then the total, to stderr. */
+  public static void main(String[] args) throws Exception {
+    Options cliOptions = CliFactory.parseArguments(Options.class, args);
+    File outputDirectory = new File("target/eval");
+    File rawText = cliOptions.getRawTextDirectory();
+    File knowtatorXML = cliOptions.getKnowtatorXMLDirectory();
+    EvaluationOfClearTKEventSpans evaluator = new EvaluationOfClearTKEventSpans(
+        outputDirectory, rawText, knowtatorXML, cliOptions.getPatients().getList());
+    evaluator.setLogging(Level.FINE, new File("target/eval/cleartk-event-errors.log"));
+    List<AnnotationStatistics<String>> perFoldStats = evaluator.crossValidation(4);
+    for (AnnotationStatistics<String> oneFold : perFoldStats) {
+      System.err.println(oneFold);
+    }
+    System.err.println("OVERALL");
+    System.err.println(AnnotationStatistics.addAll(perFoldStats));
+  }
+
+  /** Forwards the directories and patient sets to the base class with an empty annotator-type set. */
+  public EvaluationOfClearTKEventSpans(
+      File baseDirectory, File rawTextDirectory,
+      File knowtatorXMLDirectory, List<Integer> patientSets) {
+    super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, patientSets,
+        EnumSet.noneOf(AnnotatorType.class));
+  }
+
+  /** Returns a no-op engine: no model is trained here, the ClearTK one is used as-is. */
+  @Override
+  protected AnalysisEngineDescription getDataWriterDescription(File directory)
+      throws ResourceInitializationException {
+    return AnalysisEngineFactory.createPrimitiveDescription(NoOpAnnotator.class);
+  }
+
+  /** Does nothing: no model is trained here, the ClearTK one is used as-is. */
+  @Override
+  protected void trainAndPackage(File directory) throws Exception {
+  }
+
+  /** Chains sentence, token, POS, stem and parse annotators ahead of the ClearTK event annotator. */
+  @Override
+  protected AnalysisEngineDescription getAnnotatorDescription(File directory)
+      throws ResourceInitializationException {
+    AggregateBuilder pipeline = new AggregateBuilder();
+    pipeline.add(SentenceAnnotator.getDescription());
+    pipeline.add(TokenAnnotator.getDescription());
+    pipeline.add(PosTaggerAnnotator.getDescription());
+    pipeline.add(DefaultSnowballStemmer.getDescription("English"));
+    pipeline.add(ParserAnnotator.getDescription());
+    pipeline.add(EventAnnotator.FACTORY.getAnnotatorDescription());
+    return pipeline.createAggregateDescription();
+  }
+
+  /** Selects the {@link EventMention} annotations of the CAS as the gold spans. */
+  @Override
+  protected Collection<? extends Annotation> getGoldAnnotations(JCas jCas) {
+    return JCasUtil.select(jCas, EventMention.class);
+  }
+
+  /** Selects the ClearTK {@link Event} annotations of the CAS as the system spans. */
+  @Override
+  protected Collection<? extends Annotation> getSystemAnnotations(JCas jCas) {
+    return JCasUtil.select(jCas, Event.class);
+  }
+}
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKEventSpans.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKEventSpans.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKTimeSpans.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKTimeSpans.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKTimeSpans.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKTimeSpans.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,110 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.eval;
+
+import java.io.File;
+import java.util.Collection;
+import java.util.EnumSet;
+import java.util.List;
+import java.util.logging.Level;
+
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.eval.AnnotationStatistics;
+import org.cleartk.syntax.opennlp.PosTaggerAnnotator;
+import org.cleartk.syntax.opennlp.SentenceAnnotator;
+import org.cleartk.timeml.time.TimeAnnotator;
+import org.cleartk.timeml.type.Time;
+import org.cleartk.token.stem.snowball.DefaultSnowballStemmer;
+import org.cleartk.token.tokenizer.TokenAnnotator;
+import org.uimafit.component.NoOpAnnotator;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.util.JCasUtil;
+
+import com.lexicalscope.jewel.cli.CliFactory;
+
+/** Evaluates the spans produced by ClearTK's TimeML time annotator against TimeMention spans. */
+public class EvaluationOfClearTKTimeSpans extends EvaluationOfAnnotationSpans_ImplBase {
+
+  /** Runs a 4-fold cross-validation and prints each fold's statistics, then the total, to stderr. */
+  public static void main(String[] args) throws Exception {
+    Options cliOptions = CliFactory.parseArguments(Options.class, args);
+    File outputDirectory = new File("target/eval");
+    File rawText = cliOptions.getRawTextDirectory();
+    File knowtatorXML = cliOptions.getKnowtatorXMLDirectory();
+    EvaluationOfClearTKTimeSpans evaluator = new EvaluationOfClearTKTimeSpans(
+        outputDirectory, rawText, knowtatorXML, cliOptions.getPatients().getList());
+    evaluator.setLogging(Level.FINE, new File("target/eval/cleartk-time-errors.log"));
+    List<AnnotationStatistics<String>> perFoldStats = evaluator.crossValidation(4);
+    for (AnnotationStatistics<String> oneFold : perFoldStats) {
+      System.err.println(oneFold);
+    }
+    System.err.println("OVERALL");
+    System.err.println(AnnotationStatistics.addAll(perFoldStats));
+  }
+
+  /** Forwards the directories and patient sets to the base class with an empty annotator-type set. */
+  public EvaluationOfClearTKTimeSpans(
+      File baseDirectory, File rawTextDirectory,
+      File knowtatorXMLDirectory, List<Integer> patientSets) {
+    super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, patientSets,
+        EnumSet.noneOf(AnnotatorType.class));
+  }
+
+  /** Returns a no-op engine: no model is trained here, the ClearTK one is used as-is. */
+  @Override
+  protected AnalysisEngineDescription getDataWriterDescription(File directory)
+      throws ResourceInitializationException {
+    return AnalysisEngineFactory.createPrimitiveDescription(NoOpAnnotator.class);
+  }
+
+  /** Does nothing: no model is trained here, the ClearTK one is used as-is. */
+  @Override
+  protected void trainAndPackage(File directory) throws Exception {
+  }
+
+  /** Chains sentence, token, POS and stem annotators ahead of the ClearTK time annotator. */
+  @Override
+  protected AnalysisEngineDescription getAnnotatorDescription(File directory)
+      throws ResourceInitializationException {
+    AggregateBuilder pipeline = new AggregateBuilder();
+    pipeline.add(SentenceAnnotator.getDescription());
+    pipeline.add(TokenAnnotator.getDescription());
+    pipeline.add(PosTaggerAnnotator.getDescription());
+    pipeline.add(DefaultSnowballStemmer.getDescription("English"));
+    pipeline.add(TimeAnnotator.FACTORY.getAnnotatorDescription());
+    return pipeline.createAggregateDescription();
+  }
+
+  /** Selects the {@link TimeMention} annotations of the CAS as the gold spans. */
+  @Override
+  protected Collection<? extends Annotation> getGoldAnnotations(JCas jCas) {
+    return JCasUtil.select(jCas, TimeMention.class);
+  }
+
+  /** Selects the ClearTK {@link Time} annotations of the CAS as the system spans. */
+  @Override
+  protected Collection<? extends Annotation> getSystemAnnotations(JCas jCas) {
+    return JCasUtil.select(jCas, Time.class);
+  }
+}
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKTimeSpans.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKTimeSpans.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,181 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.eval;
+
+import java.io.File;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.EnumSet;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.ctakes.temporal.ae.DocTimeRelAnnotator;
+import org.apache.ctakes.typesystem.type.refsem.EventProperties;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.TOP;
+import org.cleartk.classifier.jar.JarClassifierBuilder;
+import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter;
+import org.cleartk.eval.AnnotationStatistics;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.pipeline.JCasIterable;
+import org.uimafit.pipeline.SimplePipeline;
+import org.uimafit.util.JCasUtil;
+
+import com.google.common.base.Function;
+import com.lexicalscope.jewel.cli.CliFactory;
+
+/** Cross-validated evaluation of event property classification (currently only docTimeRel). */
+public class EvaluationOfEventProperties extends
+    Evaluation_ImplBase<Map<String, AnnotationStatistics<String>>> {
+
+  private static final String DOC_TIME_REL = "docTimeRel";
+
+  private static final List<String> PROPERTY_NAMES = Arrays.asList(DOC_TIME_REL);
+
+  /** Runs 4-fold cross-validation and prints per-fold and overall statistics for each property. */
+  public static void main(String[] args) throws Exception {
+    Options options = CliFactory.parseArguments(Options.class, args);
+    EvaluationOfEventProperties evaluation = new EvaluationOfEventProperties(
+        new File("target/eval"),
+        options.getRawTextDirectory(),
+        options.getKnowtatorXMLDirectory(),
+        options.getPatients().getList());
+    List<Map<String, AnnotationStatistics<String>>> foldStats = evaluation.crossValidation(4);
+    Map<String, AnnotationStatistics<String>> overallStats = new HashMap<String, AnnotationStatistics<String>>();
+    for (String name : PROPERTY_NAMES) {
+      overallStats.put(name, new AnnotationStatistics<String>());
+    }
+    // accumulate each fold's per-property statistics into the overall totals
+    for (Map<String, AnnotationStatistics<String>> propertyStats : foldStats) {
+      for (Map.Entry<String, AnnotationStatistics<String>> entry : propertyStats.entrySet()) {
+        overallStats.get(entry.getKey()).addAll(entry.getValue());
+      }
+    }
+    for (String name : PROPERTY_NAMES) {
+      System.err.println("====================");
+      System.err.println(name);
+      for (int i = 0; i < foldStats.size(); ++i) {
+        System.err.println("--------------------");
+        System.err.println("Fold " + i);
+        System.err.println(foldStats.get(i).get(name));
+      }
+      System.err.println("--------------------");
+      System.err.println("Overall");
+      System.err.println(overallStats.get(name));
+    }
+  }
+
+  /** Creates an evaluation that passes {@code PART_OF_SPEECH_TAGS} as the only annotator type. */
+  public EvaluationOfEventProperties(
+      File baseDirectory, File rawTextDirectory,
+      File knowtatorXMLDirectory, List<Integer> patientSets) {
+    super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, patientSets,
+        EnumSet.of(AnnotatorType.PART_OF_SPEECH_TAGS));
+  }
+
+  /** Adds {@link EventMention} to the superclass's list of types that are gold at test time. */
+  @Override
+  protected List<Class<? extends TOP>> getAnnotationClassesThatShouldBeGoldAtTestTime() {
+    List<Class<? extends TOP>> result = super.getAnnotationClassesThatShouldBeGoldAtTestTime();
+    result.add(EventMention.class);
+    return result;
+  }
+
+  /** Runs the DocTimeRelAnnotator LIBSVM data writer, then trains and packages with "-c 1000". */
+  @Override
+  protected void train(CollectionReader collectionReader, File directory) throws Exception {
+    AggregateBuilder aggregateBuilder = new AggregateBuilder();
+    aggregateBuilder.add(this.getPreprocessorTrainDescription());
+    aggregateBuilder.add(DocTimeRelAnnotator.createDataWriterDescription(
+        LIBSVMStringOutcomeDataWriter.class,
+        directory));
+    SimplePipeline.runPipeline(collectionReader, aggregateBuilder.createAggregate());
+    JarClassifierBuilder.trainAndPackage(directory, "-c", "1000");
+  }
+
+  /** Clears event properties, runs the trained annotator, and scores system vs. gold events. */
+  @Override
+  protected Map<String, AnnotationStatistics<String>> test(
+      CollectionReader collectionReader, File directory) throws Exception {
+    AggregateBuilder aggregateBuilder = new AggregateBuilder();
+    aggregateBuilder.add(this.getPreprocessorTestDescription());
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearEventProperties.class));
+    aggregateBuilder.add(DocTimeRelAnnotator.createAnnotatorDescription(directory));
+
+    Function<EventMention, ?> eventMentionToSpan = AnnotationStatistics.annotationToSpan();
+    Map<String, Function<EventMention, String>> propertyGetters;
+    propertyGetters = new HashMap<String, Function<EventMention, String>>();
+    Map<String, AnnotationStatistics<String>> statsMap = new HashMap<String, AnnotationStatistics<String>>();
+    // seed both maps from PROPERTY_NAMES so every name looked up in the loop below has an entry
+    for (String name : PROPERTY_NAMES) {
+      propertyGetters.put(name, getPropertyGetter(name));
+      statsMap.put(name, new AnnotationStatistics<String>());
+    }
+
+    for (JCas jCas : new JCasIterable(collectionReader, aggregateBuilder.createAggregate())) {
+      JCas goldView = jCas.getView(GOLD_VIEW_NAME);
+      JCas systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
+      Collection<EventMention> goldEvents = JCasUtil.select(goldView, EventMention.class);
+      Collection<EventMention> systemEvents = JCasUtil.select(systemView, EventMention.class);
+      // NOTE(review): presumably pairs gold/system events by span and compares the property value
+      for (String name : PROPERTY_NAMES) {
+        statsMap.get(name).add(
+            goldEvents, systemEvents, eventMentionToSpan, propertyGetters.get(name));
+      }
+    }
+    return statsMap;
+  }
+
+  /** Returns a function reading the named {@link EventProperties} feature of an event as a string. */
+  private static Function<EventMention, String> getPropertyGetter(final String propertyName) {
+    return new Function<EventMention, String>() {
+      @Override
+      public String apply(EventMention eventMention) {
+        EventProperties eventProperties = eventMention.getEvent().getProperties();
+        Feature feature = eventProperties.getType().getFeatureByBaseName(propertyName);
+        return eventProperties.getFeatureValueAsString(feature);
+      }
+    };
+  }
+
+  /** Resets the properties of every {@link EventProperties} in the CAS (null, or 0 for polarity). */
+  public static class ClearEventProperties extends JCasAnnotator_ImplBase {
+    @Override
+    public void process(JCas jCas) throws AnalysisEngineProcessException {
+      for (EventProperties eventProperties : JCasUtil.select(jCas, EventProperties.class)) {
+        eventProperties.setAspect(null);
+        eventProperties.setCategory(null);
+        eventProperties.setContextualAspect(null);
+        eventProperties.setContextualModality(null);
+        eventProperties.setDegree(null);
+        eventProperties.setDocTimeRel(null);
+        eventProperties.setPermanence(null);
+        eventProperties.setPolarity(0);
+      }
+    }
+  }
+}
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java
------------------------------------------------------------------------------
svn:mime-type = text/plain