You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by st...@apache.org on 2012/10/11 23:33:05 UTC
svn commit: r1397326 [2/2] - in /incubator/ctakes/trunk: ./ ctakes-temporal/
ctakes-temporal/.settings/ ctakes-temporal/src/ ctakes-temporal/src/main/
ctakes-temporal/src/main/java/ ctakes-temporal/src/main/java/org/
ctakes-temporal/src/main/java/org/a...
Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,112 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.eval;
+
+import java.io.File;
+import java.util.Collection;
+import java.util.EnumSet;
+import java.util.List;
+import java.util.logging.Level;
+
+import org.apache.ctakes.temporal.ae.EventAnnotator;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.TOP;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.classifier.jar.JarClassifierBuilder;
+import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter;
+import org.cleartk.eval.AnnotationStatistics;
+import org.uimafit.util.JCasUtil;
+
+import com.lexicalscope.jewel.cli.CliFactory;
+
+public class EvaluationOfEventSpans extends EvaluationOfAnnotationSpans_ImplBase {
+
+ public static void main(String[] args) throws Exception {
+ Options options = CliFactory.parseArguments(Options.class, args);
+ EvaluationOfEventSpans evaluation = new EvaluationOfEventSpans(
+ new File("target/eval"),
+ options.getRawTextDirectory(),
+ options.getKnowtatorXMLDirectory(),
+ options.getPatients().getList());
+ evaluation.setLogging(Level.FINE, new File("target/eval/ctakes-event-errors.log"));
+ List<AnnotationStatistics<String>> foldStats = evaluation.crossValidation(4);
+ for (AnnotationStatistics<String> stats : foldStats) {
+ System.err.println(stats);
+ }
+ System.err.println("OVERALL");
+ System.err.println(AnnotationStatistics.addAll(foldStats));
+ }
+
+ public EvaluationOfEventSpans(
+ File baseDirectory,
+ File rawTextDirectory,
+ File knowtatorXMLDirectory,
+ List<Integer> patientSets) {
+ super(
+ baseDirectory,
+ rawTextDirectory,
+ knowtatorXMLDirectory,
+ patientSets,
+ EnumSet.of(AnnotatorType.PART_OF_SPEECH_TAGS));
+ // AnnotatorType.UMLS_NAMED_ENTITIES,
+ // AnnotatorType.LEXICAL_VARIANTS,
+ // AnnotatorType.DEPENDENCIES,
+ // AnnotatorType.SEMANTIC_ROLES));
+ }
+
+ @Override
+ protected AnalysisEngineDescription getDataWriterDescription(File directory)
+ throws ResourceInitializationException {
+ return EventAnnotator.createDataWriterDescription(
+ LIBSVMStringOutcomeDataWriter.class,
+ directory);
+ }
+
+ @Override
+ protected void trainAndPackage(File directory) throws Exception {
+ JarClassifierBuilder.trainAndPackage(directory, "-c", "10000");
+ }
+
+ @Override
+ protected List<Class<? extends TOP>> getAnnotationClassesThatShouldBeGoldAtTestTime() {
+ List<Class<? extends TOP>> result = super.getAnnotationClassesThatShouldBeGoldAtTestTime();
+ result.add(EntityMention.class);
+ return result;
+ }
+
+ @Override
+ protected AnalysisEngineDescription getAnnotatorDescription(File directory)
+ throws ResourceInitializationException {
+ return EventAnnotator.createAnnotatorDescription(directory);
+ }
+
+ @Override
+ protected Collection<? extends Annotation> getGoldAnnotations(JCas jCas) {
+ return JCasUtil.select(jCas, EventMention.class);
+ }
+
+ @Override
+ protected Collection<? extends Annotation> getSystemAnnotations(JCas jCas) {
+ return JCasUtil.select(jCas, EventMention.class);
+ }
+}
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,97 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.eval;
+
+import java.io.File;
+import java.util.Collection;
+import java.util.EnumSet;
+import java.util.List;
+import java.util.logging.Level;
+
+import org.apache.ctakes.temporal.ae.TimeAnnotator;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.classifier.jar.JarClassifierBuilder;
+import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter;
+import org.cleartk.eval.AnnotationStatistics;
+import org.uimafit.util.JCasUtil;
+
+import com.lexicalscope.jewel.cli.CliFactory;
+
+public class EvaluationOfTimeSpans extends EvaluationOfAnnotationSpans_ImplBase {
+
+ public static void main(String[] args) throws Exception {
+ Options options = CliFactory.parseArguments(Options.class, args);
+ EvaluationOfTimeSpans evaluation = new EvaluationOfTimeSpans(
+ new File("target/eval"),
+ options.getRawTextDirectory(),
+ options.getKnowtatorXMLDirectory(),
+ options.getPatients().getList());
+ evaluation.setLogging(Level.FINE, new File("target/eval/ctakes-time-errors.log"));
+ List<AnnotationStatistics<String>> foldStats = evaluation.crossValidation(4);
+ for (AnnotationStatistics<String> stats : foldStats) {
+ System.err.println(stats);
+ }
+ System.err.println("OVERALL");
+ System.err.println(AnnotationStatistics.addAll(foldStats));
+ }
+
+ public EvaluationOfTimeSpans(
+ File baseDirectory,
+ File rawTextDirectory,
+ File knowtatorXMLDirectory,
+ List<Integer> patientSets) {
+ super(
+ baseDirectory,
+ rawTextDirectory,
+ knowtatorXMLDirectory,
+ patientSets,
+ EnumSet.of(AnnotatorType.PART_OF_SPEECH_TAGS));
+ }
+
+ @Override
+ protected AnalysisEngineDescription getDataWriterDescription(File directory)
+ throws ResourceInitializationException {
+ return TimeAnnotator.createDataWriterDescription(LIBSVMStringOutcomeDataWriter.class, directory);
+ }
+
+ @Override
+ protected void trainAndPackage(File directory) throws Exception {
+ JarClassifierBuilder.trainAndPackage(directory, "-c", "10000");
+ }
+
+ @Override
+ protected AnalysisEngineDescription getAnnotatorDescription(File directory)
+ throws ResourceInitializationException {
+ return TimeAnnotator.createAnnotatorDescription(directory);
+ }
+
+ @Override
+ protected Collection<? extends Annotation> getGoldAnnotations(JCas jCas) {
+ return JCasUtil.select(jCas, TimeMention.class);
+ }
+
+ @Override
+ protected Collection<? extends Annotation> getSystemAnnotations(JCas jCas) {
+ return JCasUtil.select(jCas, TimeMention.class);
+ }
+}
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,393 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.eval;
+
+import java.io.File;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.ctakes.chunker.ae.Chunker;
+import org.apache.ctakes.chunker.ae.DefaultChunkCreator;
+import org.apache.ctakes.chunker.ae.adjuster.ChunkAdjuster;
+import org.apache.ctakes.contexttokenizer.ae.ContextDependentTokenizerAnnotator;
+import org.apache.ctakes.core.ae.OverlapAnnotator;
+import org.apache.ctakes.core.ae.SentenceDetector;
+import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
+import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
+import org.apache.ctakes.core.resource.FileResourceImpl;
+import org.apache.ctakes.core.resource.JdbcConnectionResourceImpl;
+import org.apache.ctakes.core.resource.LuceneIndexReaderResourceImpl;
+import org.apache.ctakes.core.resource.SuffixMaxentModelResourceImpl;
+import org.apache.ctakes.dependency.parser.ae.ClearParserDependencyParserAE;
+import org.apache.ctakes.dependency.parser.ae.ClearParserSemanticRoleLabelerAE;
+import org.apache.ctakes.dictionary.lookup.ae.UmlsDictionaryLookupAnnotator;
+import org.apache.ctakes.lvg.ae.LvgAnnotator;
+import org.apache.ctakes.lvg.resource.LvgCmdApiResourceImpl;
+import org.apache.ctakes.postagger.POSTagger;
+import org.apache.ctakes.temporal.ae.THYMEKnowtatorXMLReader;
+import org.apache.ctakes.typesystem.type.syntax.Chunk;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textspan.LookupWindowAnnotation;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.TOP;
+import org.cleartk.util.ae.UriToDocumentTextAnnotator;
+import org.cleartk.util.cr.UriCollectionReader;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.component.ViewCreatorAnnotator;
+import org.uimafit.component.ViewTextCopierAnnotator;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.factory.ExternalResourceFactory;
+import org.uimafit.util.JCasUtil;
+
+import com.google.common.collect.Lists;
+import com.lexicalscope.jewel.cli.Option;
+
+public abstract class Evaluation_ImplBase<STATISTICS_TYPE> extends
+ org.cleartk.eval.Evaluation_ImplBase<Integer, STATISTICS_TYPE> {
+
+ public enum AnnotatorType {
+ PART_OF_SPEECH_TAGS, UMLS_NAMED_ENTITIES, LEXICAL_VARIANTS, DEPENDENCIES, SEMANTIC_ROLES
+ };
+
+ protected final String GOLD_VIEW_NAME = "GoldView";
+
+ static interface Options {
+
+ @Option(longName = "text")
+ public File getRawTextDirectory();
+
+ @Option(longName = "xml")
+ public File getKnowtatorXMLDirectory();
+
+ @Option(longName = "patients")
+ public CommandLine.IntegerRanges getPatients();
+ }
+
+ protected File rawTextDirectory;
+
+ protected File knowtatorXMLDirectory;
+
+ protected List<Integer> patientSets;
+
+ private Set<AnnotatorType> annotatorFlags;
+
+ public Evaluation_ImplBase(
+ File baseDirectory,
+ File rawTextDirectory,
+ File knowtatorXMLDirectory,
+ List<Integer> patientSets,
+ Set<AnnotatorType> annotatorFlags) {
+ super(baseDirectory);
+ this.rawTextDirectory = rawTextDirectory;
+ this.knowtatorXMLDirectory = knowtatorXMLDirectory;
+ this.patientSets = patientSets;
+ this.annotatorFlags = annotatorFlags;
+ }
+
+ public List<STATISTICS_TYPE> crossValidation(int nFolds) throws Exception {
+ return this.crossValidation(this.patientSets, nFolds);
+ }
+
+ @Override
+ protected CollectionReader getCollectionReader(List<Integer> patientSets) throws Exception {
+ List<File> files = new ArrayList<File>();
+ for (Integer set : patientSets) {
+ File setTextDirectory = new File(this.rawTextDirectory, "doc" + set);
+ for (File file : setTextDirectory.listFiles()) {
+ files.add(file);
+ }
+ }
+ return UriCollectionReader.getCollectionReaderFromFiles(files);
+ }
+
+ protected AnalysisEngineDescription getPreprocessorTrainDescription() throws Exception {
+ return this.getPreprocessorDescription(PipelineType.TRAIN);
+ }
+
+ protected AnalysisEngineDescription getPreprocessorTestDescription() throws Exception {
+ return this.getPreprocessorDescription(PipelineType.TEST);
+ }
+
+ protected List<Class<? extends TOP>> getAnnotationClassesThatShouldBeGoldAtTestTime() {
+ return new ArrayList<Class<? extends TOP>>();
+ }
+
+ private static enum PipelineType {
+ TRAIN, TEST
+ };
+
+ private AnalysisEngineDescription getPreprocessorDescription(PipelineType pipelineType)
+ throws Exception {
+ AggregateBuilder aggregateBuilder = new AggregateBuilder();
+ aggregateBuilder.add(UriToDocumentTextAnnotator.getDescription());
+ switch (pipelineType) {
+ case TRAIN:
+ aggregateBuilder.add(THYMEKnowtatorXMLReader.getDescription(this.knowtatorXMLDirectory));
+ break;
+ case TEST:
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+ ViewCreatorAnnotator.class,
+ ViewCreatorAnnotator.PARAM_VIEW_NAME,
+ GOLD_VIEW_NAME));
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+ ViewTextCopierAnnotator.class,
+ ViewTextCopierAnnotator.PARAM_SOURCE_VIEW_NAME,
+ CAS.NAME_DEFAULT_SOFA,
+ ViewTextCopierAnnotator.PARAM_DESTINATION_VIEW_NAME,
+ GOLD_VIEW_NAME));
+ aggregateBuilder.add(
+ THYMEKnowtatorXMLReader.getDescription(this.knowtatorXMLDirectory),
+ CAS.NAME_DEFAULT_SOFA,
+ GOLD_VIEW_NAME);
+ for (Class<? extends TOP> annotationClass : this.getAnnotationClassesThatShouldBeGoldAtTestTime()) {
+ aggregateBuilder.add(AnnotationCopier.getDescription(
+ GOLD_VIEW_NAME,
+ CAS.NAME_DEFAULT_SOFA,
+ annotationClass));
+ }
+ break;
+ }
+ // identify segments
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SimpleSegmentAnnotator.class));
+ // identify sentences
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+ SentenceDetector.class,
+ "MaxentModel",
+ ExternalResourceFactory.createExternalResourceDescription(
+ SuffixMaxentModelResourceImpl.class,
+ SentenceDetector.class.getResource("../sentdetect/sdmed.mod"))));
+ // identify tokens
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TokenizerAnnotatorPTB.class));
+ // merge some tokens
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ContextDependentTokenizerAnnotator.class));
+
+ // identify part-of-speech tags if requested
+ if (this.annotatorFlags.contains(AnnotatorType.PART_OF_SPEECH_TAGS)) {
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+ POSTagger.class,
+ POSTagger.POS_MODEL_FILE_PARAM,
+ "org/apache/ctakes/postagger/models/mayo-pos.zip",
+ POSTagger.TAG_DICTIONARY_PARAM,
+ "org/apache/ctakes/postagger/models/tag.dictionary.txt",
+ POSTagger.CASE_SENSITIVE_PARAM,
+ true));
+ }
+
+ // identify UMLS named entities if requested
+ if (this.annotatorFlags.contains(AnnotatorType.UMLS_NAMED_ENTITIES)) {
+ // remove gold mentions if they're there (we'll add cTAKES mentions later instead)
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(EntityMentionRemover.class));
+ // identify chunks
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+ Chunker.class,
+ Chunker.CHUNKER_MODEL_FILE_PARAM,
+ Chunker.class.getResource("../models/chunk-model.claims-1.5.zip").toURI().getPath(),
+ Chunker.CHUNKER_CREATOR_CLASS_PARAM,
+ DefaultChunkCreator.class));
+ // adjust NP in NP NP to span both
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+ ChunkAdjuster.class,
+ ChunkAdjuster.PARAM_CHUNK_PATTERN,
+ new String[] { "NP", "NP" },
+ ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
+ 1));
+ // adjust NP in NP PP NP to span all three
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+ ChunkAdjuster.class,
+ ChunkAdjuster.PARAM_CHUNK_PATTERN,
+ new String[] { "NP", "PP", "NP" },
+ ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
+ 2));
+ // add lookup windows for each NP
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(CopyNPChunksToLookupWindowAnnotations.class));
+ // maximize lookup windows
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+ OverlapAnnotator.class,
+ "A_ObjectClass",
+ LookupWindowAnnotation.class,
+ "B_ObjectClass",
+ LookupWindowAnnotation.class,
+ "OverlapType",
+ "A_ENV_B",
+ "ActionType",
+ "DELETE",
+ "DeleteAction",
+ new String[] { "selector=B" }));
+ // add UMLS on top of lookup windows
+ String umlsUser = System.getProperty("umls.user");
+ String umlsPassword = System.getProperty("umls.password");
+ if (umlsUser == null || umlsPassword == null) {
+ throw new IllegalArgumentException(
+ "The properties umls.user and umls.password must be set to use the "
+ + "UmlsDictionaryLookupAnnotator. You can set them by provding java with the "
+ + "arguments -Dumls.user=... and -Dumls.password=...");
+ }
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+ UmlsDictionaryLookupAnnotator.class,
+ "UMLSAddr",
+ "https://uts-ws.nlm.nih.gov/restful/isValidUMLSUser",
+ "UMLSVendor",
+ "NLM-6515182895",
+ "UMLSUser",
+ umlsUser,
+ "UMLSPW",
+ umlsPassword,
+ "LookupDescriptor",
+ ExternalResourceFactory.createExternalResourceDescription(
+ FileResourceImpl.class,
+ getResourceAsFile(UmlsDictionaryLookupAnnotator.class, "../LookupDesc_Db.xml")),
+ "DbConnection",
+ ExternalResourceFactory.createExternalResourceDescription(
+ JdbcConnectionResourceImpl.class,
+ "",
+ JdbcConnectionResourceImpl.PARAM_DRIVER_CLASS,
+ "org.hsqldb.jdbcDriver",
+ JdbcConnectionResourceImpl.PARAM_URL,
+ "jdbc:hsqldb:res:/org/apache/ctakes/dictionary/lookup/umls2011ab/umls"),
+ "RxnormIndexReader",
+ ExternalResourceFactory.createExternalResourceDescription(
+ LuceneIndexReaderResourceImpl.class,
+ "",
+ "UseMemoryIndex",
+ true,
+ "IndexDirectory",
+ getResourceAsFile(UmlsDictionaryLookupAnnotator.class, "../rxnorm_index")),
+ "OrangeBookIndexReader",
+ ExternalResourceFactory.createExternalResourceDescription(
+ LuceneIndexReaderResourceImpl.class,
+ "",
+ "UseMemoryIndex",
+ true,
+ "IndexDirectory",
+ getResourceAsFile(UmlsDictionaryLookupAnnotator.class, "../OrangeBook"))));
+ }
+
+ // add lvg annotator
+ if (this.annotatorFlags.contains(AnnotatorType.LEXICAL_VARIANTS)) {
+ String[] XeroxTreebankMap = {
+ "adj|JJ",
+ "adv|RB",
+ "aux|AUX",
+ "compl|CS",
+ "conj|CC",
+ "det|DET",
+ "modal|MD",
+ "noun|NN",
+ "prep|IN",
+ "pron|PRP",
+ "verb|VB" };
+ String[] ExclusionSet = {
+ "and",
+ "And",
+ "by",
+ "By",
+ "for",
+ "For",
+ "in",
+ "In",
+ "of",
+ "Of",
+ "on",
+ "On",
+ "the",
+ "The",
+ "to",
+ "To",
+ "with",
+ "With" };
+ AnalysisEngineDescription lvgAnnotator = AnalysisEngineFactory.createPrimitiveDescription(
+ LvgAnnotator.class,
+ "UseSegments",
+ false,
+ "SegmentsToSkip",
+ new String[0],
+ "UseCmdCache",
+ false,
+ "CmdCacheFileLocation",
+ "/org/apache/ctakes/lvg/2005_norm.voc",
+ "CmdCacheFrequencyCutoff",
+ 20,
+ "ExclusionSet",
+ ExclusionSet,
+ "XeroxTreebankMap",
+ XeroxTreebankMap,
+ "LemmaCacheFileLocation",
+ "/org/apache/ctakes/lvg/2005_lemma.voc",
+ "UseLemmaCache",
+ false,
+ "LemmaCacheFrequencyCutoff",
+ 20,
+ "PostLemmas",
+ true,
+ "LvgCmdApi",
+ ExternalResourceFactory.createExternalResourceDescription(
+ LvgCmdApiResourceImpl.class,
+ getResourceAsFile(LvgAnnotator.class, "../data/config/lvg.properties")));
+ aggregateBuilder.add(lvgAnnotator);
+ }
+
+ // add dependency parser
+ if (this.annotatorFlags.contains(AnnotatorType.DEPENDENCIES)) {
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearParserDependencyParserAE.class));
+ }
+
+ // add semantic role labeler
+ if (this.annotatorFlags.contains(AnnotatorType.SEMANTIC_ROLES)) {
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearParserSemanticRoleLabelerAE.class));
+ }
+ return aggregateBuilder.createAggregateDescription();
+ }
+
+ /**
+ * This is hack to deal with classes that don't handle resources correctly
+ */
+ private static File getResourceAsFile(Class<?> cls, String path) throws URISyntaxException {
+ // this will fail if the resource is not a real File, but the UMLS code assumes that
+ return new File(cls.getResource(path).toURI());
+ }
+
+ public static class CopyNPChunksToLookupWindowAnnotations extends JCasAnnotator_ImplBase {
+
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+ for (Chunk chunk : JCasUtil.select(jCas, Chunk.class)) {
+ if (chunk.getChunkType().equals("NP")) {
+ new LookupWindowAnnotation(jCas, chunk.getBegin(), chunk.getEnd()).addToIndexes();
+ }
+ }
+ }
+ }
+
+ public static class EntityMentionRemover extends JCasAnnotator_ImplBase {
+
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+ for (EntityMention mention : Lists.newArrayList(JCasUtil.select(jCas, EntityMention.class))) {
+ mention.removeFromIndexes();
+ }
+ }
+ }
+}
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/PrintRelations.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/PrintRelations.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/PrintRelations.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/PrintRelations.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,119 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.eval;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+import java.util.TreeSet;
+
+import org.apache.ctakes.temporal.ae.THYMEKnowtatorXMLReader;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.util.ViewURIUtil;
+import org.cleartk.util.ae.UriToDocumentTextAnnotator;
+import org.cleartk.util.cr.UriCollectionReader;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.pipeline.JCasIterable;
+import org.uimafit.util.JCasUtil;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Ordering;
+import com.lexicalscope.jewel.cli.CliFactory;
+import com.lexicalscope.jewel.cli.Option;
+
+public class PrintRelations {
+
+ interface Options {
+
+ @Option(longName = "text")
+ public File getRawTextDirectory();
+
+ @Option(longName = "xml")
+ public File getKnowtatorXMLDirectory();
+
+ @Option(longName = "patients")
+ public CommandLine.IntegerRanges getPatients();
+ }
+
+ public static void main(String[] args) throws Exception {
+
+ // parse command line options
+ Options options = CliFactory.parseArguments(Options.class, args);
+ File rawTextDirectory = options.getRawTextDirectory();
+ File knowtatorXMLDirectory = options.getKnowtatorXMLDirectory();
+ List<Integer> patientSets = options.getPatients().getList();
+
+ // collect the files for all the patients
+ List<File> files = new ArrayList<File>();
+ for (Integer set : patientSets) {
+ File subDir = new File(rawTextDirectory, "doc" + set);
+ files.addAll(Arrays.asList(subDir.listFiles()));
+ }
+
+ // construct reader and Knowtator XML parser
+ CollectionReader reader = UriCollectionReader.getCollectionReaderFromFiles(files);
+ AggregateBuilder aggregateBuilder = new AggregateBuilder();
+ aggregateBuilder.add(UriToDocumentTextAnnotator.getDescription());
+ aggregateBuilder.add(THYMEKnowtatorXMLReader.getDescription(knowtatorXMLDirectory));
+
+ // walk through each document in the collection
+ for (JCas jCas : new JCasIterable(reader, aggregateBuilder.createAggregate())) {
+ System.err.println(ViewURIUtil.getURI(jCas));
+
+ // collect all relations and sort them by the order they appear in the text
+ Collection<BinaryTextRelation> relations = JCasUtil.select(jCas, BinaryTextRelation.class);
+ List<BinaryTextRelation> relationList = new ArrayList<BinaryTextRelation>(relations);
+ Collections.sort(relationList, BY_RELATION_OFFSETS);
+
+ for (EntityMention entityMention : JCasUtil.select(jCas, EntityMention.class)) {
+ System.err.printf("%s (%s)\n", entityMention.getCoveredText(), entityMention.getTypeID());
+ }
+
+ // print out the relations for visual inspection
+ // for (BinaryTextRelation relation : relationList) {
+ // Annotation source = relation.getArg1().getArgument();
+ // Annotation target = relation.getArg2().getArgument();
+ // String type = relation.getCategory();
+ // System.err.printf("%s(%s,%s)\n", type, source.getCoveredText(), target.getCoveredText());
+ // }
+ System.err.println();
+ }
+ }
+
+ /**
+ * Orders relations to match their order in the text (as defined by the spans of their arguments)
+ */
+ private static final Ordering<BinaryTextRelation> BY_RELATION_OFFSETS = Ordering.<Integer> natural().lexicographical().onResultOf(
+ new Function<BinaryTextRelation, Set<Integer>>() {
+ @Override
+ public Set<Integer> apply(BinaryTextRelation relation) {
+ Annotation arg1 = relation.getArg1().getArgument();
+ Annotation arg2 = relation.getArg2().getArgument();
+ return new TreeSet<Integer>(Arrays.asList(arg1.getBegin(), arg2.getBegin()));
+ }
+ });
+}
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/PrintRelations.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/PrintRelations.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: incubator/ctakes/trunk/pom.xml
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/pom.xml?rev=1397326&r1=1397325&r2=1397326&view=diff
==============================================================================
--- incubator/ctakes/trunk/pom.xml (original)
+++ incubator/ctakes/trunk/pom.xml Thu Oct 11 21:33:04 2012
@@ -40,6 +40,7 @@
<module>ctakes-clinical-pipeline</module>
<module>ctakes-pad-term-spotter</module>
<module>ctakes-assertion</module>
+ <module>ctakes-temporal</module>
</modules>
<dependencyManagement>
<dependencies>
@@ -232,6 +233,11 @@
<artifactId>args4j</artifactId>
<version>2.0.16</version>
</dependency>
+ <dependency>
+ <groupId>com.lexicalscope.jewelcli</groupId>
+ <artifactId>jewelcli</artifactId>
+ <version>0.8.3</version>
+ </dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>