You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2013/04/30 17:57:25 UTC
svn commit: r1477702 - in
/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal: ae/
ae/feature/ eval/
Author: tmill
Date: Tue Apr 30 15:57:24 2013
New Revision: 1477702
URL: http://svn.apache.org/r1477702
Log:
Add in treebank reading and a few evaluation options to temporal relation project.
Added:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMETreebankReader.java
Modified:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventTimeFlatTreeFeatureExtractor.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TemporalPETExtractor.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMETreebankReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMETreebankReader.java?rev=1477702&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMETreebankReader.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMETreebankReader.java Tue Apr 30 15:57:24 2013
@@ -0,0 +1,265 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae;
+
+import java.io.File;
+import java.io.FileFilter;
+import java.io.IOException;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.syntax.TerminalTreebankNode;
+import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode;
+import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.FeatureStructure;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSArray;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.FileUtils;
+import org.apache.uima.util.Level;
+import org.cleartk.syntax.constituent.util.TreebankFormatParser;
+import org.cleartk.util.UIMAUtil;
+import org.cleartk.util.ViewURIUtil;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.descriptor.ConfigurationParameter;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.util.JCasUtil;
+
+public class THYMETreebankReader extends JCasAnnotator_ImplBase {
+
+ // log4j logger used by process() to report the document being handled.
+ public static Logger logger = Logger.getLogger(THYMETreebankReader.class);
+ // Name of the configuration parameter that supplies the treebank directory.
+ public static final String TREEBANK_DIRECTORY = "treebankDirectory";
+ // Matches bracketed markers of the form [meta ...], [start ...] or [end ...]; process() blanks them out before alignment.
+ private static final Pattern headerPatt = Pattern.compile("\\[(meta|start|end) [^\\]]*?\\]"); //"\\[meta [^\\]]*\\]");
+
+ // Root of the gold treebank; mandatory configuration parameter.
+ @ConfigurationParameter(name = TREEBANK_DIRECTORY, mandatory = true)
+ protected File treebankDirectory;
+ // Non-hidden subdirectories of treebankDirectory, set by initialize() and searched by process().
+ File[] subdirs = null;
+
+ /**
+  * Initializes the annotator and caches the non-hidden subdirectories of the treebank directory.
+  *
+  * @param aContext the UIMA context supplying the configuration parameters
+  * @throws ResourceInitializationException if the parent initialization fails, or if the treebank
+  *         directory cannot be listed (for example because it does not exist or is not a directory)
+  */
+ @Override
+ public void initialize(UimaContext aContext) throws ResourceInitializationException {
+     super.initialize(aContext);
+     File[] visibleDirs = treebankDirectory.listFiles(new FileFilter(){
+         @Override
+         public boolean accept(File pathname) {
+             return pathname.isDirectory() && !pathname.isHidden();
+         }});
+     // File.listFiles returns null instead of throwing when the path cannot be listed; fail here
+     // with a clear message rather than with a NullPointerException on the first call to process().
+     if(visibleDirs == null){
+         throw new ResourceInitializationException(
+             new IllegalArgumentException("Cannot list treebank directory: " + treebankDirectory));
+     }
+     subdirs = visibleDirs;
+ }
+
+ /**
+  * Replaces the sentence and token annotations of a document with ones derived from its
+  * gold-standard treebank file, when such a file exists and can be aligned with the text.
+  * <p>
+  * The tree file is looked up as {@code <last URI path segment>.xml.tree} in each cached
+  * subdirectory of the treebank directory. If no file is found, or the trees cannot be aligned
+  * with the document text, a warning is logged and the method returns before changing the CAS.
+  *
+  * @param jcas the CAS to annotate
+  * @throws AnalysisEngineProcessException if the tree file exists but cannot be read
+  */
+ @Override
+ public void process(JCas jcas) throws AnalysisEngineProcessException {
+     URI uri = ViewURIUtil.getURI(jcas);
+     logger.info("Document id is: " + uri.toString());
+
+     String fn = uri.getPath().substring(uri.getPath().lastIndexOf('/')+1) + ".xml.tree";
+     File treeFile = null;
+     for(File subdir : subdirs){
+         File candidate = new File(subdir, fn);
+         if(candidate.exists()){
+             treeFile = candidate;
+             break;
+         }
+     }
+
+     if(treeFile == null){
+         this.getContext().getLogger().log(Level.WARNING,"Could not find treeFile: " + fn);
+         // FIXME do automatic parse?
+         return;
+     }
+
+     String tbText;
+     try {
+         tbText = FileUtils.file2String(treeFile);
+     } catch (IOException e1) {
+         // shouldn't do automatic parse here -- something wrong with file itself, not the parse
+         throw new AnalysisEngineProcessException(e1);
+     }
+
+     // Blank out section headers so the TreebankFormatParser skips over them. Matching runs over the
+     // immutable document text while the edits go to a separate builder; every header is overwritten
+     // with the same number of spaces, so character offsets are unchanged.
+     String docText = jcas.getDocumentText();
+     StringBuilder fileText = new StringBuilder(docText);
+     Matcher m = headerPatt.matcher(docText);
+     while(m.find()){
+         fileText.replace(m.start(), m.end(), getWhitespaceString(m.end() - m.start()));
+     }
+
+     List<org.cleartk.syntax.constituent.util.TopTreebankNode> utilTrees;
+     try {
+         utilTrees = TreebankFormatParser.parseDocument(tbText, 0, fileText.toString());
+     } catch (Exception e) {
+         this.getContext().getLogger().log(Level.WARNING,
+             String.format("Skipping %s due to alignment problems", fn),
+             e);
+         // FIXME - do automatic parse here...
+         return;
+     }
+
+     // if we get this far, the gold standard exists and we will let it do all of our tokenization.
+     // first we need to remove sentence and token annotations (copied first, so removal is safe)
+     for(Sentence sent : new ArrayList<Sentence>(JCasUtil.select(jcas, Sentence.class))){
+         sent.removeFromIndexes();
+     }
+     for(BaseToken tok : new ArrayList<BaseToken>(JCasUtil.select(jcas, BaseToken.class))){
+         tok.removeFromIndexes();
+     }
+
+     // add Token, Sentence and TreebankNode annotations for the text
+     for (org.cleartk.syntax.constituent.util.TopTreebankNode utilTree : utilTrees) {
+
+         // create a Sentence spanning the converted tree
+         TopTreebankNode tree = convert(utilTree, jcas);
+         Sentence sentence = new Sentence(jcas, tree.getBegin(), tree.getEnd());
+         sentence.addToIndexes();
+
+         // create a token for every terminal that covers text; empty-span terminals get none
+         for (int i = 0; i < tree.getTerminals().size(); i++) {
+             TreebankNode leaf = tree.getTerminals(i);
+             if (leaf.getBegin() != leaf.getEnd()) {
+                 BaseToken token = new BaseToken(jcas, leaf.getBegin(), leaf.getEnd());
+                 token.setPartOfSpeech(leaf.getNodeType());
+                 token.addToIndexes();
+             }
+         }
+     }
+ }
+
+ /** Builds a string made of {@code headerLen} space characters. */
+ private static String getWhitespaceString(int headerLen) {
+     StringBuilder blanks = new StringBuilder(headerLen);
+     for (int i = 0; i < headerLen; i++) {
+         blanks.append(' ');
+     }
+     return blanks.toString();
+ }
+
+ // The cTAKES syntax type system was modeled after ClearTK's, so the methods below borrow very
+ // liberally from org.cleartk.syntax.constituent.util.TreebankNodeUtility, whose convert method goes
+ // from a "normal" tree to a ClearTK/UIMA tree. These do the same, except they go to a cTAKES/UIMA tree.
+ /**
+  * Converts one parsed ClearTK tree into a cTAKES TopTreebankNode over the same text span,
+  * fills in its subtree and terminals, and adds it to the indexes.
+  */
+ private static TopTreebankNode convert(org.cleartk.syntax.constituent.util.TopTreebankNode inTree, JCas jcas){
+     int begin = inTree.getTextBegin();
+     int end = inTree.getTextEnd();
+     TopTreebankNode root = new TopTreebankNode(jcas, begin, end);
+     root.setTreebankParse(inTree.getTreebankParse());
+     // copy the node features and build the children recursively; the root has no parent
+     convert(inTree, jcas, root, null);
+     initTerminalNodes(root, jcas);
+     root.addToIndexes();
+     return root;
+ }
+
+ /**
+  * Gathers the terminal nodes below {@code uimaNode} (depth first, in child order), numbers them
+  * from zero with setIndex, and stores them as the node's terminals array.
+  */
+ public static void initTerminalNodes(
+         TopTreebankNode uimaNode,
+         JCas jCas) {
+     List<TerminalTreebankNode> leaves = new ArrayList<TerminalTreebankNode>();
+     _initTerminalNodes(uimaNode, leaves);
+
+     int position = 0;
+     for (TerminalTreebankNode leaf : leaves) {
+         leaf.setIndex(position);
+         position++;
+     }
+
+     int leafCount = leaves.size();
+     FSArray leafArray = new FSArray(jCas, leafCount);
+     leafArray.copyFromArray(leaves.toArray(new FeatureStructure[leafCount]), 0, 0, leafCount);
+     uimaNode.setTerminals(leafArray);
+ }
+
+ /** Appends every TerminalTreebankNode below {@code node} to {@code terminals}, depth first in child order. */
+ private static void _initTerminalNodes(
+         TreebankNode node,
+         List<TerminalTreebankNode> terminals) {
+     FSArray kids = node.getChildren();
+     int kidCount = kids.size();
+     for (int k = 0; k < kidCount; k++) {
+         TreebankNode kid = (TreebankNode) kids.get(k);
+         if (!(kid instanceof TerminalTreebankNode)) {
+             // non-terminal: descend into it
+             _initTerminalNodes(kid, terminals);
+             continue;
+         }
+         terminals.add((TerminalTreebankNode) kid);
+     }
+ }
+
+ /**
+  * Copies the type, tags, value and leaf flag of {@code pojoNode} onto {@code uimaNode}, sets
+  * {@code parentNode} as its parent, and recursively creates, converts and indexes a UIMA node
+  * for each child of {@code pojoNode}.
+  *
+  * @return {@code uimaNode}, after its features and children have been set
+  */
+ public static TreebankNode convert(
+         org.cleartk.syntax.constituent.util.TreebankNode pojoNode,
+         JCas jCas,
+         TreebankNode uimaNode,
+         TreebankNode parentNode) {
+     uimaNode.setNodeType(pojoNode.getType());
+     uimaNode.setNodeTags(UIMAUtil.toStringArray(jCas, pojoNode.getTags()));
+     uimaNode.setNodeValue(pojoNode.getValue());
+     uimaNode.setLeaf(pojoNode.isLeaf());
+     uimaNode.setParent(parentNode);
+
+     List<TreebankNode> converted = new ArrayList<TreebankNode>();
+     for (org.cleartk.syntax.constituent.util.TreebankNode pojoChild : pojoNode.getChildren()) {
+         int begin = pojoChild.getTextBegin();
+         int end = pojoChild.getTextEnd();
+         // leaves become terminal nodes; everything else is a plain treebank node
+         TreebankNode uimaChild = pojoChild.isLeaf()
+             ? new TerminalTreebankNode(jCas, begin, end)
+             : new TreebankNode(jCas, begin, end);
+         converted.add(convert(pojoChild, jCas, uimaChild, uimaNode));
+         uimaChild.addToIndexes();
+     }
+
+     int childCount = converted.size();
+     FSArray childArray = new FSArray(jCas, childCount);
+     childArray.copyFromArray(converted.toArray(new FeatureStructure[childCount]), 0, 0, childCount);
+     uimaNode.setChildren(childArray);
+     return uimaNode;
+ }
+
+ /**
+  * Creates an analysis engine description for this reader with its treebank directory parameter
+  * set to {@code treebankDirectory}.
+  */
+ public static AnalysisEngineDescription getDescription(File treebankDirectory)
+         throws ResourceInitializationException {
+     AnalysisEngineDescription description = AnalysisEngineFactory.createPrimitiveDescription(
+         THYMETreebankReader.class,
+         THYMETreebankReader.TREEBANK_DIRECTORY,
+         treebankDirectory);
+     return description;
+ }
+
+ /**
+  * Ad-hoc check of the header pattern: prints the character offsets of every header marker
+  * found in a small sample note.
+  */
+ public static void main(String[] args){
+     String testString = "[meta rev_date=\"02/20/2010\" start_date=\"02/20/2010\" rev=\"0002\"]\n\n" +
+         "[start section id=\"20112\"]\n\n" +
+         "#1 Dilated esophagus on CT-scan\n" +
+         "#2 Adenocarcinoma right colon\n" +
+         "#3 Symptomatic anemia\n" +
+         "#4 Hypothyroidism";
+     Matcher m = headerPatt.matcher(testString);
+
+     // find() is used rather than matches() because the markers are only part of the text
+     while(m.find()){
+         System.out.println("Found match at: " + m.start() + "-" + m.end());
+     }
+ }
+}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventTimeFlatTreeFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventTimeFlatTreeFeatureExtractor.java?rev=1477702&r1=1477701&r2=1477702&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventTimeFlatTreeFeatureExtractor.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventTimeFlatTreeFeatureExtractor.java Tue Apr 30 15:57:24 2013
@@ -32,10 +32,10 @@ public class EventTimeFlatTreeFeatureExt
}
SimpleTree bopTree = getTree(jcas, arg1, arg2, "BOP", new Function<BaseToken,String>(){public String apply(BaseToken t){ return t.getPartOfSpeech();}});
-// SimpleTree bowTree = getTree(jcas, arg1, arg2, "BOW", new Function<BaseToken,String>(){public String apply(BaseToken t){ return t.getCoveredText();}});
+ SimpleTree bowTree = getTree(jcas, arg1, arg2, "BOW", new Function<BaseToken,String>(){public String apply(BaseToken t){ return t.getCoveredText();}});
feats.add(new Feature("TK_BOP", bopTree.toString()));
-// feats.add(new Feature("TK_BOW", bowTree.toString()));
+ feats.add(new Feature("TK_BOW", bowTree.toString()));
return feats;
}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TemporalPETExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TemporalPETExtractor.java?rev=1477702&r1=1477701&r2=1477702&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TemporalPETExtractor.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TemporalPETExtractor.java Tue Apr 30 15:57:24 2013
@@ -79,7 +79,7 @@ public class TemporalPETExtractor implem
return features;
}
- private static void addOtherTimes(JCas jcas, TopTreebankNode root, IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) {
+ public static void addOtherTimes(JCas jcas, TopTreebankNode root, IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) {
List<TimeMention> timexes = JCasUtil.selectCovered(TimeMention.class, root);
for(TimeMention timex : timexes){
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java?rev=1477702&r1=1477701&r2=1477702&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java Tue Apr 30 15:57:24 2013
@@ -73,7 +73,7 @@ public abstract class EvaluationOfAnnota
File knowtatorXMLDirectory,
File xmiDirectory,
Class<? extends Annotation> annotationClass) {
- super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, xmiDirectory);
+ // base constructor order is (..., xmiDirectory, treebankDirectory); null means no gold treebank
+ super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, xmiDirectory, null);
this.annotationClass = annotationClass;
}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java?rev=1477702&r1=1477701&r2=1477702&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java Tue Apr 30 15:57:24 2013
@@ -89,7 +89,7 @@ public class EvaluationOfEventProperties
File rawTextDirectory,
File knowtatorXMLDirectory,
File xmiDirectory) {
- super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, xmiDirectory);
+ // base constructor order is (..., xmiDirectory, treebankDirectory); null means no gold treebank
+ super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, xmiDirectory, null);
for (String name : PROPERTY_NAMES) {
this.loggers.put(name, Logger.getLogger(String.format("%s.%s", this.getClass().getName(), name)));
}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java?rev=1477702&r1=1477701&r2=1477702&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java Tue Apr 30 15:57:24 2013
@@ -19,6 +19,7 @@
package org.apache.ctakes.temporal.eval;
import java.io.File;
+import java.net.URI;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
@@ -28,6 +29,7 @@ import java.util.Set;
import org.apache.ctakes.relationextractor.eval.RelationExtractorEvaluation.HashableArguments;
import org.apache.ctakes.temporal.ae.EventTimeRelationAnnotator;
import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
import org.apache.ctakes.typesystem.type.relation.TemporalTextRelation;
import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
@@ -38,9 +40,11 @@ import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionReader;
import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
import org.cleartk.classifier.jar.JarClassifierBuilder;
import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter;
import org.cleartk.eval.AnnotationStatistics;
+import org.cleartk.util.ViewURIUtil;
import org.uimafit.component.JCasAnnotator_ImplBase;
import org.uimafit.descriptor.ConfigurationParameter;
import org.uimafit.factory.AggregateBuilder;
@@ -54,34 +58,60 @@ import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.lexicalscope.jewel.cli.CliFactory;
+import com.lexicalscope.jewel.cli.Option;
public class EvaluationOfTemporalRelations extends
Evaluation_ImplBase<AnnotationStatistics<String>> {
+
+ /**
+  * Command-line options specific to the temporal relation evaluation, in addition to the
+  * options shared by all evaluations.
+  */
+ interface TempRelOptions extends Evaluation_ImplBase.Options{
+     /** When set, main trains on the train and dev sets and evaluates on the test set; otherwise it evaluates on the dev set. */
+     @Option
+     boolean getTest();
+
+     /** When set, the system relations of each evaluated document are printed as tab-separated lines. */
+     @Option
+     boolean getPrintFormattedRelations();
+ }
public static void main(String[] args) throws Exception {
- Options options = CliFactory.parseArguments(Options.class, args);
+ TempRelOptions options = CliFactory.parseArguments(TempRelOptions.class, args);
List<Integer> patientSets = options.getPatients().getList();
List<Integer> trainItems = THYMEData.getTrainPatientSets(patientSets);
List<Integer> devItems = THYMEData.getDevPatientSets(patientSets);
+ List<Integer> testItems = THYMEData.getTestPatientSets(patientSets);
+
EvaluationOfTemporalRelations evaluation = new EvaluationOfTemporalRelations(
new File("target/eval/temporal-relations"),
options.getRawTextDirectory(),
options.getKnowtatorXMLDirectory(),
options.getXMIDirectory(),
- options.getPrintErrors());
+ options.getTreebankDirectory(),
+ options.getPrintErrors(),
+ options.getPrintFormattedRelations());
evaluation.prepareXMIsFor(patientSets);
- AnnotationStatistics<String> stats = evaluation.trainAndTest(trainItems, devItems);
+ List<Integer> training = trainItems;
+ List<Integer> testing = null;
+ if(options.getTest()){
+ training.addAll(devItems);
+ testing = testItems;
+ }else{
+ testing = devItems;
+ }
+ AnnotationStatistics<String> stats = evaluation.trainAndTest(training, testing);
System.err.println(stats);
}
+ protected boolean printRelations = false;
+
public EvaluationOfTemporalRelations(
File baseDirectory,
File rawTextDirectory,
File knowtatorXMLDirectory,
File xmiDirectory,
- boolean printErrors) {
- super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, xmiDirectory);
+ File treebankDirectory,
+ boolean printErrors,
+ boolean printRelations) {
+ super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, xmiDirectory, treebankDirectory);
this.printErrors = printErrors;
+ this.printRelations = printRelations;
}
@Override
@@ -90,6 +120,7 @@ public class EvaluationOfTemporalRelatio
aggregateBuilder.add(CopyFromGold.getDescription(EventMention.class, TimeMention.class, BinaryTextRelation.class));
aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveNonTLINKRelations.class));
aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveCrossSentenceRelations.class));
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveEventEventRelations.class));
aggregateBuilder.add(EventTimeRelationAnnotator.createDataWriterDescription(
LIBSVMStringOutcomeDataWriter.class,
directory,
@@ -111,6 +142,10 @@ public class EvaluationOfTemporalRelatio
RemoveCrossSentenceRelations.class,
RemoveCrossSentenceRelations.PARAM_RELATION_VIEW,
GOLD_VIEW_NAME));
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+ RemoveEventEventRelations.class,
+ RemoveEventEventRelations.PARAM_RELATION_VIEW,
+ GOLD_VIEW_NAME));
aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveRelations.class));
aggregateBuilder.add(EventTimeRelationAnnotator.createAnnotatorDescription(directory));
@@ -132,7 +167,12 @@ public class EvaluationOfTemporalRelatio
systemView,
BinaryTextRelation.class);
stats.add(goldRelations, systemRelations, getSpan, getOutcome);
-
+ if(this.printRelations){
+ URI uri = ViewURIUtil.getURI(jCas);
+ String[] path = uri.getPath().split("/");
+ printRelationAnnotations(path[path.length - 1], systemRelations);
+ }
+
if(this.printErrors){
Map<HashableArguments, BinaryTextRelation> goldMap = Maps.newHashMap();
for (BinaryTextRelation relation : goldRelations) {
@@ -180,6 +220,60 @@ public class EvaluationOfTemporalRelatio
text.substring(begin, end).replaceAll("[\r\n]", " "));
}
+ /**
+  * Prints one tab-separated line per relation to standard output: the file name, the relation
+  * category, and then the simple class name, begin offset and end offset of each argument.
+  */
+ private static void printRelationAnnotations(String fileName, Collection<BinaryTextRelation> relations) {
+     for(BinaryTextRelation relation : relations) {
+         Annotation first = relation.getArg1().getArgument();
+         Annotation second = relation.getArg2().getArgument();
+
+         System.out.format("%s\t%s\t%s\t%d\t%d\t%s\t%d\t%d\n",
+             fileName,
+             relation.getCategory(),
+             first.getClass().getSimpleName(), first.getBegin(), first.getEnd(),
+             second.getClass().getSimpleName(), second.getBegin(), second.getEnd());
+     }
+ }
+
+ /**
+  * Removes from the configured view every relation of category "CONTAINS" whose arguments are not
+  * one TimeMention and one EventMention (in either order), together with its two
+  * RelationArguments. Relations of any other category are left in place.
+  */
+ public static class RemoveEventEventRelations extends JCasAnnotator_ImplBase {
+
+     public static final String PARAM_RELATION_VIEW = "RelationView";
+
+     @ConfigurationParameter(name = PARAM_RELATION_VIEW)
+     private String relationViewName = CAS.NAME_DEFAULT_SOFA;
+
+     @Override
+     public void process(JCas jCas) throws AnalysisEngineProcessException {
+         JCas relationView;
+         try {
+             relationView = jCas.getView(this.relationViewName);
+         } catch (CASException e) {
+             throw new AnalysisEngineProcessException(e);
+         }
+
+         // iterate over a copy so relations can be removed from the indexes while looping
+         List<BinaryTextRelation> snapshot = Lists.newArrayList(JCasUtil.select(relationView, BinaryTextRelation.class));
+         for(BinaryTextRelation relation : snapshot){
+             if(!relation.getCategory().equals("CONTAINS")){
+                 continue;
+             }
+             RelationArgument first = relation.getArg1();
+             RelationArgument second = relation.getArg2();
+             boolean timeThenEvent = first.getArgument() instanceof TimeMention && second.getArgument() instanceof EventMention;
+             boolean eventThenTime = first.getArgument() instanceof EventMention && second.getArgument() instanceof TimeMention;
+             // event-time pairs are the kind we keep; everything else goes
+             if(!timeThenEvent && !eventThenTime){
+                 first.removeFromIndexes();
+                 second.removeFromIndexes();
+                 relation.removeFromIndexes();
+             }
+         }
+     }
+ }
+
public static class RemoveNonTLINKRelations extends JCasAnnotator_ImplBase {
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
@@ -239,6 +333,32 @@ public class EvaluationOfTemporalRelatio
}
}
+ /**
+  * Removes from the configured view every relation whose category does not start with
+  * "CONTAINS", together with its two RelationArguments.
+  */
+ public static class RemoveNonContainsRelations extends JCasAnnotator_ImplBase {
+     public static final String PARAM_RELATION_VIEW = "RelationView";
+
+     @ConfigurationParameter(name = PARAM_RELATION_VIEW)
+     private String relationViewName = CAS.NAME_DEFAULT_SOFA;
+
+     @Override
+     public void process(JCas jCas) throws AnalysisEngineProcessException {
+         JCas relationView;
+         try {
+             relationView = jCas.getView(this.relationViewName);
+         } catch (CASException e) {
+             throw new AnalysisEngineProcessException(e);
+         }
+         // iterate over a copy so relations can be removed from the indexes while looping
+         List<BinaryTextRelation> snapshot = Lists.newArrayList(JCasUtil.select(relationView, BinaryTextRelation.class));
+         for (BinaryTextRelation relation : snapshot) {
+             if (relation.getCategory().startsWith("CONTAINS")) {
+                 continue;
+             }
+             relation.getArg1().removeFromIndexes();
+             relation.getArg2().removeFromIndexes();
+             relation.removeFromIndexes();
+         }
+     }
+ }
+
public static class RemoveRelations extends JCasAnnotator_ImplBase {
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java?rev=1477702&r1=1477701&r2=1477702&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java Tue Apr 30 15:57:24 2013
@@ -32,10 +32,12 @@ import java.util.regex.Pattern;
import org.apache.ctakes.chunker.ae.Chunker;
import org.apache.ctakes.chunker.ae.DefaultChunkCreator;
import org.apache.ctakes.chunker.ae.adjuster.ChunkAdjuster;
+import org.apache.ctakes.constituency.parser.ae.ConstituencyParser;
import org.apache.ctakes.contexttokenizer.ae.ContextDependentTokenizerAnnotator;
import org.apache.ctakes.core.ae.OverlapAnnotator;
import org.apache.ctakes.core.ae.SentenceDetector;
import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
+import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.core.resource.FileResourceImpl;
import org.apache.ctakes.core.resource.JdbcConnectionResourceImpl;
import org.apache.ctakes.core.resource.LuceneIndexReaderResourceImpl;
@@ -47,6 +49,7 @@ import org.apache.ctakes.lvg.ae.LvgAnnot
import org.apache.ctakes.lvg.resource.LvgCmdApiResourceImpl;
import org.apache.ctakes.postagger.POSTagger;
import org.apache.ctakes.temporal.ae.THYMEKnowtatorXMLReader;
+import org.apache.ctakes.temporal.ae.THYMETreebankReader;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.Chunk;
import org.apache.ctakes.typesystem.type.textsem.EntityMention;
@@ -109,9 +112,21 @@ public abstract class Evaluation_ImplBas
@Option(longName = "patients")
public CommandLine.IntegerRanges getPatients();
- @Option(longName = "print-errors", defaultValue="false")
+ @Option(longName = "treebank", defaultToNull=true)
+ public File getTreebankDirectory();
+
+ @Option
+ public boolean getGrid();
+
+ @Option
public boolean getPrintErrors();
- }
+
+ @Option
+ public boolean getMergeOverlap();
+
+ @Option(longName = "kernelParams", defaultToNull=true)
+ public String getKernelParams();
+}
protected File rawTextDirectory;
@@ -121,18 +136,24 @@ public abstract class Evaluation_ImplBas
private boolean xmiExists;
- protected boolean printErrors;
+ protected File treebankDirectory;
+
+ protected boolean printErrors = false;
+
+ protected String[] kernelParams;
public Evaluation_ImplBase(
File baseDirectory,
File rawTextDirectory,
File knowtatorXMLDirectory,
- File xmiDirectory) {
+ File xmiDirectory,
+ File treebankDirectory) {
super(baseDirectory);
this.rawTextDirectory = rawTextDirectory;
this.knowtatorXMLDirectory = knowtatorXMLDirectory;
this.xmiDirectory = xmiDirectory;
this.xmiExists = this.xmiDirectory.exists() && this.xmiDirectory.listFiles().length > 0;
+ this.treebankDirectory = treebankDirectory;
}
public void prepareXMIsFor(List<Integer> patientSets) throws Exception {
@@ -221,7 +242,7 @@ public abstract class Evaluation_ImplBas
"MaxentModel",
ExternalResourceFactory.createExternalResourceDescription(
SuffixMaxentModelResourceImpl.class,
- SentenceDetector.class.getResource("../sentdetect/sdmed.mod"))));
+ FileLocator.locateFile("org/apache/ctakes/core/sentdetect/sdmed.mod").toURI().toURL())));
// identify tokens
aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TokenizerAnnotatorPTB.class));
// merge some tokens
@@ -243,7 +264,7 @@ public abstract class Evaluation_ImplBas
aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
Chunker.class,
Chunker.CHUNKER_MODEL_FILE_PARAM,
- Chunker.class.getResource("../models/chunk-model.claims-1.5.zip").toURI().getPath(),
+ FileLocator.locateFile("org/apache/ctakes/chunker/models/chunk-model.claims-1.5.zip"),
Chunker.CHUNKER_CREATOR_CLASS_PARAM,
DefaultChunkCreator.class));
@@ -385,6 +406,13 @@ public abstract class Evaluation_ImplBas
// add semantic role labeler
aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearNLPSemanticRoleLabelerAE.class));
+ // add constituency parser (or gold standard treebank if we have it)
+ if(this.treebankDirectory != null){
+ aggregateBuilder.add(THYMETreebankReader.getDescription(this.treebankDirectory));
+ }else{
+ aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ConstituencyParser.class));
+ }
+
// write out the CAS after all the above annotations
aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
XMIWriter.class,