You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2013/06/27 17:29:00 UTC
svn commit: r1497397 - in
/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal: ae/
eval/
Author: tmill
Date: Thu Jun 27 15:29:00 2013
New Revision: 1497397
URL: http://svn.apache.org/r1497397
Log:
Checked in changes to allow for meta-timex classifier.
Modified:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/BackwardsTimeAnnotator.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/CRFTimeAnnotator.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConstituencyBasedTimeAnnotator.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/BackwardsTimeAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/BackwardsTimeAnnotator.java?rev=1497397&r1=1497396&r2=1497397&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/BackwardsTimeAnnotator.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/BackwardsTimeAnnotator.java Thu Jun 27 15:29:00 2013
@@ -1,5 +1,6 @@
package org.apache.ctakes.temporal.ae;
+import java.io.File;
import java.util.ArrayList;
import java.util.List;
@@ -10,9 +11,13 @@ import org.apache.ctakes.typesystem.type
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CASException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.classifier.CleartkAnnotator;
+import org.cleartk.classifier.DataWriter;
import org.cleartk.classifier.Feature;
import org.cleartk.classifier.Instance;
import org.cleartk.classifier.chunking.BIOChunking;
@@ -25,12 +30,40 @@ import org.cleartk.classifier.feature.ex
import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
import org.cleartk.classifier.feature.extractor.simple.TypePathExtractor;
+import org.cleartk.classifier.jar.DefaultDataWriterFactory;
+import org.cleartk.classifier.jar.DirectoryDataWriterFactory;
+import org.cleartk.classifier.jar.GenericJarClassifierFactory;
+import org.uimafit.factory.AnalysisEngineFactory;
import org.uimafit.util.JCasUtil;
import com.google.common.collect.Lists;
-public class BackwardsTimeAnnotator extends TemporalEntityAnnotator_ImplBase{
+public class BackwardsTimeAnnotator extends TemporalEntityAnnotator_ImplBase {
+
+ public static AnalysisEngineDescription createDataWriterDescription(
+ Class<? extends DataWriter<String>> dataWriterClass, File outputDirectory)
+ throws ResourceInitializationException {
+ return AnalysisEngineFactory.createPrimitiveDescription(
+ BackwardsTimeAnnotator.class,
+ CleartkAnnotator.PARAM_IS_TRAINING,
+ true,
+ DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
+ dataWriterClass,
+ DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+ outputDirectory);
+ }
+
+ public static AnalysisEngineDescription createAnnotatorDescription(
+ File modelDirectory) throws ResourceInitializationException {
+ return AnalysisEngineFactory.createPrimitiveDescription(
+ BackwardsTimeAnnotator.class,
+ CleartkAnnotator.PARAM_IS_TRAINING,
+ false,
+ GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+ new File(modelDirectory, "model.jar"));
+ }
+
protected List<SimpleFeatureExtractor> tokenFeatureExtractors;
@@ -141,9 +174,14 @@ public class BackwardsTimeAnnotator exte
if (!this.isTraining()) {
tokens = Lists.reverse(tokens);
outcomes = Lists.reverse(outcomes);
- this.timeChunking.createChunks(jCas, tokens, outcomes);
+ JCas timexCas;
+ try{
+ timexCas = jCas.getView(TimeAnnotator.TIMEX_VIEW);
+ }catch(CASException e){
+ throw new AnalysisEngineProcessException(e);
+ }
+ this.timeChunking.createChunks(timexCas, tokens, outcomes);
}
}
}
-
}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/CRFTimeAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/CRFTimeAnnotator.java?rev=1497397&r1=1497396&r2=1497397&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/CRFTimeAnnotator.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/CRFTimeAnnotator.java Thu Jun 27 15:29:00 2013
@@ -4,17 +4,16 @@ import java.io.File;
import java.util.ArrayList;
import java.util.List;
-import org.apache.ctakes.constituency.parser.util.AnnotationTreeUtils;
import org.apache.ctakes.temporal.ae.feature.ParseSpanFeatureExtractor;
import org.apache.ctakes.temporal.ae.feature.TimeWordTypeExtractor;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
-import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
import org.apache.ctakes.typesystem.type.textsem.TimeMention;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CASException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.classifier.CleartkAnnotator;
@@ -25,12 +24,10 @@ import org.cleartk.classifier.chunking.B
import org.cleartk.classifier.feature.extractor.CleartkExtractor;
import org.cleartk.classifier.feature.extractor.CleartkExtractor.Following;
import org.cleartk.classifier.feature.extractor.CleartkExtractor.Preceding;
-import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor;
import org.cleartk.classifier.feature.extractor.simple.CombinedExtractor;
import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
import org.cleartk.classifier.feature.extractor.simple.TypePathExtractor;
-import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor.PatternType;
import org.cleartk.classifier.jar.DefaultDataWriterFactory;
import org.cleartk.classifier.jar.DirectoryDataWriterFactory;
import org.cleartk.classifier.jar.GenericJarClassifierFactory;
@@ -119,10 +116,10 @@ public class CRFTimeAnnotator extends Te
}
// extract features for all tokens
- int tokenIndex = -1;
+// int tokenIndex = -1;
List<List<Feature>> allFeatures = new ArrayList<List<Feature>>();
for (BaseToken token : tokens) {
- ++tokenIndex;
+// ++tokenIndex;
List<Feature> features = new ArrayList<Feature>();
// features from token attributes
@@ -153,7 +150,7 @@ public class CRFTimeAnnotator extends Te
// }
// startToken = tokens.get(i);
// }
- TreebankNode preTerm = AnnotationTreeUtils.annotationNode(jCas, token);
+// TreebankNode preTerm = AnnotationTreeUtils.annotationNode(jCas, token);
features.addAll(parseExtractor.extract(jCas, token.getBegin(), token.getEnd()));
//if(preTerm != null && preTerm.getParent() != null){
// features.addAll(parseExtractor.extract(jCas, preTerm.getParent().getBegin(), preTerm.getParent().getEnd()));
@@ -176,7 +173,13 @@ public class CRFTimeAnnotator extends Te
}else{
// outcomes.add(this.classifier.classify(features));
outcomes = this.classifier.classify(allFeatures);
- this.timeChunking.createChunks(jCas, tokens, outcomes);
+ JCas timexCas;
+ try {
+ timexCas = jCas.getView(TimeAnnotator.TIMEX_VIEW);
+ } catch (CASException e) {
+ throw new AnalysisEngineProcessException(e);
+ }
+ this.timeChunking.createChunks(timexCas, tokens, outcomes);
}
}
}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConstituencyBasedTimeAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConstituencyBasedTimeAnnotator.java?rev=1497397&r1=1497396&r2=1497397&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConstituencyBasedTimeAnnotator.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConstituencyBasedTimeAnnotator.java Thu Jun 27 15:29:00 2013
@@ -15,9 +15,11 @@ import org.apache.ctakes.typesystem.type
import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
import org.apache.ctakes.typesystem.type.textsem.TimeMention;
import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CASException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.classifier.CleartkAnnotator;
@@ -25,7 +27,8 @@ import org.cleartk.classifier.DataWriter
import org.cleartk.classifier.Feature;
import org.cleartk.classifier.Instance;
import org.cleartk.classifier.feature.extractor.CleartkExtractor;
-import static org.cleartk.classifier.feature.extractor.CleartkExtractor.*;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor.Bag;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor.Covered;
import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor;
import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor.PatternType;
import org.cleartk.classifier.feature.extractor.simple.CombinedExtractor;
@@ -48,9 +51,9 @@ TemporalEntityAnnotator_ImplBase {
private static final String NON_MENTION = "NON_TIME_MENTION";
private static final String MENTION = "TIME_MENTION";
+ private static Logger logger = Logger.getLogger(ConstituencyBasedTimeAnnotator.class);
private static final int SPAN_LIMIT = 12;
-
public static AnalysisEngineDescription createDataWriterDescription(
Class<? extends DataWriter<String>> dataWriterClass,
File outputDirectory) throws ResourceInitializationException {
@@ -82,13 +85,14 @@ TemporalEntityAnnotator_ImplBase {
private Map<String, String> wordTypes;
- private Set<String> timeWords;
-
@Override
public void initialize(UimaContext context)
throws ResourceInitializationException {
super.initialize(context);
+ CombinedExtractor charExtractors = new CombinedExtractor(new CharacterCategoryPatternExtractor(PatternType.REPEATS_MERGED),
+ new CharacterCategoryPatternExtractor(PatternType.ONE_PER_CHAR));
+
this.wordTypes = Maps.newHashMap();
URL url = TimeWordsExtractor.class.getResource(LOOKUP_PATH);
try {
@@ -102,18 +106,17 @@ TemporalEntityAnnotator_ImplBase {
} catch (IOException e) {
throw new ResourceInitializationException(e);
}
- this.timeWords = this.wordTypes.keySet();
CombinedExtractor allExtractors = new CombinedExtractor(
new CoveredTextExtractor(),
// new TimeWordTypeExtractor(),
- new CharacterCategoryPatternExtractor(PatternType.REPEATS_MERGED),
- new CharacterCategoryPatternExtractor(PatternType.ONE_PER_CHAR),
+ charExtractors,
new TypePathExtractor(BaseToken.class, "partOfSpeech"));
-
+
featureExtractors = new ArrayList<SimpleFeatureExtractor>();
// featureExtractors.add(new CleartkExtractor(BaseToken.class, new CoveredTextExtractor(), new Bag(new Covered())));
featureExtractors.add(new CleartkExtractor(BaseToken.class, allExtractors, new Bag(new Covered())));
+// featureExtractors.add(charExtractors);
wordTypeExtractor = new CleartkExtractor(BaseToken.class, new TimeWordTypeExtractor(), new Bag(new Covered()));
// featureExtractors.add(new CleartkExtractor(BaseToken.class, new CoveredTextExtractor(), new Bag(new Preceding(1))));
// featureExtractors.add(new CleartkExtractor(BaseToken.class, new CoveredTextExtractor(), new Bag(new Following(1))));
@@ -137,13 +140,14 @@ TemporalEntityAnnotator_ImplBase {
// }
for(TopTreebankNode root : JCasUtil.selectCovered(TopTreebankNode.class, segment)){
- recursivelyProcessNode(jCas, root.getChildren(0), NON_MENTION, mentions);
+ recursivelyProcessNode(jCas, root.getChildren(0), mentions, 0.0);
}
}
- private void recursivelyProcessNode(JCas jCas, TreebankNode node, String parentCategory, Set<TimeMention> mentions) throws AnalysisEngineProcessException {
+ private double recursivelyProcessNode(JCas jCas, TreebankNode node, Set<TimeMention> mentions, double parentScore) throws AnalysisEngineProcessException {
// accumulate features:
- double score=0.0;
+ double score=0.0;
+ parentScore = 0.0;
ArrayList<Feature> features = new ArrayList<Feature>();
String category = NON_MENTION;
@@ -151,21 +155,10 @@ TemporalEntityAnnotator_ImplBase {
if(node.getParent().getParent() == null) features.add(new Feature("IS_ROOT"));
features.add(new Feature("NODE_LABEL", node.getNodeType()));
features.add(new Feature("PARENT_LABEL", node.getParent().getNodeType()));
- features.add(new Feature("PARENT_CAT", parentCategory));
+ List<BaseToken> coveredTokens = JCasUtil.selectCovered(BaseToken.class, node);
//check span length, check if a small node contains any time word
- int numTokens = JCasUtil.selectCovered(BaseToken.class, node).size();
- boolean containTimeWord = false;
- boolean containGoldTime = false;
- //if (numTokens < SPAN_LIMIT){//check if it contains time word
- for(BaseToken bt : JCasUtil.selectCovered(BaseToken.class, node)){
- String btword = bt.getCoveredText().toLowerCase();
- if(this.timeWords.contains(btword)){
- containTimeWord = true;
- break;
- }
- }
- //}
+ int numTokens = coveredTokens.size();
if(node.getLeaf()){
features.add(new Feature("IS_LEAF"));
@@ -190,72 +183,65 @@ TemporalEntityAnnotator_ImplBase {
if(this.isTraining()){
List<TimeMention> goldMentions = JCasUtil.selectCovered(TimeMention.class, node);
- if( goldMentions != null){
- containGoldTime = true;
-
- for(TimeMention mention : goldMentions){
- if(mention.getBegin() == node.getBegin() && mention.getEnd() == node.getEnd()){
- category = MENTION;
- score=1.0;
- mentions.remove(mention);
- }
- }
+ for(TimeMention mention : goldMentions){
+ if(mention.getBegin() == node.getBegin() && mention.getEnd() == node.getEnd()){
+ category = MENTION;
+ score=1.0;
+ mentions.remove(mention);
+ if(node.getCoveredText().contains("postoperative")){
+ System.out.println("*** Positive Example: ***");
+ System.out.println("*** Parent: " + node.getParent().getCoveredText());
+ printFeatures(node, features);
+ }
+ }
}
if(numTokens < SPAN_LIMIT){
- this.dataWriter.write(new Instance<String>(category, features));
+ this.dataWriter.write(new Instance<String>(category, features));
}
}else{
score = this.classifier.score(features, 1).get(0).getScore();
category = this.classifier.classify(features);
if(category.equals(MENTION)){
// add to cas
- TimeMention mention = new TimeMention(jCas, node.getBegin(), node.getEnd());
+ JCas timexCas;
+ try {
+ timexCas = jCas.getView(TimeAnnotator.TIMEX_VIEW);
+ } catch (CASException e) {
+ throw new AnalysisEngineProcessException(e);
+ }
+
+ TimeMention mention = new TimeMention(timexCas, node.getBegin(), node.getEnd());
mention.setConfidence((float)score);
mention.addToIndexes();
+ }else{
+ score = 1 - score;
}
}
// now do children if not a leaf & not a mention
- if(node.getLeaf() || MENTION.equals(category)) return;
+ if(node.getLeaf() || MENTION.equals(category)) return score;
- //double highestScoringChild = 0.0;
- if(!containGoldTime && !containTimeWord && numTokens >= SPAN_LIMIT) return;
+ double highestScore = 0.5;
+ TreebankNode highestScoringChild = null;
- if(!node.getLeaf()){
- for(int i = 0; i < node.getChildren().size(); i++){
- TreebankNode child = node.getChildren(i);
- recursivelyProcessNode(jCas, child, category, mentions);
- }
+ for(int i = 0; i < node.getChildren().size(); i++){
+ TreebankNode child = node.getChildren(i);
+ double childScore = recursivelyProcessNode(jCas, child, mentions, Math.max(score, parentScore));
+ if(childScore > highestScore){
+ highestScoringChild = child;
+ highestScore = childScore;
+ }
}
-
-
-// if(MENTION.equals(category) && score > highestScoringChild && score > parentScore){
-
-// }
+ if(!this.isTraining() && MENTION.equals(category)){
+ logger.info(String.format("\nFound mention (%s) with score %f\n\tParent (%s) : %f\n\tBest child (%s) : %f\n", node.getCoveredText(), score, node.getParent().getCoveredText(), parentScore, highestScoringChild == null ? "(none)" : highestScoringChild.getCoveredText(), highestScore));
+ }
+ return score;
}
-// private static String getSiblingCategory(TreebankNode node, int offset) throws AnalysisEngineProcessException{
-// String cat = null;
-//
-// TreebankNode parent = node.getParent();
-// int nodeIndex = -1;
-// for(int i = 0; i < parent.getChildren().size(); i++){
-// if(parent.getChildren(i) == node){
-// nodeIndex = i;
-// break;
-// }
-// }
-//
-// if(nodeIndex == -1){
-// throw new AnalysisEngineProcessException();
-// }else if(nodeIndex + offset < 0){
-// cat = "<";
-// }else if(nodeIndex + offset >= parent.getChildren().size()){
-// cat = ">";
-// }else{
-// cat = parent.getChildren(nodeIndex+offset).getNodeType();
-// }
-//
-// return cat;
-// }
+ private static void printFeatures(TreebankNode node, List<Feature> features) {
+ System.out.println(node.getCoveredText());
+ for(Feature feat : features){
+ System.out.printf("%s => %s\n", feat.getName(), feat.getValue());
+ }
+ }
}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java?rev=1497397&r1=1497396&r2=1497397&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java Thu Jun 27 15:29:00 2013
@@ -31,6 +31,7 @@ import org.apache.ctakes.typesystem.type
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CASException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.classifier.CleartkAnnotator;
@@ -55,6 +56,8 @@ import org.uimafit.util.JCasUtil;
public class TimeAnnotator extends TemporalEntityAnnotator_ImplBase {
+ public static final String TIMEX_VIEW = "TimexView";
+
public static AnalysisEngineDescription createDataWriterDescription(
Class<? extends DataWriter<String>> dataWriterClass,
File outputDirectory) throws ResourceInitializationException {
@@ -185,7 +188,13 @@ public class TimeAnnotator extends Tempo
// during prediction, convert chunk labels to times and add them to the CAS
if (!this.isTraining()) {
- this.timeChunking.createChunks(jCas, tokens, outcomes);
+ JCas timexCas;
+ try {
+ timexCas = jCas.getView(TIMEX_VIEW);
+ } catch (CASException e) {
+ throw new AnalysisEngineProcessException(e);
+ }
+ this.timeChunking.createChunks(timexCas, tokens, outcomes);
}
}
}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java?rev=1497397&r1=1497396&r2=1497397&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java Thu Jun 27 15:29:00 2013
@@ -22,6 +22,7 @@ import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
+import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
@@ -98,7 +99,7 @@ protected abstract AnalysisEngineDescrip
protected void train(CollectionReader collectionReader, File directory) throws Exception {
AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
aggregateBuilder.add(CopyFromGold.getDescription(this.annotationClass));
- aggregateBuilder.add(this.getDataWriterDescription(directory));
+ aggregateBuilder.add(this.getDataWriterDescription(directory), "TimexView", CAS.NAME_DEFAULT_SOFA);
SimplePipeline.runPipeline(collectionReader, aggregateBuilder.createAggregate());
this.trainAndPackage(directory);
}
@@ -114,7 +115,7 @@ protected abstract AnalysisEngineDescrip
protected AnnotationStatistics<String> test(CollectionReader collectionReader, File directory)
throws Exception {
AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
- aggregateBuilder.add(this.getAnnotatorDescription(directory));
+ aggregateBuilder.add(this.getAnnotatorDescription(directory), "TimexView", CAS.NAME_DEFAULT_SOFA);
AnnotationStatistics<String> stats = new AnnotationStatistics<String>();
Ordering<Annotation> bySpans = Ordering.<Integer> natural().lexicographical().onResultOf(
@@ -174,6 +175,45 @@ protected abstract AnalysisEngineDescrip
text.substring(end, windowEnd)));
}
}
+ Set<Annotation> partialGold = new HashSet<Annotation>();
+ Set<Annotation> partialSystem = new HashSet<Annotation>();
+
+ // get overlapping spans
+ if(this.printOverlapping){
+ // iterate over all remaining gold annotations
+ for(Annotation gold : goldOnly){
+ Annotation bestSystem = null;
+ int bestOverlap = 0;
+ for(Annotation system : systemOnly){
+ if(system.getBegin() >= gold.getBegin() && system.getEnd() <= gold.getEnd()){
+ // system completely contained by gold
+ int overlap = system.getEnd() - system.getBegin();
+ if(overlap > bestOverlap){
+ bestOverlap = overlap;
+ bestSystem = system;
+ }
+ }else if(gold.getBegin() >= system.getBegin() && gold.getEnd() <= system.getEnd()){
+ // gold completely contained by system
+ int overlap = gold.getEnd() - gold.getBegin();
+ if(overlap > bestOverlap){
+ bestOverlap = overlap;
+ bestSystem = system;
+ }
+ }
+ }
+ if(bestSystem != null){
+ this.logger.info(String.format("Allowed overlapping annotation: Gold(%s) => System(%s)\n", gold.getCoveredText(), bestSystem.getCoveredText()));
+ partialGold.add(gold);
+ partialSystem.add(bestSystem);
+ }
+ }
+ if(partialGold.size() > 0){
+ goldOnly.removeAll(partialGold);
+ systemOnly.removeAll(partialSystem);
+ assert partialGold.size() == partialSystem.size();
+ this.logger.info(String.format("Found %d overlapping spans and removed from gold/system errors\n", partialGold.size()));
+ }
+ }
}
}
}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java?rev=1497397&r1=1497396&r2=1497397&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java Thu Jun 27 15:29:00 2013
@@ -24,8 +24,10 @@ import java.util.List;
import java.util.Map;
import java.util.logging.Level;
+import org.apache.ctakes.temporal.ae.BackwardsTimeAnnotator;
import org.apache.ctakes.temporal.ae.CRFTimeAnnotator;
import org.apache.ctakes.temporal.ae.ConstituencyBasedTimeAnnotator;
+import org.apache.ctakes.temporal.ae.MetaTimeAnnotator;
import org.apache.ctakes.temporal.ae.TimeAnnotator;
import org.apache.ctakes.typesystem.type.textsem.TimeMention;
import org.apache.ctakes.typesystem.type.textspan.Segment;
@@ -62,13 +64,17 @@ public class EvaluationOfTimeSpans exten
// specify the annotator classes to use
List<Class<? extends JCasAnnotator_ImplBase>> annotatorClasses = Lists.newArrayList();
+ annotatorClasses.add(BackwardsTimeAnnotator.class);
annotatorClasses.add(TimeAnnotator.class);
annotatorClasses.add(ConstituencyBasedTimeAnnotator.class);
annotatorClasses.add(CRFTimeAnnotator.class);
+ annotatorClasses.add(MetaTimeAnnotator.class);
Map<Class<? extends JCasAnnotator_ImplBase>, String[]> annotatorTrainingArguments = Maps.newHashMap();
+ annotatorTrainingArguments.put(BackwardsTimeAnnotator.class, new String[]{"-c", "0.1"});
annotatorTrainingArguments.put(TimeAnnotator.class, new String[]{"-c", "0.1"});
annotatorTrainingArguments.put(ConstituencyBasedTimeAnnotator.class, new String[]{"-c", "0.1"});
annotatorTrainingArguments.put(CRFTimeAnnotator.class, new String[]{"-p", "c2=0.1"});
+ annotatorTrainingArguments.put(MetaTimeAnnotator.class, new String[]{"-c", "1.0"});
// run one evaluation per annotator class
final Map<Class<?>, AnnotationStatistics<?>> annotatorStats = Maps.newHashMap();
@@ -80,6 +86,7 @@ public class EvaluationOfTimeSpans exten
options.getXMIDirectory(),
options.getTreebankDirectory(),
annotatorClass,
+ options.getPrintOverlappingSpans(),
annotatorTrainingArguments.get(annotatorClass));
evaluation.prepareXMIsFor(patientSets);
String name = String.format("%s.errors", annotatorClass.getSimpleName());
@@ -116,16 +123,20 @@ public class EvaluationOfTimeSpans exten
File xmiDirectory,
File treebankDirectory,
Class<? extends JCasAnnotator_ImplBase> annotatorClass,
+ boolean printOverlapping,
String[] trainingArguments) {
super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, xmiDirectory, treebankDirectory, TimeMention.class);
this.annotatorClass = annotatorClass;
this.trainingArguments = trainingArguments;
+ this.printOverlapping = printOverlapping;
}
@Override
protected AnalysisEngineDescription getDataWriterDescription(File directory)
throws ResourceInitializationException {
- if(CleartkAnnotator.class.isAssignableFrom(this.annotatorClass)){
+ if(MetaTimeAnnotator.class.isAssignableFrom(this.annotatorClass)){
+ return MetaTimeAnnotator.getDataWriterDescription(LIBLINEARStringOutcomeDataWriter.class, directory);
+ }else if(CleartkAnnotator.class.isAssignableFrom(this.annotatorClass)){
return AnalysisEngineFactory.createPrimitiveDescription(
this.annotatorClass,
CleartkAnnotator.PARAM_IS_TRAINING,
@@ -156,6 +167,9 @@ public class EvaluationOfTimeSpans exten
@Override
protected AnalysisEngineDescription getAnnotatorDescription(File directory)
throws ResourceInitializationException {
+ if(MetaTimeAnnotator.class.isAssignableFrom(this.annotatorClass)){
+ return MetaTimeAnnotator.getAnnotatorDescription(directory);
+ }
return AnalysisEngineFactory.createPrimitiveDescription(
this.annotatorClass,
CleartkAnnotator.PARAM_IS_TRAINING,
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java?rev=1497397&r1=1497396&r2=1497397&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java Thu Jun 27 15:29:00 2013
@@ -123,6 +123,9 @@ public abstract class Evaluation_ImplBas
@Option
public boolean getPrintErrors();
+ @Option
+ public boolean getPrintOverlappingSpans();
+
@Option(longName = "kernelParams", defaultToNull=true)
public String getKernelParams();
}
@@ -139,6 +142,8 @@ public abstract class Evaluation_ImplBas
protected boolean printErrors = false;
+ protected boolean printOverlapping = false;
+
protected String[] kernelParams;
public Evaluation_ImplBase(