You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2016/12/06 16:33:05 UTC
svn commit: r1772914 - in
/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference:
ae/CoreferenceChainScoringOutput.java eval/EvaluationOfEventCoreference.java
Author: tmill
Date: Tue Dec 6 16:33:04 2016
New Revision: 1772914
URL: http://svn.apache.org/viewvc?rev=1772914&view=rev
Log:
Update to allow for coref evaluation given gold standard markables.
Modified:
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainScoringOutput.java
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfEventCoreference.java
Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainScoringOutput.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainScoringOutput.java?rev=1772914&r1=1772913&r2=1772914&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainScoringOutput.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainScoringOutput.java Tue Dec 6 16:33:04 2016
@@ -123,7 +123,7 @@ public class CoreferenceChainScoringOutp
// Annotation mention = (Annotation) ((NonEmptyFSList) members).getHead();
ent2chain.put(mention, chainNum);
members = ((NonEmptyFSList)members).getTail();
- System.out.print("Mention: " + mention.getCoveredText());
+ System.out.print("Mention: " + mention.getCoveredText().replace("\n", "<CR>"));
System.out.print(" (" + mention.getBegin() + ", " + mention.getEnd() + ")");
System.out.print(" -----> ");
}
Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfEventCoreference.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfEventCoreference.java?rev=1772914&r1=1772913&r2=1772914&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfEventCoreference.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfEventCoreference.java Tue Dec 6 16:33:04 2016
@@ -10,6 +10,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@@ -69,6 +70,7 @@ import org.apache.uima.analysis_engine.m
import org.apache.uima.analysis_engine.metadata.FlowConstraints;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
+import org.apache.uima.cas.Feature;
import org.apache.uima.collection.CollectionReader;
import org.apache.uima.fit.component.ViewCreatorAnnotator;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
@@ -92,13 +94,12 @@ import org.apache.uima.jcas.cas.FloatArr
import org.apache.uima.jcas.cas.NonEmptyFSList;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.CasCopier;
import org.apache.uima.util.FileUtils;
import org.cleartk.eval.AnnotationStatistics;
import org.cleartk.ml.CleartkAnnotator;
-import org.cleartk.ml.jar.DataWriterFactory_ImplBase;
import org.cleartk.ml.jar.DefaultDataWriterFactory;
import org.cleartk.ml.jar.DirectoryDataWriterFactory;
-import org.cleartk.ml.jar.EncodingDirectoryDataWriterFactory;
import org.cleartk.ml.jar.JarClassifierBuilder;
import org.cleartk.ml.liblinear.LibLinearStringOutcomeDataWriter;
import org.cleartk.ml.svmlight.rank.SvmLightRankDataWriter;
@@ -138,6 +139,9 @@ public class EvaluationOfEventCoreferenc
public String getScorerPath();
@Option
+ public boolean getGoldMarkables();
+
+ @Option
public boolean getSkipTest();
}
@@ -186,15 +190,10 @@ public class EvaluationOfEventCoreferenc
options.getKernelParams(),
options.getOutputDirectory());
- if(options.getSkipTrain()){
- eval.skipTrain = true;
- }
- if(options.getSkipDataWriting()){
- eval.skipWrite = true;
- }
- if(options.getSkipTest()){
- eval.skipTest = true;
- }
+ eval.skipTrain = options.getSkipTrain();
+ eval.skipWrite = options.getSkipDataWriting();
+ eval.skipTest = options.getSkipTest();
+ eval.goldMarkables = options.getGoldMarkables();
eval.evalType = options.getEvalSystem();
eval.config = options.getConfig();
goldOut = "gold." + eval.config + ".conll";
@@ -246,6 +245,7 @@ public class EvaluationOfEventCoreferenc
boolean skipTrain=false;
boolean skipWrite=false;
boolean skipTest=false;
+ boolean goldMarkables=false;
public enum EVAL_SYSTEM { BASELINE, MENTION_PAIR, MENTION_CLUSTER, CLUSTER_RANK, PERSON_ONLY };
EVAL_SYSTEM evalType;
String config=null;
@@ -279,14 +279,20 @@ public class EvaluationOfEventCoreferenc
aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(ViewCreatorAnnotator.class, ViewCreatorAnnotator.PARAM_VIEW_NAME, "Baseline"));
aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(ParagraphAnnotator.class));
- aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(ParagraphVectorAnnotator.class));
+// aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(ParagraphVectorAnnotator.class));
aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(RelationPropagator.class));
aggregateBuilder.add(EventAnnotator.createAnnotatorDescription());
aggregateBuilder.add(BackwardsTimeAnnotator.createAnnotatorDescription("/org/apache/ctakes/temporal/ae/timeannotator/model.jar"));
aggregateBuilder.add(DocTimeRelAnnotator.createAnnotatorDescription("/org/apache/ctakes/temporal/ae/doctimerel/model.jar"));
- aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(DeterministicMarkableAnnotator.class));
- // aggregateBuilder.add(CopyFromGold.getDescription(/*Markable.class,*/ CoreferenceRelation.class, CollectionTextRelation.class));
- aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(RemovePersonMarkables.class));
+ if(this.goldMarkables){
+ aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(CopyGoldMarkablesInChains.class));
+ }else{
+ aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(DeterministicMarkableAnnotator.class));
+ // aggregateBuilder.add(CopyFromGold.getDescription(/*Markable.class,*/ CoreferenceRelation.class, CollectionTextRelation.class));
+ aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(RemovePersonMarkables.class));
+ }
+ // MarkableHeadTreeCreator creates a cache of mappings from Markables to dependency heads since so many feature extractors use that information
+ // major speedup
aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(MarkableHeadTreeCreator.class));
aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(CopyCoreferenceRelations.class, CopyCoreferenceRelations.PARAM_GOLD_VIEW, GOLD_VIEW_NAME));
aggregateBuilder.add(MarkableSalienceAnnotator.createAnnotatorDescription("/org/apache/ctakes/temporal/ae/salience/model.jar"));
@@ -390,7 +396,7 @@ public class EvaluationOfEventCoreferenc
aggregateBuilder.add(HistoryCleartkAnalysisEngine.createAnnotatorDescription());
aggregateBuilder.add(SubjectCleartkAnalysisEngine.createAnnotatorDescription());
aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(ParagraphAnnotator.class));
- aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(ParagraphVectorAnnotator.class));
+// aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(ParagraphVectorAnnotator.class));
aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(RelationPropagator.class));
aggregateBuilder.add(BackwardsTimeAnnotator.createAnnotatorDescription("/org/apache/ctakes/temporal/ae/timeannotator/model.jar"));
aggregateBuilder.add(EventAnnotator.createAnnotatorDescription());
@@ -400,8 +406,13 @@ public class EvaluationOfEventCoreferenc
this.outputDirectory + goldOut,
CoreferenceChainScoringOutput.PARAM_GOLD_VIEW_NAME,
GOLD_VIEW_NAME));
- aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(DeterministicMarkableAnnotator.class));
- aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(RemovePersonMarkables.class));
+ if(this.goldMarkables){
+ aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(CopyGoldMarkablesInChains.class)); //CopyFromGold.getDescription(Markable.class));
+ }else{
+ aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(DeterministicMarkableAnnotator.class));
+ // aggregateBuilder.add(CopyFromGold.getDescription(/*Markable.class,*/ CoreferenceRelation.class, CollectionTextRelation.class));
+ aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(RemovePersonMarkables.class));
+ }
aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(MarkableHeadTreeCreator.class));
aggregateBuilder.add(MarkableSalienceAnnotator.createAnnotatorDescription("/org/apache/ctakes/temporal/ae/salience/model.jar"));
if(this.evalType == EVAL_SYSTEM.MENTION_PAIR){
@@ -417,7 +428,9 @@ public class EvaluationOfEventCoreferenc
logger.info("Running an evaluation that does not add an annotator: " + this.evalType);
}
// aggregateBuilder.add(CoreferenceChainAnnotator.createAnnotatorDescription());
- aggregateBuilder.add(PersonChainAnnotator.createAnnotatorDescription());
+ if(!this.goldMarkables){
+ aggregateBuilder.add(PersonChainAnnotator.createAnnotatorDescription());
+ }
aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(CoreferenceChainScoringOutput.class,
CoreferenceChainScoringOutput.PARAM_OUTPUT_FILENAME,
this.outputDirectory + systemOut));
@@ -510,6 +523,42 @@ public class EvaluationOfEventCoreferenc
}
+ public static class CopyGoldMarkablesInChains extends org.apache.uima.fit.component.JCasAnnotator_ImplBase {
+ // Replaces the Markables of the default (system) view with copies of the gold-view Markables that are members of a gold CollectionTextRelation; each distinct span is copied once.
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+ JCas goldView, systemView;
+ try {
+ goldView = jCas.getView( GOLD_VIEW_NAME );
+ systemView = jCas.getView( CAS.NAME_DEFAULT_SOFA );
+ } catch ( CASException e ) { // a view lookup failure is rethrown as a processing exception with the cause attached
+ throw new AnalysisEngineProcessException( e );
+ }
+ // first remove any system markables that snuck in
+ for ( Markable annotation : Lists.newArrayList( JCasUtil.select( systemView, Markable.class ) ) ) { // iterate over a list copy of the selection -- presumably so removal does not disturb the index being iterated; confirm
+ annotation.removeFromIndexes();
+ }
+ // then copy every Markable that belongs to a gold chain from the gold view into the system view
+ CasCopier copier = new CasCopier( goldView.getCas(), systemView.getCas() );
+ Feature sofaFeature = jCas.getTypeSystem().getFeatureByFullName( CAS.FEATURE_FULL_NAME_SOFA );
+ HashSet<String> existingSpans = new HashSet<>(); // span keys of the markables already copied during this call
+ for ( CollectionTextRelation chain : JCasUtil.select(goldView, CollectionTextRelation.class)){
+ for ( Markable markable : JCasUtil.select(chain.getMembers(), Markable.class)){
+ // some spans are annotated twice erroneously in gold -- if we can't fix make sure we don't add twice
+ // or else the evaluation script will explode.
+ String key = markable.getBegin() + "-" + (markable.getEnd() - markable.getBegin()); // key format is begin offset, a dash, then span length
+ if(existingSpans.contains(key)) continue;
+ // not seen yet: copy it, set its sofa feature to the system view's sofa, index it there, and remember the span
+ Markable copy = (Markable)copier.copyFs( markable );
+ copy.setFeatureValue( sofaFeature, systemView.getSofa() );
+ copy.addToIndexes( systemView );
+ existingSpans.add(key);
+ }
+ }
+ }
+
+
+ }
/*
* The Relation extractors all create relation objects but don't populate the objects inside of them
* with pointers to the relation.
@@ -686,6 +735,9 @@ public class EvaluationOfEventCoreferenc
do{
NonEmptyFSList element = (NonEmptyFSList) head;
Markable goldMarkable = (Markable) element.getHead();
+ if(goldMarkable == null){
+ logger.error(String.format("Found an unexpected null gold markable"));
+ }
boolean mapped = mapGoldMarkable(jcas, goldMarkable, gold2sys, depIndex);
// if we can't align the gold markable with one in the system cas then don't add it:
@@ -831,7 +883,8 @@ public class EvaluationOfEventCoreferenc
System.err.println("Unauthorized markable 'I'");
}
List<BaseToken> coveredTokens = JCasUtil.selectCovered(jcas, BaseToken.class, markable);
- if(coveredTokens.size() == 1 && coveredTokens.get(0).getPartOfSpeech().startsWith("PRP") &&
+ if(coveredTokens.size() == 1 && coveredTokens.get(0).getPartOfSpeech() != null &&
+ coveredTokens.get(0).getPartOfSpeech().startsWith("PRP") &&
!markable.getCoveredText().toLowerCase().equals("it")){
toRemove.add(markable);
}else if(coveredTokens.size() > 0 && (coveredTokens.get(0).getCoveredText().startsWith("Mr.") || coveredTokens.get(0).getCoveredText().startsWith("Dr.") ||