You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2015/11/30 23:05:39 UTC
svn commit: r1717335 -
/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/data/CoreferenceAttributeAnalyzer.java
Author: tmill
Date: Mon Nov 30 22:05:39 2015
New Revision: 1717335
URL: http://svn.apache.org/viewvc?rev=1717335&view=rev
Log:
Added other attributes to analysis.
Modified:
ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/data/CoreferenceAttributeAnalyzer.java
Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/data/CoreferenceAttributeAnalyzer.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/data/CoreferenceAttributeAnalyzer.java?rev=1717335&r1=1717334&r2=1717335&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/data/CoreferenceAttributeAnalyzer.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/data/CoreferenceAttributeAnalyzer.java Mon Nov 30 22:05:39 2015
@@ -5,7 +5,10 @@ import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
+import org.apache.ctakes.assertion.medfacts.cleartk.GenericCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.HistoryCleartkAnalysisEngine;
import org.apache.ctakes.assertion.medfacts.cleartk.PolarityCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.SubjectCleartkAnalysisEngine;
import org.apache.ctakes.assertion.medfacts.cleartk.UncertaintyCleartkAnalysisEngine;
import org.apache.ctakes.coreference.ae.DeterministicMarkableAnnotator;
import org.apache.ctakes.coreference.ae.MarkableSalienceAnnotator;
@@ -54,6 +57,9 @@ public class CoreferenceAttributeAnalyze
options.getXMIDirectory()));
aggregateBuilder.add(PolarityCleartkAnalysisEngine.createAnnotatorDescription());
aggregateBuilder.add(UncertaintyCleartkAnalysisEngine.createAnnotatorDescription());
+ aggregateBuilder.add(GenericCleartkAnalysisEngine.createAnnotatorDescription());
+ aggregateBuilder.add(HistoryCleartkAnalysisEngine.createAnnotatorDescription());
+ aggregateBuilder.add(SubjectCleartkAnalysisEngine.createAnnotatorDescription());
aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(DocumentIDPrinter.class));
aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(RelationPropagator.class));
aggregateBuilder.add(EventAnnotator.createAnnotatorDescription());
@@ -66,6 +72,9 @@ public class CoreferenceAttributeAnalyze
int numNegChainMatches = 0;
int numUncChainMatches = 0;
+ int numGenChainMatches = 0;
+ int numSubjChainMatches = 0;
+ int numHistChainMatches = 0;
int numChains = 0;
for(JCas jcas : new JCasIterable(reader, aggregateBuilder.createAggregateDescription())){
@@ -73,6 +82,9 @@ public class CoreferenceAttributeAnalyze
for(CollectionTextRelation chain : JCasUtil.select(jcas, CollectionTextRelation.class)){
int numNeg = 0;
int numUnc = 0;
+ int numGen = 0;
+ int numSubj = 0;
+ int numHist = 0;
int numTimex = 0;
int numMarkables = 0;
Markable head = (Markable) ((NonEmptyFSList)chain.getMembers()).getHead();
@@ -90,6 +102,15 @@ public class CoreferenceAttributeAnalyze
if(ent.getUncertainty() == CONST.NE_UNCERTAINTY_PRESENT){
numUnc++;
}
+ if(ent.getGeneric() == CONST.NE_GENERIC_TRUE){
+ numGen++;
+ }
+ if(ent.getSubject() == CONST.ATTR_SUBJECT_PATIENT){
+ numSubj++;
+ }
+ if(ent.getHistoryOf() == CONST.NE_HISTORY_OF_PRESENT){
+ numHist++;
+ }
}
// only bother if some of the chains had markables corresponding to named entities:
if(numMarkables > 0){
@@ -97,16 +118,27 @@ public class CoreferenceAttributeAnalyze
if(numNeg == 0 || numNeg == numMarkables){
numNegChainMatches++;
}else{
- logger.warn("Found a chain with disagreement over negation: " +
- String.format("First element: \"%s\", span: (%d, %d)",
- head.getCoveredText(), head.getBegin(), head.getEnd()));
+ logMismatch("Negation", head);
}
if(numUnc == 0 || numUnc == numMarkables){
numUncChainMatches++;
}else{
- logger.warn("Found a chain with disagreement over uncertainty: " +
- String.format("First element: \"%s\", span: (%d, %d)",
- head.getCoveredText(), head.getBegin(), head.getEnd()));
+ logMismatch("Uncertainty", head);
+ }
+ if(numGen == 0 || numGen == numMarkables){
+ numGenChainMatches++;
+ }else{
+ logMismatch("Generic", head);
+ }
+ if(numSubj == 0 || numSubj == numMarkables){
+ numSubjChainMatches++;
+ }else{
+ logMismatch("Subject", head);
+ }
+ if(numHist == 0 || numHist == numMarkables){
+ numHistChainMatches++;
+ }else{
+ logMismatch("History", head);
}
}
}
@@ -114,13 +146,23 @@ public class CoreferenceAttributeAnalyze
}
// print out some statistics:
- System.out.println(String.format("Negation: There are %d chains in the corpus with some UMLS named entity element and %d of them negation status agrees",
- numChains, numNegChainMatches));
- System.out.println(String.format("Negation: There are %d chains in the corpus with some UMLS named entity element and %d of them uncertainty status agrees",
- numChains, numUncChainMatches));
-
+ printOutcome("Negation", numChains, numNegChainMatches);
+ printOutcome("Uncertainty", numChains, numUncChainMatches);
+ printOutcome("Generic", numChains, numGenChainMatches);
+ printOutcome("Subject", numChains, numSubjChainMatches);
+ printOutcome("History", numChains, numHistChainMatches);
}
+ private static void printOutcome(String attName, int numChains, int numMatches){
+ System.out.println(String.format("%s: There are %d chains in the corpus with some UMLS named entity element and %d of them %s status agrees",
+ attName, numChains, numMatches, attName));
+ }
+
+ private static void logMismatch(String attName, Markable head){
+ logger.warn(String.format("Found a chain with disagreement over %s: First element: \"%s\", span: (%d, %d)",
+ attName, head.getCoveredText(), head.getBegin(), head.getEnd()));
+ }
+
private static IdentifiedAnnotation getSameHeadEntity(JCas jcas,
Markable member) {
IdentifiedAnnotation bestEnt = null;