You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2015/11/30 23:05:39 UTC

svn commit: r1717335 - /ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/data/CoreferenceAttributeAnalyzer.java

Author: tmill
Date: Mon Nov 30 22:05:39 2015
New Revision: 1717335

URL: http://svn.apache.org/viewvc?rev=1717335&view=rev
Log:
Added other attributes to analysis.

Modified:
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/data/CoreferenceAttributeAnalyzer.java

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/data/CoreferenceAttributeAnalyzer.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/data/CoreferenceAttributeAnalyzer.java?rev=1717335&r1=1717334&r2=1717335&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/data/CoreferenceAttributeAnalyzer.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/data/CoreferenceAttributeAnalyzer.java Mon Nov 30 22:05:39 2015
@@ -5,7 +5,10 @@ import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.ctakes.assertion.medfacts.cleartk.GenericCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.HistoryCleartkAnalysisEngine;
 import org.apache.ctakes.assertion.medfacts.cleartk.PolarityCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.SubjectCleartkAnalysisEngine;
 import org.apache.ctakes.assertion.medfacts.cleartk.UncertaintyCleartkAnalysisEngine;
 import org.apache.ctakes.coreference.ae.DeterministicMarkableAnnotator;
 import org.apache.ctakes.coreference.ae.MarkableSalienceAnnotator;
@@ -54,6 +57,9 @@ public class CoreferenceAttributeAnalyze
         options.getXMIDirectory()));
     aggregateBuilder.add(PolarityCleartkAnalysisEngine.createAnnotatorDescription());
     aggregateBuilder.add(UncertaintyCleartkAnalysisEngine.createAnnotatorDescription());
+    aggregateBuilder.add(GenericCleartkAnalysisEngine.createAnnotatorDescription());
+    aggregateBuilder.add(HistoryCleartkAnalysisEngine.createAnnotatorDescription());
+    aggregateBuilder.add(SubjectCleartkAnalysisEngine.createAnnotatorDescription());
     aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(DocumentIDPrinter.class));
     aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(RelationPropagator.class));
     aggregateBuilder.add(EventAnnotator.createAnnotatorDescription());
@@ -66,6 +72,9 @@ public class CoreferenceAttributeAnalyze
 
     int numNegChainMatches = 0;
     int numUncChainMatches = 0;
+    int numGenChainMatches = 0;
+    int numSubjChainMatches = 0;
+    int numHistChainMatches = 0;
     int numChains = 0;
     
     for(JCas jcas : new JCasIterable(reader, aggregateBuilder.createAggregateDescription())){
@@ -73,6 +82,9 @@ public class CoreferenceAttributeAnalyze
       for(CollectionTextRelation chain : JCasUtil.select(jcas, CollectionTextRelation.class)){
         int numNeg = 0;
         int numUnc = 0;
+        int numGen = 0;
+        int numSubj = 0;
+        int numHist = 0;
         int numTimex = 0;
         int numMarkables = 0;
         Markable head = (Markable) ((NonEmptyFSList)chain.getMembers()).getHead();
@@ -90,6 +102,15 @@ public class CoreferenceAttributeAnalyze
           if(ent.getUncertainty() == CONST.NE_UNCERTAINTY_PRESENT){
             numUnc++;
           }
+          if(ent.getGeneric() == CONST.NE_GENERIC_TRUE){
+            numGen++;
+          }
+          if(ent.getSubject() == CONST.ATTR_SUBJECT_PATIENT){
+            numSubj++;
+          }
+          if(ent.getHistoryOf() == CONST.NE_HISTORY_OF_PRESENT){
+            numHist++;
+          }
         }
         // only bother if some of the chains had markables corresponding to named entities:
         if(numMarkables > 0){
@@ -97,16 +118,27 @@ public class CoreferenceAttributeAnalyze
           if(numNeg == 0 || numNeg == numMarkables){
             numNegChainMatches++;
           }else{
-            logger.warn("Found a chain with disagreement over negation: " +
-               String.format("First element: \"%s\", span: (%d, %d)",
-                   head.getCoveredText(), head.getBegin(), head.getEnd()));
+            logMismatch("Negation", head);
           }
           if(numUnc == 0 || numUnc == numMarkables){
             numUncChainMatches++;
           }else{
-            logger.warn("Found a chain with disagreement over uncertainty: " +
-               String.format("First element: \"%s\", span: (%d, %d)",
-                   head.getCoveredText(), head.getBegin(), head.getEnd()));
+            logMismatch("Uncertainty", head);
+          }
+          if(numGen == 0 || numGen == numMarkables){
+            numGenChainMatches++;
+          }else{
+            logMismatch("Generic", head);           
+          }
+          if(numSubj == 0 || numSubj == numMarkables){
+            numSubjChainMatches++;
+          }else{
+            logMismatch("Subject", head);
+          }
+          if(numHist == 0 || numHist == numMarkables){
+            numHistChainMatches++;
+          }else{
+            logMismatch("History", head);
           }
         }
       }
@@ -114,13 +146,23 @@ public class CoreferenceAttributeAnalyze
     }
     
     // print out some statistics:
-    System.out.println(String.format("Negation: There are %d chains in the corpus with some UMLS named entity element and %d of them negation status agrees",
-        numChains, numNegChainMatches));
-    System.out.println(String.format("Negation: There are %d chains in the corpus with some UMLS named entity element and %d of them uncertainty status agrees",
-        numChains, numUncChainMatches));
-    
+    printOutcome("Negation", numChains, numNegChainMatches);
+    printOutcome("Uncertainty", numChains, numUncChainMatches);
+    printOutcome("Generic", numChains, numGenChainMatches);
+    printOutcome("Subject", numChains, numSubjChainMatches);
+    printOutcome("History", numChains, numHistChainMatches);
   }
 
+  private static void printOutcome(String attName, int numChains, int numMatches){
+    final String summary = String.format("%s: There are %d chains in the corpus with some UMLS named entity element and %d of them %s status agrees", attName, numChains, numMatches, attName);
+    System.out.println(summary); // one stdout line per attribute: total chains and how many agree
+  }
+  
+  private static void logMismatch(String attName, Markable head){
+    final String warning = String.format("Found a chain with disagreement over %s: First element: \"%s\", span: (%d, %d)", attName, head.getCoveredText(), head.getBegin(), head.getEnd());
+    logger.warn(warning); // the chain is identified by the covered text and offsets of head
+  }
+  
   private static IdentifiedAnnotation getSameHeadEntity(JCas jcas,
       Markable member) {
     IdentifiedAnnotation bestEnt = null;