You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by sw...@apache.org on 2013/11/25 22:25:27 UTC

svn commit: r1545413 - in /ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion: eval/AssertionEvaluation.java train/PolarityCotrainingTests.java train/TestAttributeModels.java

Author: swu
Date: Mon Nov 25 21:25:26 2013
New Revision: 1545413

URL: http://svn.apache.org/r1545413
Log:
some edits to assertion eval scripts

Modified:
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTests.java
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TestAttributeModels.java

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java?rev=1545413&r1=1545412&r2=1545413&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java Mon Nov 25 21:25:26 2013
@@ -224,6 +224,12 @@ private static Logger logger = Logger.ge
     public boolean printErrors = false;
 
     @Option(
+    		name = "--print-instances",
+    		usage = "Flag to have test method print out lots of info for statistical significance testing",
+    		required = false)
+    public File printInstances;
+
+    @Option(
     		name = "--eval-only",
     		usage = "Evaluate a CASes (supply the directory as an argument) with both system and gold in them.",
     		required = false)
@@ -421,6 +427,7 @@ private static Logger logger = Logger.ge
 	  options.testOnly = false;
 	  options.noCleartk = false;
 	  options.printErrors = false;
+	  options.printInstances = null;
 	  options.evalOnly = false;
 	  
 	  options.evaluationOutputDirectory = null;
@@ -941,6 +948,9 @@ public static void printScore(Map<String
 	      if(options.printErrors){
 	    	  printErrors(jCas, goldEntitiesAndEvents, systemEntitiesAndEvents, "polarity", CONST.NE_POLARITY_NEGATION_PRESENT, Integer.class);
 	      }
+	      if(options.printInstances!=null){
+	    	  printInstances(jCas, goldEntitiesAndEvents, systemEntitiesAndEvents, "polarity", CONST.NE_POLARITY_NEGATION_PRESENT, Integer.class, options.printInstances);
+	      }
       }
 
       if (!options.ignoreConditional)
@@ -1212,6 +1222,100 @@ private static void printErrors(JCas jCa
 	  }
   }
   
+/**
+ * Appends one CSV row per mention that has both a gold label and a usable system
+ * annotation, tagging each row with how the system label compares to the gold label.
+ * <p>
+ * Row layout: classifierType, outcome, systemLabel, goldLabel, documentId, polarity,
+ * uncertainty, conditional, generic, subject, historyOf, typeId, typeName, coveredText.
+ * Outcome is TP/TN/FP/FN when {@code trueCategory} is given and T/F when it is null
+ * (multi-class case). The writer is closed even if a lookup or write fails, and a null
+ * system label is reported as a mismatch instead of raising a NullPointerException.
+ *
+ * @param jCas CAS whose document ID is written into every row
+ * @param goldEntitiesAndEvents mentions supplying the gold labels
+ * @param systemEntitiesAndEvents mentions supplying the system labels and row details
+ * @param classifierType base name of the feature to compare, e.g. "polarity"
+ * @param trueCategory label treated as the positive class, or null for multi-class
+ * @param categoryClass value class passed to getFeatureValue when reading the feature
+ * @param outputfile file the rows are appended to
+ * @throws ResourceProcessException propagated from getFeatureValue
+ * @throws IOException if the output file cannot be opened or written
+ */
+private static void printInstances(JCas jCas,
+		  Collection<IdentifiedAnnotation> goldEntitiesAndEvents,
+		  Collection<IdentifiedAnnotation> systemEntitiesAndEvents, String classifierType, 
+		  Object trueCategory, Class<? extends Object> categoryClass,
+		  File outputfile) 
+				  throws ResourceProcessException, IOException {
+
+	  String documentId = DocumentIDAnnotationUtil.getDocumentID(jCas);
+	  Map<HashableAnnotation, IdentifiedAnnotation> goldMap = Maps.newHashMap();
+	  for (IdentifiedAnnotation mention : goldEntitiesAndEvents) {
+		  goldMap.put(new HashableAnnotation(mention), mention);
+	  }
+	  Map<HashableAnnotation, IdentifiedAnnotation> systemMap = Maps.newHashMap();
+	  for (IdentifiedAnnotation mention : systemEntitiesAndEvents) {
+		  systemMap.put(new HashableAnnotation(mention), mention);
+	  }
+	  List<HashableAnnotation> sorted = Lists.newArrayList(Sets.union(goldMap.keySet(), systemMap.keySet()));
+	  Collections.sort(sorted);
+
+	  // Append mode: repeated calls with the same file keep adding rows to it.
+	  BufferedWriter fileOutWriter = new BufferedWriter(new FileWriter(outputfile, true), 32768);
+	  try {
+		  for (HashableAnnotation key : sorted) {
+			  IdentifiedAnnotation goldAnnotation = goldMap.get(key);
+			  IdentifiedAnnotation systemAnnotation = systemMap.get(key);
+			  Object goldLabel = null;
+			  Object systemLabel = null;
+			  if (goldAnnotation == null) {
+				  logger.debug(key + " not found in gold annotations ");
+			  } else {
+				  Feature feature = goldAnnotation.getType().getFeatureByBaseName(classifierType);
+				  goldLabel = getFeatureValue(feature, categoryClass, goldAnnotation);
+			  }
+			  if (systemAnnotation == null) {
+				  logger.info(key + " not found in system annotations ");
+			  } else {
+				  Feature feature = systemAnnotation.getType().getFeatureByBaseName(classifierType);
+				  systemLabel = getFeatureValue(feature, categoryClass, systemAnnotation);
+			  }
+
+			  if (goldLabel == null) {
+				  // nothing to compare against, so no row is written
+				  logger.debug("Skipping annotation with label " + systemLabel + " because gold label is null");
+				  continue;
+			  }
+			  if (systemAnnotation == null || systemAnnotation.getEnd() < 0) {
+				  continue;
+			  }
+			  // Keep each row on one line with a fixed column count: line breaks and
+			  // commas inside the covered text would otherwise corrupt the CSV.
+			  String coveredText = systemAnnotation.getCoveredText().replaceAll("[\\r\\n]", " ").replaceAll(",", ";");
+			  String instanceData = documentId + "," + systemAnnotation.getPolarity()
+					  + "," + systemAnnotation.getUncertainty() + "," + systemAnnotation.getConditional()
+					  + "," + systemAnnotation.getGeneric() + "," + systemAnnotation.getSubject()
+					  + "," + systemAnnotation.getHistoryOf() + "," + systemAnnotation.getTypeID()
+					  + "," + systemAnnotation.getClass().getSimpleName() + "," + coveredText;
+			  boolean correct = goldLabel.equals(systemLabel);
+			  String outcome;
+			  if (trueCategory == null) {
+				  // multi-class case: only right/wrong can be reported
+				  outcome = correct ? "T" : "F";
+			  } else if (trueCategory.equals(systemLabel)) {
+				  // comparing from trueCategory's side is null-safe when systemLabel is null
+				  outcome = correct ? "TP" : "FP";
+			  } else {
+				  outcome = correct ? "TN" : "FN";
+			  }
+			  fileOutWriter.write(classifierType + "," + outcome + "," + systemLabel + "," + goldLabel + "," + instanceData + "\n");
+		  }
+	  } finally {
+		  // close() flushes buffered rows and releases the file even when the loop throws
+		  fileOutWriter.close();
+	  }
+	}
   private static Object getFeatureValue(Feature feature,
 		  Class<? extends Object> class1, Annotation annotation) throws ResourceProcessException {
 	  if(class1 == Integer.class){

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTests.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTests.java?rev=1545413&r1=1545412&r2=1545413&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTests.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTests.java Mon Nov 25 21:25:26 2013
@@ -1,5 +1,6 @@
 package org.apache.ctakes.assertion.train;
 
+import java.io.File;
 import java.util.ArrayList;
 import java.util.Date;
 
@@ -47,24 +48,27 @@ public class PolarityCotrainingTests {
 		testGrid.add(new TestPair(PolarityCotrainingTrain.NEGEX_MODEL,  	NEGEX_TEST));  // not valid
 		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MODEL,  	SHARP_TEST));
 		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MODEL,  	I2B2_TEST));
-//		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MODEL,  	MIPACQ_TEST)); // not meaningful
-//		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MODEL,  	NEGEX_TEST));  // not meaningful
+		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MODEL,  	MIPACQ_TEST)); // not meaningful
+		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MODEL,  	NEGEX_TEST));  // not meaningful
 		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL,  SHARP_TEST));
-//		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL,  I2B2_TEST));    // not meaningful
+		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL,  I2B2_TEST));    // not meaningful
 		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL,  MIPACQ_TEST));
-//		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL,  NEGEX_TEST));
-//		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_NEGEX_MODEL,  	SHARP_TEST)); // not meaningful
-//		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_NEGEX_MODEL,  	I2B2_TEST));  //not meaningful
-//		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_NEGEX_MODEL,  	MIPACQ_TEST)); // not meaningful
-//		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_NEGEX_MODEL,  	NEGEX_TEST));  // not valid
+		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL,  NEGEX_TEST));
+		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_NEGEX_MODEL,  	SHARP_TEST)); // not meaningful
+		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_NEGEX_MODEL,  	I2B2_TEST));  //not meaningful
+		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_NEGEX_MODEL,  	MIPACQ_TEST)); // not meaningful
+		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_NEGEX_MODEL,  	NEGEX_TEST));  // not valid
 		testGrid.add(new TestPair(PolarityCotrainingTrain.I2B2_MIPACQ_NEGEX_MODEL,  	SHARP_TEST));
 		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MIPACQ_NEGEX_MODEL,  	I2B2_TEST));
 		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_NEGEX_MODEL,  		MIPACQ_TEST));
+		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_MODEL,  	SHARP_TEST));
+		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_MODEL,  	I2B2_TEST));
+		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_MODEL,  	MIPACQ_TEST));
 		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_MODEL,  	NEGEX_TEST));
 		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_NEGEX_MODEL,  	SHARP_TEST));
 		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_NEGEX_MODEL,  	I2B2_TEST));
 		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_NEGEX_MODEL,  	MIPACQ_TEST));
-//		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_NEGEX_MODEL,  	NEGEX_TEST)); //not valid
+		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_NEGEX_MODEL,  	NEGEX_TEST)); //not valid
 
 		
 		String attribute = "polarity";
@@ -72,12 +76,18 @@ public class PolarityCotrainingTests {
 		for (TestPair oneTest : testGrid) {
 			ArrayList<String> params = new ArrayList<String>();
 
+			File instancef = new File("eval/instances_"+
+			oneTest.model.substring(oneTest.model.lastIndexOf("/")+1)+"_"+
+			oneTest.data.substring(oneTest.data.length()-13).replaceAll("\\/", "-"));
+			
 			params.add("--test-dir"); 	params.add(oneTest.data);
 			params.add("--models-dir"); params.add(oneTest.model);
 			//			params.add("--ytex-negation");
 			//		params.add("--evaluation-output-dir");	params.add(AssertionConst.evalOutputDir);
 			params.add("--test-only");	
-//			params.add("--print-errors");
+			params.add("--print-instances");
+			// hack-y way to name this
+			params.add(instancef.getAbsolutePath());
 
 			// Build up an "ignore" string
 			for (String ignoreAttribute : AssertionConst.allAnnotationTypes) {

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TestAttributeModels.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TestAttributeModels.java?rev=1545413&r1=1545412&r2=1545413&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TestAttributeModels.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TestAttributeModels.java Mon Nov 25 21:25:26 2013
@@ -1,6 +1,7 @@
 package org.apache.ctakes.assertion.train;
 
 import java.util.ArrayList;
+import java.util.Date;
 import java.util.HashMap;
 
 import org.apache.ctakes.assertion.eval.AssertionEvaluation;
@@ -15,6 +16,7 @@ import scala.actors.threadpool.Arrays;
  * Note that this uses constants within {@link AssertionConst} for the directory names.
  */
 public class TestAttributeModels {
+	final static String RUN_ID = "ytex_";
 
 	public static void main(String[] args) throws Exception {
 		
@@ -22,10 +24,11 @@ public class TestAttributeModels {
 			
 			ArrayList<String> params = new ArrayList<String>();
 			AssertionEvaluation.useEvaluationLogFile = true;
+			AssertionEvaluation.evaluationLogFilePath = "eval/"+RUN_ID+new Date().toString().replaceAll(" ","_") + ".txt";
 			
 			params.add("--test-dir"); 	params.add(AssertionConst.testDirectories.get(attribute));
 			params.add("--models-dir"); params.add(AssertionConst.modelDirectory);
-//			params.add("--ytex-negation");
+			params.add("--ytex-negation");
 			params.add("--evaluation-output-dir");	params.add(AssertionConst.evalOutputDir);
 			params.add("--test-only");