You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by sw...@apache.org on 2013/11/25 22:25:27 UTC
svn commit: r1545413 - in
/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion:
eval/AssertionEvaluation.java train/PolarityCotrainingTests.java
train/TestAttributeModels.java
Author: swu
Date: Mon Nov 25 21:25:26 2013
New Revision: 1545413
URL: http://svn.apache.org/r1545413
Log:
some edits to assertion eval scripts
Modified:
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTests.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TestAttributeModels.java
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java?rev=1545413&r1=1545412&r2=1545413&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java Mon Nov 25 21:25:26 2013
@@ -224,6 +224,12 @@ private static Logger logger = Logger.ge
public boolean printErrors = false;
@Option(
+ name = "--print-instances",
+ usage = "Flag to have test method print out lots of info for statistical significance testing",
+ required = false)
+ public File printInstances;
+
+ @Option(
name = "--eval-only",
usage = "Evaluate a CASes (supply the directory as an argument) with both system and gold in them.",
required = false)
@@ -421,6 +427,7 @@ private static Logger logger = Logger.ge
options.testOnly = false;
options.noCleartk = false;
options.printErrors = false;
+ options.printInstances = null;
options.evalOnly = false;
options.evaluationOutputDirectory = null;
@@ -941,6 +948,9 @@ public static void printScore(Map<String
if(options.printErrors){
printErrors(jCas, goldEntitiesAndEvents, systemEntitiesAndEvents, "polarity", CONST.NE_POLARITY_NEGATION_PRESENT, Integer.class);
}
+ if(options.printInstances!=null){
+ printInstances(jCas, goldEntitiesAndEvents, systemEntitiesAndEvents, "polarity", CONST.NE_POLARITY_NEGATION_PRESENT, Integer.class, options.printInstances);
+ }
}
if (!options.ignoreConditional)
@@ -1212,6 +1222,100 @@ private static void printErrors(JCas jCa
}
}
+private static void printInstances(JCas jCas, // writes one comma-separated row per scored annotation of this CAS
+ Collection<IdentifiedAnnotation> goldEntitiesAndEvents,
+ Collection<IdentifiedAnnotation> systemEntitiesAndEvents, String classifierType, // classifierType: base name of the feature being compared
+ Object trueCategory, Class<? extends Object> categoryClass, // trueCategory: the "positive" label; null selects the multi-class T/F tags
+ File outputfile) // rows are appended to this file
+ throws ResourceProcessException, IOException {
+ // Row layout: classifierType, outcome tag (TP/FP/TN/FN, or T/F when trueCategory is null), system label, gold label, instanceData
+ String documentId = DocumentIDAnnotationUtil.getDocumentID(jCas);
+ BufferedWriter fileOutWriter = new BufferedWriter(new FileWriter(outputfile,true), 32768); // opened in append mode; NOTE(review): not closed if anything below throws -- consider try/finally
+ // Index both annotation sets by HashableAnnotation key (presumably span-based equality -- confirm in HashableAnnotation)
+ Map<HashableAnnotation, IdentifiedAnnotation> goldMap = Maps.newHashMap();
+ for (IdentifiedAnnotation mention : goldEntitiesAndEvents) {
+ goldMap.put(new HashableAnnotation(mention), mention);
+ }
+ Map<HashableAnnotation, IdentifiedAnnotation> systemMap = Maps.newHashMap();
+ for (IdentifiedAnnotation relation : systemEntitiesAndEvents) {
+ systemMap.put(new HashableAnnotation(relation), relation);
+ }
+ Set<HashableAnnotation> all = Sets.union(goldMap.keySet(), systemMap.keySet()); // every key seen on either side
+ List<HashableAnnotation> sorted = Lists.newArrayList(all);
+ Collections.sort(sorted); // natural ordering, so HashableAnnotation must be Comparable
+ for (HashableAnnotation key : sorted) {
+ IdentifiedAnnotation goldAnnotation = goldMap.get(key);
+ IdentifiedAnnotation systemAnnotation = systemMap.get(key);
+ Object goldLabel=null; // stays null when the key has no gold annotation
+ Object systemLabel=null; // stays null when the key has no system annotation
+ if (goldAnnotation == null) {
+ logger.debug(key + " not found in gold annotations "); // NOTE(review): debug level here, but info level for the system-side miss below
+ } else {
+ Feature feature = goldAnnotation.getType().getFeatureByBaseName(classifierType);
+ goldLabel = getFeatureValue(feature, categoryClass, goldAnnotation);
+ // Integer goldLabel = goldAnnotation.getIntValue(feature);
+ }
+ // Same lookup on the system side
+ if (systemAnnotation == null) {
+ logger.info(key + " not found in system annotations ");
+ } else {
+ Feature feature = systemAnnotation.getType().getFeatureByBaseName(classifierType);
+ systemLabel = getFeatureValue(feature, categoryClass, systemAnnotation);
+ // Integer systemLabel = systemAnnotation.getIntValue(feature);
+ }
+ // Collect the system annotation's attribute values into one comma-separated fragment (instanceData)
+ String typeId = "X";
+ String typeName = "IdentifiedAnnotation";
+ int polarity, uncertainty, historyOf;
+ boolean conditional, generic;
+ String subject = "";
+ String cui, coveredText = ""; // NOTE(review): cui is declared but never assigned or read in this method
+ String instanceData = ""; // remains empty unless the system annotation passes the check below
+ if (systemAnnotation!=null && systemAnnotation.getEnd()>=0) {
+ typeId = systemAnnotation.getTypeID()+"";
+ typeName = systemAnnotation.getClass().getSimpleName();
+ polarity = systemAnnotation.getPolarity();
+ uncertainty = systemAnnotation.getUncertainty();
+ conditional = systemAnnotation.getConditional();
+ generic = systemAnnotation.getGeneric();
+ subject = systemAnnotation.getSubject();
+ historyOf = systemAnnotation.getHistoryOf();
+ coveredText = systemAnnotation.getCoveredText().replaceAll("\\n", " ").replaceAll(",",";"); // newlines become spaces and commas become semicolons, keeping the text on one line and free of the field separator
+ instanceData = documentId+","+polarity+","+uncertainty+","+conditional+","+generic+","+subject+","+historyOf+","+
+ typeId+","+typeName+","+coveredText;
+ }
+ // A row is written only when a gold label exists and instanceData was filled in
+ if (goldLabel==null) {
+ // skip counting the attribute value since we have no gold label to compare to
+ logger.debug("Skipping annotation with label " + systemLabel + " because gold label is null");
+ } else if (instanceData.equals("")) { // no system-side data was collected above, so there is nothing to print
+ continue;
+ }
+ else {
+ if(!goldLabel.equals(systemLabel)){ // system disagrees with gold
+ if(trueCategory == null){
+ // used for multi-class case. Incorrect_system_label(Correct_label):
+ fileOutWriter.write(classifierType+",F,"+systemLabel+","+goldLabel+","+instanceData+"\n");
+ }else if(systemLabel.equals(trueCategory)){ // NOTE(review): would throw NullPointerException if getFeatureValue can return null for the system annotation -- confirm
+ fileOutWriter.write(classifierType+",FP,"+systemLabel+","+goldLabel+","+instanceData+"\n");
+ }else{
+ fileOutWriter.write(classifierType+",FN,"+systemLabel+","+goldLabel+","+instanceData+"\n");
+ }
+ }else{ // system agrees with gold
+ if(trueCategory == null){
+ // multi-class case -- probably don't want to print anything?
+ fileOutWriter.write(classifierType+ ",T,"+systemLabel+","+goldLabel+","+instanceData+"\n");
+ }else if(systemLabel.equals(trueCategory)){
+ fileOutWriter.write(classifierType+",TP,"+systemLabel+","+goldLabel+","+instanceData+"\n");
+ }else{
+ fileOutWriter.write(classifierType+",TN,"+systemLabel+","+goldLabel+","+instanceData+"\n");
+ }
+ }
+ fileOutWriter.flush(); // flushed after each row rather than once at the end
+ }
+ }
+ fileOutWriter.close(); // reached only on normal completion (see the note where the writer is opened)
+ }
private static Object getFeatureValue(Feature feature,
Class<? extends Object> class1, Annotation annotation) throws ResourceProcessException {
if(class1 == Integer.class){
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTests.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTests.java?rev=1545413&r1=1545412&r2=1545413&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTests.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTests.java Mon Nov 25 21:25:26 2013
@@ -1,5 +1,6 @@
package org.apache.ctakes.assertion.train;
+import java.io.File;
import java.util.ArrayList;
import java.util.Date;
@@ -47,24 +48,27 @@ public class PolarityCotrainingTests {
testGrid.add(new TestPair(PolarityCotrainingTrain.NEGEX_MODEL, NEGEX_TEST)); // not valid
testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MODEL, SHARP_TEST));
testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MODEL, I2B2_TEST));
-// testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MODEL, MIPACQ_TEST)); // not meaningful
-// testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MODEL, NEGEX_TEST)); // not meaningful
+ testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MODEL, MIPACQ_TEST)); // not meaningful
+ testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MODEL, NEGEX_TEST)); // not meaningful
testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL, SHARP_TEST));
-// testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL, I2B2_TEST)); // not meaningful
+ testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL, I2B2_TEST)); // not meaningful
testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL, MIPACQ_TEST));
-// testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL, NEGEX_TEST));
-// testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_NEGEX_MODEL, SHARP_TEST)); // not meaningful
-// testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_NEGEX_MODEL, I2B2_TEST)); //not meaningful
-// testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_NEGEX_MODEL, MIPACQ_TEST)); // not meaningful
-// testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_NEGEX_MODEL, NEGEX_TEST)); // not valid
+ testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL, NEGEX_TEST));
+ testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_NEGEX_MODEL, SHARP_TEST)); // not meaningful
+ testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_NEGEX_MODEL, I2B2_TEST)); //not meaningful
+ testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_NEGEX_MODEL, MIPACQ_TEST)); // not meaningful
+ testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_NEGEX_MODEL, NEGEX_TEST)); // not valid
testGrid.add(new TestPair(PolarityCotrainingTrain.I2B2_MIPACQ_NEGEX_MODEL, SHARP_TEST));
testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MIPACQ_NEGEX_MODEL, I2B2_TEST));
testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_NEGEX_MODEL, MIPACQ_TEST));
+ testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_MODEL, SHARP_TEST));
+ testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_MODEL, I2B2_TEST));
+ testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_MODEL, MIPACQ_TEST));
testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_MODEL, NEGEX_TEST));
testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_NEGEX_MODEL, SHARP_TEST));
testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_NEGEX_MODEL, I2B2_TEST));
testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_NEGEX_MODEL, MIPACQ_TEST));
-// testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_NEGEX_MODEL, NEGEX_TEST)); //not valid
+ testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_NEGEX_MODEL, NEGEX_TEST)); //not valid
String attribute = "polarity";
@@ -72,12 +76,18 @@ public class PolarityCotrainingTests {
for (TestPair oneTest : testGrid) {
ArrayList<String> params = new ArrayList<String>();
+ File instancef = new File("eval/instances_"+
+ oneTest.model.substring(oneTest.model.lastIndexOf("/")+1)+"_"+
+ oneTest.data.substring(oneTest.data.length()-13).replaceAll("\\/", "-"));
+
params.add("--test-dir"); params.add(oneTest.data);
params.add("--models-dir"); params.add(oneTest.model);
// params.add("--ytex-negation");
// params.add("--evaluation-output-dir"); params.add(AssertionConst.evalOutputDir);
params.add("--test-only");
-// params.add("--print-errors");
+ params.add("--print-instances");
+ // hack-y way to name this
+ params.add(instancef.getAbsolutePath());
// Build up an "ignore" string
for (String ignoreAttribute : AssertionConst.allAnnotationTypes) {
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TestAttributeModels.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TestAttributeModels.java?rev=1545413&r1=1545412&r2=1545413&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TestAttributeModels.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TestAttributeModels.java Mon Nov 25 21:25:26 2013
@@ -1,6 +1,7 @@
package org.apache.ctakes.assertion.train;
import java.util.ArrayList;
+import java.util.Date;
import java.util.HashMap;
import org.apache.ctakes.assertion.eval.AssertionEvaluation;
@@ -15,6 +16,7 @@ import scala.actors.threadpool.Arrays;
* Note that this uses constants within {@link AssertionConst} for the directory names.
*/
public class TestAttributeModels {
+ final static String RUN_ID = "ytex_";
public static void main(String[] args) throws Exception {
@@ -22,10 +24,11 @@ public class TestAttributeModels {
ArrayList<String> params = new ArrayList<String>();
AssertionEvaluation.useEvaluationLogFile = true;
+ AssertionEvaluation.evaluationLogFilePath = "eval/"+RUN_ID+new Date().toString().replaceAll(" ","_") + ".txt";
params.add("--test-dir"); params.add(AssertionConst.testDirectories.get(attribute));
params.add("--models-dir"); params.add(AssertionConst.modelDirectory);
-// params.add("--ytex-negation");
+ params.add("--ytex-negation");
params.add("--evaluation-output-dir"); params.add(AssertionConst.evalOutputDir);
params.add("--test-only");