You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by sw...@apache.org on 2013/08/28 22:56:00 UTC

svn commit: r1518382 - in /ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion: cr/NegExCorpusReader.java eval/AssertionEvaluation.java train/PolarityCotrainingTests.java train/PolarityCotrainingTrain.java util/AssertionConst.java

Author: swu
Date: Wed Aug 28 20:56:00 2013
New Revision: 1518382

URL: http://svn.apache.org/r1518382
Log:
more negation (annotation/cotraining) tests

Modified:
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/NegExCorpusReader.java
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTests.java
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTrain.java
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/NegExCorpusReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/NegExCorpusReader.java?rev=1518382&r1=1518381&r2=1518382&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/NegExCorpusReader.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/NegExCorpusReader.java Wed Aug 28 20:56:00 2013
@@ -62,7 +62,7 @@ public class NegExCorpusReader extends C
 
 
   public NegExCorpusReader() {
-	  this(true);
+	  this(false);
   }
   
   public NegExCorpusReader(boolean skipReadingValuesJustReadText) {

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java?rev=1518382&r1=1518381&r2=1518382&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java Wed Aug 28 20:56:00 2013
@@ -276,7 +276,7 @@ private static Logger logger = Logger.ge
 	  resetOptions();
 	  options.parseOptions(args);
 
-	  if (useEvaluationLogFile) {
+	  if (useEvaluationLogFile && evaluationLogFileOut == null) {
 		  evaluationLogFile = new File(evaluationLogFilePath);
 		  evaluationLogFileOut = new BufferedWriter(new FileWriter(evaluationLogFile), 32768);
 	  }
@@ -484,17 +484,20 @@ public static void printScore(Map<String
     	  String annotationType = currentEntry.getKey();
     	  AnnotationStatisticsCompact stats = currentEntry.getValue();
     	  
-    	  System.out.format("directory: \"%s\"; assertion type: %s%n%n%s%n%n",
+    	  System.out.format("directory: \"%s\"; assertion type: %s%n%s%n%s%n%n",
     	    directory,
     	    annotationType.toUpperCase(),
+    	    options.testDirectory,
     	    stats.toString());
     	  
     	  try {
     		  if (useEvaluationLogFile) {
     			  evaluationLogFileOut.write(
-    					  String.format("%s\t%f\t%s",
+    					  String.format("%s\t%f\t%s\t%s\t%s",
     							  annotationType,
     							  options.featureSelectionThreshold,
+    							  options.modelsDirectory.getName(),
+    							  options.testDirectory.toString(),
     							  stats.toTsv())
     					  );
     			  evaluationLogFileOut.flush();
@@ -556,7 +559,7 @@ public static void printScore(Map<String
 		  GoldEntityAndAttributeReaderPipelineForSeedCorpus.readI2B2Challenge2010(rawDir, preprocessedDir);
 		  
 	  } else if (rawDir.getAbsolutePath().contains("mipacq")) {
-		  GoldEntityAndAttributeReaderPipelineForSeedCorpus.readMiPACQ(rawDir, preprocessedDir);
+		  GoldEntityAndAttributeReaderPipelineForSeedCorpus.readMiPACQ(rawDir, preprocessedDir, options.testDirectory, options.devDirectory);
 		  
 	  } else if (rawDir.getAbsolutePath().contains("negex")) {
 		  GoldEntityAndAttributeReaderPipelineForSeedCorpus.readNegexTestSet(rawDir, preprocessedDir);

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTests.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTests.java?rev=1518382&r1=1518381&r2=1518382&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTests.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTests.java Wed Aug 28 20:56:00 2013
@@ -1,12 +1,10 @@
 package org.apache.ctakes.assertion.train;
 
 import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Map.Entry;
+import java.util.Date;
 
 import org.apache.ctakes.assertion.eval.AssertionEvaluation;
 import org.apache.ctakes.assertion.util.AssertionConst;
-import org.apache.ctakes.assertion.train.PolarityCotrainingTrain.*;
 
 import scala.actors.threadpool.Arrays;
 
@@ -18,61 +16,68 @@ import scala.actors.threadpool.Arrays;
  */
 public class PolarityCotrainingTests {
 
+	final static String RUN_ID = "fullgrid_";
+	
 	protected final static String SHARP_TEST = AssertionConst.DATA_DIR + "preprocessed_data/sharp/test";
 	protected final static String I2B2_TEST  = AssertionConst.DATA_DIR + "preprocessed_data/i2b2/test";
-	protected final static String MIPACQ_TEST = AssertionConst.DATA_DIR + "preprocessed_data/mipacq/cTAKES-xmi";
+	protected final static String MIPACQ_TEST = AssertionConst.DATA_DIR + "preprocessed_data/mipacq/test";
 	protected final static String NEGEX_TEST = AssertionConst.DATA_DIR + "preprocessed_data/negex";
 	
 	public static void main(String[] args) throws Exception {
 
 		AssertionEvaluation.useEvaluationLogFile = true;
+		AssertionEvaluation.evaluationLogFilePath = "eval/"+RUN_ID+new Date().toString().replaceAll(" ","_") + ".txt";
 
-		HashMap<String,String> testGrid = new HashMap<String,String>();
-		testGrid.put(PolarityCotrainingTrain.SHARP_MODEL, 	SHARP_TEST);
-		testGrid.put(PolarityCotrainingTrain.SHARP_MODEL, 	I2B2_TEST);
-		testGrid.put(PolarityCotrainingTrain.SHARP_MODEL, 	MIPACQ_TEST);
-		testGrid.put(PolarityCotrainingTrain.SHARP_MODEL, 	NEGEX_TEST);
-		testGrid.put(PolarityCotrainingTrain.I2B2_MODEL,  	SHARP_TEST);
-		testGrid.put(PolarityCotrainingTrain.I2B2_MODEL,  	I2B2_TEST);
-		testGrid.put(PolarityCotrainingTrain.I2B2_MODEL,  	MIPACQ_TEST);
-		testGrid.put(PolarityCotrainingTrain.I2B2_MODEL,  	NEGEX_TEST);
-		testGrid.put(PolarityCotrainingTrain.MIPACQ_MODEL,  SHARP_TEST);
-		testGrid.put(PolarityCotrainingTrain.MIPACQ_MODEL,  I2B2_TEST);
-		testGrid.put(PolarityCotrainingTrain.MIPACQ_MODEL,  MIPACQ_TEST); // not valid
-		testGrid.put(PolarityCotrainingTrain.MIPACQ_MODEL,  NEGEX_TEST);
-		testGrid.put(PolarityCotrainingTrain.NEGEX_MODEL,  	SHARP_TEST);
-		testGrid.put(PolarityCotrainingTrain.NEGEX_MODEL,  	I2B2_TEST);
-		testGrid.put(PolarityCotrainingTrain.NEGEX_MODEL,  	MIPACQ_TEST);
-		testGrid.put(PolarityCotrainingTrain.NEGEX_MODEL,  	NEGEX_TEST);  // not valid
-		testGrid.put(PolarityCotrainingTrain.SHARP_I2B2_MODEL,  	SHARP_TEST);
-		testGrid.put(PolarityCotrainingTrain.SHARP_I2B2_MODEL,  	I2B2_TEST);
-		testGrid.put(PolarityCotrainingTrain.SHARP_I2B2_MODEL,  	MIPACQ_TEST);
-		testGrid.put(PolarityCotrainingTrain.SHARP_I2B2_MODEL,  	NEGEX_TEST);
-		testGrid.put(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL,  SHARP_TEST);
-		testGrid.put(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL,  I2B2_TEST);
-		testGrid.put(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL,  MIPACQ_TEST); // not valid
-		testGrid.put(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL,  NEGEX_TEST);
-		testGrid.put(PolarityCotrainingTrain.SHARP_NEGEX_MODEL,  	SHARP_TEST);
-		testGrid.put(PolarityCotrainingTrain.SHARP_NEGEX_MODEL,  	I2B2_TEST);
-		testGrid.put(PolarityCotrainingTrain.SHARP_NEGEX_MODEL,  	MIPACQ_TEST);
-		testGrid.put(PolarityCotrainingTrain.SHARP_NEGEX_MODEL,  	NEGEX_TEST);  // not valid
-		testGrid.put(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_NEGEX_MODEL,  	SHARP_TEST);
-		testGrid.put(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_NEGEX_MODEL,  	I2B2_TEST);
-		testGrid.put(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_NEGEX_MODEL,  	MIPACQ_TEST);
-		testGrid.put(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_NEGEX_MODEL,  	NEGEX_TEST);
+		ArrayList<TestPair> testGrid = new ArrayList<TestPair>();
+		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MODEL, 	SHARP_TEST));
+		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MODEL, 	I2B2_TEST));
+		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MODEL, 	MIPACQ_TEST));
+		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MODEL, 	NEGEX_TEST));
+		testGrid.add(new TestPair(PolarityCotrainingTrain.I2B2_MODEL,  	SHARP_TEST));
+		testGrid.add(new TestPair(PolarityCotrainingTrain.I2B2_MODEL,  	I2B2_TEST));
+		testGrid.add(new TestPair(PolarityCotrainingTrain.I2B2_MODEL,  	MIPACQ_TEST));
+		testGrid.add(new TestPair(PolarityCotrainingTrain.I2B2_MODEL,  	NEGEX_TEST));
+		testGrid.add(new TestPair(PolarityCotrainingTrain.MIPACQ_MODEL,  SHARP_TEST));
+		testGrid.add(new TestPair(PolarityCotrainingTrain.MIPACQ_MODEL,  I2B2_TEST));
+		testGrid.add(new TestPair(PolarityCotrainingTrain.MIPACQ_MODEL,  MIPACQ_TEST));
+		testGrid.add(new TestPair(PolarityCotrainingTrain.MIPACQ_MODEL,  NEGEX_TEST));
+		testGrid.add(new TestPair(PolarityCotrainingTrain.NEGEX_MODEL,  	SHARP_TEST));
+		testGrid.add(new TestPair(PolarityCotrainingTrain.NEGEX_MODEL,  	I2B2_TEST));
+		testGrid.add(new TestPair(PolarityCotrainingTrain.NEGEX_MODEL,  	MIPACQ_TEST));
+		testGrid.add(new TestPair(PolarityCotrainingTrain.NEGEX_MODEL,  	NEGEX_TEST));  // not valid
+		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MODEL,  	SHARP_TEST));
+		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MODEL,  	I2B2_TEST));
+//		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MODEL,  	MIPACQ_TEST)); // not meaningful
+//		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MODEL,  	NEGEX_TEST));  // not meaningful
+		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL,  SHARP_TEST));
+//		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL,  I2B2_TEST));    // not meaningful
+		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL,  MIPACQ_TEST));
+//		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL,  NEGEX_TEST));
+//		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_NEGEX_MODEL,  	SHARP_TEST)); // not meaningful
+//		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_NEGEX_MODEL,  	I2B2_TEST));  //not meaningful
+//		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_NEGEX_MODEL,  	MIPACQ_TEST)); // not meaningful
+//		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_NEGEX_MODEL,  	NEGEX_TEST));  // not valid
+		testGrid.add(new TestPair(PolarityCotrainingTrain.I2B2_MIPACQ_NEGEX_MODEL,  	SHARP_TEST));
+		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MIPACQ_NEGEX_MODEL,  	I2B2_TEST));
+		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_NEGEX_MODEL,  		MIPACQ_TEST));
+		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_MODEL,  	NEGEX_TEST));
+		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_NEGEX_MODEL,  	SHARP_TEST));
+		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_NEGEX_MODEL,  	I2B2_TEST));
+		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_NEGEX_MODEL,  	MIPACQ_TEST));
+//		testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_NEGEX_MODEL,  	NEGEX_TEST)); //not valid
 
 		
 		String attribute = "polarity";
 
-		for (Entry<String, String> oneTest : testGrid.entrySet()) {
+		for (TestPair oneTest : testGrid) {
 			ArrayList<String> params = new ArrayList<String>();
 
-			params.add("--test-dir"); 	params.add(oneTest.getValue());
-			params.add("--models-dir"); params.add(oneTest.getKey());
+			params.add("--test-dir"); 	params.add(oneTest.data);
+			params.add("--models-dir"); params.add(oneTest.model);
 			//			params.add("--ytex-negation");
 			//		params.add("--evaluation-output-dir");	params.add(AssertionConst.evalOutputDir);
 			params.add("--test-only");	
-			params.add("--print-errors");
+//			params.add("--print-errors");
 
 			// Build up an "ignore" string
 			for (String ignoreAttribute : AssertionConst.allAnnotationTypes) {
@@ -95,5 +100,14 @@ public class PolarityCotrainingTests {
 	}
 
 
+	static class TestPair { // one (model, test data) combination; main iterates over a list of these
+		String model; // supplied as the value of the --models-dir parameter
+		String data; // supplied as the value of the --test-dir parameter
+		TestPair (String a, String b) { // a is stored as model, b is stored as data
+			model=a;
+			data=b;
+		}
+	}
+	
 
 }

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTrain.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTrain.java?rev=1518382&r1=1518381&r2=1518382&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTrain.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTrain.java Wed Aug 28 20:56:00 2013
@@ -19,13 +19,17 @@ public class PolarityCotrainingTrain {
 	protected final static String MIPACQ_TRAIN = AssertionConst.DATA_DIR + "preprocessed_data/mipacq/train";
 	protected final static String NEGEX_TRAIN = AssertionConst.DATA_DIR + "preprocessed_data/negex"; // actually test
 
-	protected final static String SHARP_MODEL = "../ctakes-assertion-res/resources/model/sharptrain";
+	public final static String SHARP_MODEL = "../ctakes-assertion-res/resources/model/sharptrain";
 	protected final static String I2B2_MODEL  = "../ctakes-assertion-res/resources/model/i2b2train";
 	protected final static String MIPACQ_MODEL  = "../ctakes-assertion-res/resources/model/mipacqtrain";
 	protected final static String NEGEX_MODEL  = "../ctakes-assertion-res/resources/model/negextest";
 	protected final static String SHARP_I2B2_MODEL = "../ctakes-assertion-res/resources/model/sharptrain+i2b2train";
 	protected final static String SHARP_MIPACQ_MODEL = "../ctakes-assertion-res/resources/model/sharptrain+mipacqtrain";
 	protected final static String SHARP_NEGEX_MODEL = "../ctakes-assertion-res/resources/model/sharptrain+negextest";
+	protected final static String I2B2_MIPACQ_NEGEX_MODEL = "../ctakes-assertion-res/resources/model/i2b2train+mipacqtrain+negextest";
+	protected final static String SHARP_I2B2_MIPACQ_MODEL = "../ctakes-assertion-res/resources/model/sharptrain+i2b2train+mipacqtrain";
+	protected final static String SHARP_MIPACQ_NEGEX_MODEL = "../ctakes-assertion-res/resources/model/sharptrain+mipacqtrain+negextest";
+	protected final static String SHARP_I2B2_NEGEX_MODEL = "../ctakes-assertion-res/resources/model/sharptrain+i2b2train+negextest";
 	protected final static String SHARP_I2B2_MIPACQ_NEGEX_MODEL = "../ctakes-assertion-res/resources/model/sharpi2b2mipacqnegex";
 
 	public static void main(String[] args) throws Exception {
@@ -40,6 +44,10 @@ public class PolarityCotrainingTrain {
 		trainGrid.put(SHARP_TRAIN+":"+I2B2_TRAIN,	SHARP_I2B2_MODEL);
 		trainGrid.put(SHARP_TRAIN+":"+MIPACQ_TRAIN,	SHARP_MIPACQ_MODEL);
 		trainGrid.put(SHARP_TRAIN+":"+NEGEX_TRAIN,	SHARP_NEGEX_MODEL);
+		trainGrid.put(I2B2_TRAIN+":"+MIPACQ_TRAIN+":"+NEGEX_TRAIN,	I2B2_MIPACQ_NEGEX_MODEL);
+		trainGrid.put(SHARP_TRAIN+":"+I2B2_TRAIN+":"+MIPACQ_TRAIN,	SHARP_I2B2_MIPACQ_MODEL);
+		trainGrid.put(SHARP_TRAIN+":"+MIPACQ_TRAIN+":"+NEGEX_TRAIN,	SHARP_MIPACQ_NEGEX_MODEL);
+		trainGrid.put(SHARP_TRAIN+":"+I2B2_TRAIN+":"+NEGEX_TRAIN,	SHARP_I2B2_NEGEX_MODEL);
 		trainGrid.put(SHARP_TRAIN+":"+I2B2_TRAIN+":"+MIPACQ_TRAIN+":"+NEGEX_TRAIN,	
 				SHARP_I2B2_MIPACQ_NEGEX_MODEL);
 

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java?rev=1518382&r1=1518381&r2=1518382&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java Wed Aug 28 20:56:00 2013
@@ -101,8 +101,8 @@ public class AssertionConst {
 //		preprocessRootDirectory.put(SHARP_SEED_CORPUS + "Seattle Group Health/UMLS_CEM", DATA_DIR + "preprocessed_data/sharp/train");
 //		preprocessRootDirectory.put(I2B2_2010_CORPUS, DATA_DIR + "preprocessed_data/i2b2/train");
 //		preprocessRootDirectory.put(I2B2_2010_TEST_CORPUS, DATA_DIR + "preprocessed_data/i2b2/test");
-		preprocessRootDirectory.put(MiPACQ_CORPUS, DATA_DIR + "preprocessed_data/mipacq/train");
-//		preprocessRootDirectory.put(DATA_DIR + "gold_standard/negex", DATA_DIR + "preprocessed_data/negex");
+//		preprocessRootDirectory.put(MiPACQ_CORPUS, DATA_DIR + "preprocessed_data/mipacq/train");
+		preprocessRootDirectory.put(DATA_DIR + "gold_standard/negex", DATA_DIR + "preprocessed_data/negex");
 		
 		// If one of the preprocessRootDirectory entries above is commented out, warn user with a popup
 //		if (preprocessRootDirectory.keySet().size()<4) {