You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by sw...@apache.org on 2013/08/28 22:56:00 UTC
svn commit: r1518382 - in
/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion:
cr/NegExCorpusReader.java eval/AssertionEvaluation.java
train/PolarityCotrainingTests.java train/PolarityCotrainingTrain.java
util/AssertionConst.java
Author: swu
Date: Wed Aug 28 20:56:00 2013
New Revision: 1518382
URL: http://svn.apache.org/r1518382
Log:
more negation (annotation/cotraining) tests
Modified:
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/NegExCorpusReader.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTests.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTrain.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/NegExCorpusReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/NegExCorpusReader.java?rev=1518382&r1=1518381&r2=1518382&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/NegExCorpusReader.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/NegExCorpusReader.java Wed Aug 28 20:56:00 2013
@@ -62,7 +62,7 @@ public class NegExCorpusReader extends C
public NegExCorpusReader() {
- this(true);
+ this(false);
}
public NegExCorpusReader(boolean skipReadingValuesJustReadText) {
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java?rev=1518382&r1=1518381&r2=1518382&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java Wed Aug 28 20:56:00 2013
@@ -276,7 +276,7 @@ private static Logger logger = Logger.ge
resetOptions();
options.parseOptions(args);
- if (useEvaluationLogFile) {
+ if (useEvaluationLogFile && evaluationLogFileOut == null) {
evaluationLogFile = new File(evaluationLogFilePath);
evaluationLogFileOut = new BufferedWriter(new FileWriter(evaluationLogFile), 32768);
}
@@ -484,17 +484,20 @@ public static void printScore(Map<String
String annotationType = currentEntry.getKey();
AnnotationStatisticsCompact stats = currentEntry.getValue();
- System.out.format("directory: \"%s\"; assertion type: %s%n%n%s%n%n",
+ System.out.format("directory: \"%s\"; assertion type: %s%n%s%n%s%n%n",
directory,
annotationType.toUpperCase(),
+ options.testDirectory,
stats.toString());
try {
if (useEvaluationLogFile) {
evaluationLogFileOut.write(
- String.format("%s\t%f\t%s",
+ String.format("%s\t%f\t%s\t%s\t%s",
annotationType,
options.featureSelectionThreshold,
+ options.modelsDirectory.getName(),
+ options.testDirectory.toString(),
stats.toTsv())
);
evaluationLogFileOut.flush();
@@ -556,7 +559,7 @@ public static void printScore(Map<String
GoldEntityAndAttributeReaderPipelineForSeedCorpus.readI2B2Challenge2010(rawDir, preprocessedDir);
} else if (rawDir.getAbsolutePath().contains("mipacq")) {
- GoldEntityAndAttributeReaderPipelineForSeedCorpus.readMiPACQ(rawDir, preprocessedDir);
+ GoldEntityAndAttributeReaderPipelineForSeedCorpus.readMiPACQ(rawDir, preprocessedDir, options.testDirectory, options.devDirectory);
} else if (rawDir.getAbsolutePath().contains("negex")) {
GoldEntityAndAttributeReaderPipelineForSeedCorpus.readNegexTestSet(rawDir, preprocessedDir);
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTests.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTests.java?rev=1518382&r1=1518381&r2=1518382&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTests.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTests.java Wed Aug 28 20:56:00 2013
@@ -1,12 +1,10 @@
package org.apache.ctakes.assertion.train;
import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Map.Entry;
+import java.util.Date;
import org.apache.ctakes.assertion.eval.AssertionEvaluation;
import org.apache.ctakes.assertion.util.AssertionConst;
-import org.apache.ctakes.assertion.train.PolarityCotrainingTrain.*;
import scala.actors.threadpool.Arrays;
@@ -18,61 +16,68 @@ import scala.actors.threadpool.Arrays;
*/
public class PolarityCotrainingTests {
+ final static String RUN_ID = "fullgrid_";
+
protected final static String SHARP_TEST = AssertionConst.DATA_DIR + "preprocessed_data/sharp/test";
protected final static String I2B2_TEST = AssertionConst.DATA_DIR + "preprocessed_data/i2b2/test";
- protected final static String MIPACQ_TEST = AssertionConst.DATA_DIR + "preprocessed_data/mipacq/cTAKES-xmi";
+ protected final static String MIPACQ_TEST = AssertionConst.DATA_DIR + "preprocessed_data/mipacq/test";
protected final static String NEGEX_TEST = AssertionConst.DATA_DIR + "preprocessed_data/negex";
public static void main(String[] args) throws Exception {
AssertionEvaluation.useEvaluationLogFile = true;
+ AssertionEvaluation.evaluationLogFilePath = "eval/"+RUN_ID+new Date().toString().replaceAll(" ","_") + ".txt";
- HashMap<String,String> testGrid = new HashMap<String,String>();
- testGrid.put(PolarityCotrainingTrain.SHARP_MODEL, SHARP_TEST);
- testGrid.put(PolarityCotrainingTrain.SHARP_MODEL, I2B2_TEST);
- testGrid.put(PolarityCotrainingTrain.SHARP_MODEL, MIPACQ_TEST);
- testGrid.put(PolarityCotrainingTrain.SHARP_MODEL, NEGEX_TEST);
- testGrid.put(PolarityCotrainingTrain.I2B2_MODEL, SHARP_TEST);
- testGrid.put(PolarityCotrainingTrain.I2B2_MODEL, I2B2_TEST);
- testGrid.put(PolarityCotrainingTrain.I2B2_MODEL, MIPACQ_TEST);
- testGrid.put(PolarityCotrainingTrain.I2B2_MODEL, NEGEX_TEST);
- testGrid.put(PolarityCotrainingTrain.MIPACQ_MODEL, SHARP_TEST);
- testGrid.put(PolarityCotrainingTrain.MIPACQ_MODEL, I2B2_TEST);
- testGrid.put(PolarityCotrainingTrain.MIPACQ_MODEL, MIPACQ_TEST); // not valid
- testGrid.put(PolarityCotrainingTrain.MIPACQ_MODEL, NEGEX_TEST);
- testGrid.put(PolarityCotrainingTrain.NEGEX_MODEL, SHARP_TEST);
- testGrid.put(PolarityCotrainingTrain.NEGEX_MODEL, I2B2_TEST);
- testGrid.put(PolarityCotrainingTrain.NEGEX_MODEL, MIPACQ_TEST);
- testGrid.put(PolarityCotrainingTrain.NEGEX_MODEL, NEGEX_TEST); // not valid
- testGrid.put(PolarityCotrainingTrain.SHARP_I2B2_MODEL, SHARP_TEST);
- testGrid.put(PolarityCotrainingTrain.SHARP_I2B2_MODEL, I2B2_TEST);
- testGrid.put(PolarityCotrainingTrain.SHARP_I2B2_MODEL, MIPACQ_TEST);
- testGrid.put(PolarityCotrainingTrain.SHARP_I2B2_MODEL, NEGEX_TEST);
- testGrid.put(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL, SHARP_TEST);
- testGrid.put(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL, I2B2_TEST);
- testGrid.put(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL, MIPACQ_TEST); // not valid
- testGrid.put(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL, NEGEX_TEST);
- testGrid.put(PolarityCotrainingTrain.SHARP_NEGEX_MODEL, SHARP_TEST);
- testGrid.put(PolarityCotrainingTrain.SHARP_NEGEX_MODEL, I2B2_TEST);
- testGrid.put(PolarityCotrainingTrain.SHARP_NEGEX_MODEL, MIPACQ_TEST);
- testGrid.put(PolarityCotrainingTrain.SHARP_NEGEX_MODEL, NEGEX_TEST); // not valid
- testGrid.put(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_NEGEX_MODEL, SHARP_TEST);
- testGrid.put(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_NEGEX_MODEL, I2B2_TEST);
- testGrid.put(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_NEGEX_MODEL, MIPACQ_TEST);
- testGrid.put(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_NEGEX_MODEL, NEGEX_TEST);
+ ArrayList<TestPair> testGrid = new ArrayList<TestPair>();
+ testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MODEL, SHARP_TEST));
+ testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MODEL, I2B2_TEST));
+ testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MODEL, MIPACQ_TEST));
+ testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MODEL, NEGEX_TEST));
+ testGrid.add(new TestPair(PolarityCotrainingTrain.I2B2_MODEL, SHARP_TEST));
+ testGrid.add(new TestPair(PolarityCotrainingTrain.I2B2_MODEL, I2B2_TEST));
+ testGrid.add(new TestPair(PolarityCotrainingTrain.I2B2_MODEL, MIPACQ_TEST));
+ testGrid.add(new TestPair(PolarityCotrainingTrain.I2B2_MODEL, NEGEX_TEST));
+ testGrid.add(new TestPair(PolarityCotrainingTrain.MIPACQ_MODEL, SHARP_TEST));
+ testGrid.add(new TestPair(PolarityCotrainingTrain.MIPACQ_MODEL, I2B2_TEST));
+ testGrid.add(new TestPair(PolarityCotrainingTrain.MIPACQ_MODEL, MIPACQ_TEST));
+ testGrid.add(new TestPair(PolarityCotrainingTrain.MIPACQ_MODEL, NEGEX_TEST));
+ testGrid.add(new TestPair(PolarityCotrainingTrain.NEGEX_MODEL, SHARP_TEST));
+ testGrid.add(new TestPair(PolarityCotrainingTrain.NEGEX_MODEL, I2B2_TEST));
+ testGrid.add(new TestPair(PolarityCotrainingTrain.NEGEX_MODEL, MIPACQ_TEST));
+ testGrid.add(new TestPair(PolarityCotrainingTrain.NEGEX_MODEL, NEGEX_TEST)); // not valid
+ testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MODEL, SHARP_TEST));
+ testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MODEL, I2B2_TEST));
+// testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MODEL, MIPACQ_TEST)); // not meaningful
+// testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MODEL, NEGEX_TEST)); // not meaningful
+ testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL, SHARP_TEST));
+// testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL, I2B2_TEST)); // not meaningful
+ testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL, MIPACQ_TEST));
+// testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MIPACQ_MODEL, NEGEX_TEST));
+// testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_NEGEX_MODEL, SHARP_TEST)); // not meaningful
+// testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_NEGEX_MODEL, I2B2_TEST)); //not meaningful
+// testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_NEGEX_MODEL, MIPACQ_TEST)); // not meaningful
+// testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_NEGEX_MODEL, NEGEX_TEST)); // not valid
+ testGrid.add(new TestPair(PolarityCotrainingTrain.I2B2_MIPACQ_NEGEX_MODEL, SHARP_TEST));
+ testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_MIPACQ_NEGEX_MODEL, I2B2_TEST));
+ testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_NEGEX_MODEL, MIPACQ_TEST));
+ testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_MODEL, NEGEX_TEST));
+ testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_NEGEX_MODEL, SHARP_TEST));
+ testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_NEGEX_MODEL, I2B2_TEST));
+ testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_NEGEX_MODEL, MIPACQ_TEST));
+// testGrid.add(new TestPair(PolarityCotrainingTrain.SHARP_I2B2_MIPACQ_NEGEX_MODEL, NEGEX_TEST)); //not valid
String attribute = "polarity";
- for (Entry<String, String> oneTest : testGrid.entrySet()) {
+ for (TestPair oneTest : testGrid) {
ArrayList<String> params = new ArrayList<String>();
- params.add("--test-dir"); params.add(oneTest.getValue());
- params.add("--models-dir"); params.add(oneTest.getKey());
+ params.add("--test-dir"); params.add(oneTest.data);
+ params.add("--models-dir"); params.add(oneTest.model);
// params.add("--ytex-negation");
// params.add("--evaluation-output-dir"); params.add(AssertionConst.evalOutputDir);
params.add("--test-only");
- params.add("--print-errors");
+// params.add("--print-errors");
// Build up an "ignore" string
for (String ignoreAttribute : AssertionConst.allAnnotationTypes) {
@@ -95,5 +100,14 @@ public class PolarityCotrainingTests {
}
+ static class TestPair {
+ String model;
+ String data;
+ TestPair (String a, String b) {
+ model=a;
+ data=b;
+ }
+ }
+
}
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTrain.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTrain.java?rev=1518382&r1=1518381&r2=1518382&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTrain.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTrain.java Wed Aug 28 20:56:00 2013
@@ -19,13 +19,17 @@ public class PolarityCotrainingTrain {
protected final static String MIPACQ_TRAIN = AssertionConst.DATA_DIR + "preprocessed_data/mipacq/train";
protected final static String NEGEX_TRAIN = AssertionConst.DATA_DIR + "preprocessed_data/negex"; // actually test
- protected final static String SHARP_MODEL = "../ctakes-assertion-res/resources/model/sharptrain";
+ public final static String SHARP_MODEL = "../ctakes-assertion-res/resources/model/sharptrain";
protected final static String I2B2_MODEL = "../ctakes-assertion-res/resources/model/i2b2train";
protected final static String MIPACQ_MODEL = "../ctakes-assertion-res/resources/model/mipacqtrain";
protected final static String NEGEX_MODEL = "../ctakes-assertion-res/resources/model/negextest";
protected final static String SHARP_I2B2_MODEL = "../ctakes-assertion-res/resources/model/sharptrain+i2b2train";
protected final static String SHARP_MIPACQ_MODEL = "../ctakes-assertion-res/resources/model/sharptrain+mipacqtrain";
protected final static String SHARP_NEGEX_MODEL = "../ctakes-assertion-res/resources/model/sharptrain+negextest";
+ protected final static String I2B2_MIPACQ_NEGEX_MODEL = "../ctakes-assertion-res/resources/model/i2b2train+mipacqtrain+negextest";
+ protected final static String SHARP_I2B2_MIPACQ_MODEL = "../ctakes-assertion-res/resources/model/sharptrain+i2b2train+mipacqtrain";
+ protected final static String SHARP_MIPACQ_NEGEX_MODEL = "../ctakes-assertion-res/resources/model/sharptrain+mipacqtrain+negextest";
+ protected final static String SHARP_I2B2_NEGEX_MODEL = "../ctakes-assertion-res/resources/model/sharptrain+i2b2train+negextest";
protected final static String SHARP_I2B2_MIPACQ_NEGEX_MODEL = "../ctakes-assertion-res/resources/model/sharpi2b2mipacqnegex";
public static void main(String[] args) throws Exception {
@@ -40,6 +44,10 @@ public class PolarityCotrainingTrain {
trainGrid.put(SHARP_TRAIN+":"+I2B2_TRAIN, SHARP_I2B2_MODEL);
trainGrid.put(SHARP_TRAIN+":"+MIPACQ_TRAIN, SHARP_MIPACQ_MODEL);
trainGrid.put(SHARP_TRAIN+":"+NEGEX_TRAIN, SHARP_NEGEX_MODEL);
+ trainGrid.put(I2B2_TRAIN+":"+MIPACQ_TRAIN+":"+NEGEX_TRAIN, I2B2_MIPACQ_NEGEX_MODEL);
+ trainGrid.put(SHARP_TRAIN+":"+I2B2_TRAIN+":"+MIPACQ_TRAIN, SHARP_I2B2_MIPACQ_MODEL);
+ trainGrid.put(SHARP_TRAIN+":"+MIPACQ_TRAIN+":"+NEGEX_TRAIN, SHARP_MIPACQ_NEGEX_MODEL);
+ trainGrid.put(SHARP_TRAIN+":"+I2B2_TRAIN+":"+NEGEX_TRAIN, SHARP_I2B2_NEGEX_MODEL);
trainGrid.put(SHARP_TRAIN+":"+I2B2_TRAIN+":"+MIPACQ_TRAIN+":"+NEGEX_TRAIN,
SHARP_I2B2_MIPACQ_NEGEX_MODEL);
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java?rev=1518382&r1=1518381&r2=1518382&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java Wed Aug 28 20:56:00 2013
@@ -101,8 +101,8 @@ public class AssertionConst {
// preprocessRootDirectory.put(SHARP_SEED_CORPUS + "Seattle Group Health/UMLS_CEM", DATA_DIR + "preprocessed_data/sharp/train");
// preprocessRootDirectory.put(I2B2_2010_CORPUS, DATA_DIR + "preprocessed_data/i2b2/train");
// preprocessRootDirectory.put(I2B2_2010_TEST_CORPUS, DATA_DIR + "preprocessed_data/i2b2/test");
- preprocessRootDirectory.put(MiPACQ_CORPUS, DATA_DIR + "preprocessed_data/mipacq/train");
-// preprocessRootDirectory.put(DATA_DIR + "gold_standard/negex", DATA_DIR + "preprocessed_data/negex");
+// preprocessRootDirectory.put(MiPACQ_CORPUS, DATA_DIR + "preprocessed_data/mipacq/train");
+ preprocessRootDirectory.put(DATA_DIR + "gold_standard/negex", DATA_DIR + "preprocessed_data/negex");
// If one of the preprocessRootDirectory entries above is commented out, warn user with a popup
// if (preprocessRootDirectory.keySet().size()<4) {