You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by sw...@apache.org on 2013/08/28 18:39:50 UTC
svn commit: r1518282 - in
/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion:
cr/ eval/ pipelines/ train/ util/
Author: swu
Date: Wed Aug 28 16:39:50 2013
New Revision: 1518282
URL: http://svn.apache.org/r1518282
Log:
Changes needed to get the negation (annotation) tests running, plus a few changes to the default locations in AssertionConst.
Modified:
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/MiPACQKnowtatorXMLReader.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/TestFeatureSelection.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateAttributeModels.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTrain.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TestAttributeModels.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/MiPACQKnowtatorXMLReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/MiPACQKnowtatorXMLReader.java?rev=1518282&r1=1518281&r2=1518282&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/MiPACQKnowtatorXMLReader.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/cr/MiPACQKnowtatorXMLReader.java Wed Aug 28 16:39:50 2013
@@ -125,8 +125,8 @@ import com.google.common.collect.Sets;
import com.google.common.io.Files;
/**
- * assumes knowtator xml files are in "exported-xml" subdirectory
- * and the original plaintext files are in "text" subdirectory
+ * assumes knowtator xml files are in "exported-xml" subdirectory w/ train/dev/test subsubdirs
+ * and the original plaintext files are in "text" subdirectory w/ train/dev/test subsubdirs
*
*/
public class MiPACQKnowtatorXMLReader extends JCasAnnotator_ImplBase {
@@ -168,7 +168,7 @@ public class MiPACQKnowtatorXMLReader ex
URI uri;
try {
- uri = new URI(textPath);
+ uri = new URI("file://"+textPath);
} catch (URISyntaxException e) {
throw new AnalysisEngineProcessException(e);
}
@@ -214,6 +214,15 @@ public class MiPACQKnowtatorXMLReader ex
// LOGGER.info("newPath = " + newPath);
// URI newUri =new URI(newPath);
// LOGGER.info("newUri = " + newUri);
+// String[] textPath = this.getTextURI(jCas).toString().split("/");
+// String lastDir = "";
+// String file = "";
+// if (textPath.length>1) {
+// lastDir = textPath[textPath.length-2];
+// file = textPath[textPath.length-1];
+// }
+// URI relUri = new URI("../../exported-xml/"+lastDir+"/"+file); // relative to text directory
+// URI newUri = this.getTextURI(jCas).resolve(relUri);
URI newUri = new URI(newPath);
return newUri;
} catch (URISyntaxException e) {
@@ -275,7 +284,7 @@ public class MiPACQKnowtatorXMLReader ex
// determine Knowtator XML file from the CAS
URI knowtatorURI = this.getKnowtatorURI(jCas);
if (!new File(knowtatorURI).exists()) {
- LOGGER.fatal("no such Knowtator XML file " + knowtatorURI);
+ LOGGER.warn("near-FATAL: no such Knowtator XML file " + knowtatorURI);
return;
}
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/TestFeatureSelection.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/TestFeatureSelection.java?rev=1518282&r1=1518281&r2=1518282&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/TestFeatureSelection.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/TestFeatureSelection.java Wed Aug 28 16:39:50 2013
@@ -21,9 +21,9 @@ public class TestFeatureSelection {
// Extracting features and writing instances
Iterable<Instance<String>> instances = InstanceStream.loadFromDirectory(directory);
- FeatureSelection<String> featureSelection;
- featureSelection = PolarityCleartkAnalysisEngine.createFeatureSelection(1f);
- featureSelection.train(instances);
+// FeatureSelection<String> featureSelection;
+// featureSelection = PolarityCleartkAnalysisEngine.createFeatureSelection(1f);
+// featureSelection.train(instances);
// featureSelection.save(PolarityCleartkAnalysisEngine.createFeatureSelectionURI(directory));
}
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java?rev=1518282&r1=1518281&r2=1518282&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java Wed Aug 28 16:39:50 2013
@@ -22,6 +22,7 @@ package org.apache.ctakes.assertion.pipe
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
+import java.util.HashMap;
import org.apache.log4j.Logger;
import org.apache.uima.UIMAException;
@@ -35,7 +36,6 @@ import org.uimafit.factory.AnalysisEngin
import org.uimafit.factory.CollectionReaderFactory;
import org.uimafit.factory.TypeSystemDescriptionFactory;
import org.uimafit.pipeline.SimplePipeline;
-
import org.apache.ctakes.assertion.cr.GoldEntityAndAttributeReader;
import org.apache.ctakes.assertion.cr.I2B2Challenge2010CollectionReader;
import org.apache.ctakes.assertion.cr.MiPACQKnowtatorXMLReader;
@@ -289,50 +289,57 @@ public class GoldEntityAndAttributeReade
logger.info("Finished!");
}
- public static void readMiPACQ(File inputDirectory, File preprocessedDirectory)
+ public static void readMiPACQ(File inputDirectory, File preprocessedDirectory, File testDirectory, File devDirectory)
throws ResourceInitializationException, UIMAException, IOException {
TypeSystemDescription typeSystemDescription =
TypeSystemDescriptionFactory.createTypeSystemDescription();
- AggregateBuilder aggregate = new AggregateBuilder();
+ HashMap<File,File> splitMipacq = new HashMap<File,File>();
+ splitMipacq.put(new File(inputDirectory+"/text/train"), preprocessedDirectory);
+ splitMipacq.put(new File(inputDirectory+"/text/test"), testDirectory);
+ splitMipacq.put(new File(inputDirectory+"/text/dev"), devDirectory);
+ for (File inDir : splitMipacq.keySet() ) {
+ AggregateBuilder aggregate = new AggregateBuilder();
- CollectionReaderDescription collectionReader = CollectionReaderFactory.createDescription(
- FilesInDirectoryCollectionReader.class,
- typeSystemDescription,
- "InputDirectory",
- inputDirectory
- );
-
- // read the UMLS_CEM data from Knowtator
- AnalysisEngineDescription goldAnnotator = AnalysisEngineFactory.createPrimitiveDescription(
- MiPACQKnowtatorXMLReader.class,
- typeSystemDescription,
- MiPACQKnowtatorXMLReader.PARAM_TEXT_DIRECTORY,
- preprocessedDirectory
- );
-
- aggregate.add(goldAnnotator);
- // fill in other values that are necessary for preprocessing
- AnalysisEngineDescription preprocessAnnotator = AnalysisEngineFactory.createAnalysisEngineDescription(
- "desc/analysis_engine/AttributeDiscoveryPreprocessor"
- );
- aggregate.add(preprocessAnnotator);
+ CollectionReaderDescription collectionReader = CollectionReaderFactory.createDescription(
+ FilesInDirectoryCollectionReader.class,
+ typeSystemDescription,
+ "InputDirectory",
+ inDir
+ );
- if (preprocessedDirectory!=null) {
- AnalysisEngineDescription xWriter2 = AnalysisEngineFactory.createPrimitiveDescription(
- XWriter.class,
+ // read the UMLS_CEM data from Knowtator
+ AnalysisEngineDescription goldAnnotator = AnalysisEngineFactory.createPrimitiveDescription(
+ MiPACQKnowtatorXMLReader.class,
typeSystemDescription,
- XWriter.PARAM_OUTPUT_DIRECTORY_NAME,
- preprocessedDirectory,
- XWriter.PARAM_FILE_NAMER_CLASS_NAME,
- CtakesFileNamer.class.getName()
- );
- aggregate.add(xWriter2);
- // SimplePipeline.runPipeline(collectionReader, goldAnnotator, xWriter, xWriter2);
- }
+ MiPACQKnowtatorXMLReader.PARAM_TEXT_DIRECTORY,
+ inDir
+ );
- SimplePipeline.runPipeline(collectionReader, aggregate.createAggregateDescription());
+ aggregate.add(goldAnnotator);
+ // fill in other values that are necessary for preprocessing
+ AnalysisEngineDescription preprocessAnnotator = AnalysisEngineFactory.createAnalysisEngineDescription(
+ "desc/analysis_engine/AttributeDiscoveryPreprocessor"
+ );
+ aggregate.add(preprocessAnnotator);
+
+ if (preprocessedDirectory!=null) {
+ AnalysisEngineDescription xWriter2 = AnalysisEngineFactory.createPrimitiveDescription(
+ XWriter.class,
+ typeSystemDescription,
+ XWriter.PARAM_OUTPUT_DIRECTORY_NAME,
+ splitMipacq.get(inDir),
+ XWriter.PARAM_FILE_NAMER_CLASS_NAME,
+ CtakesFileNamer.class.getName()
+ );
+ aggregate.add(xWriter2);
+ // SimplePipeline.runPipeline(collectionReader, goldAnnotator, xWriter, xWriter2);
+ }
+
+ SimplePipeline.runPipeline(collectionReader, aggregate.createAggregateDescription());
+ }
+
logger.info("Finished!");
}
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateAttributeModels.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateAttributeModels.java?rev=1518282&r1=1518281&r2=1518282&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateAttributeModels.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateAttributeModels.java Wed Aug 28 16:39:50 2013
@@ -19,7 +19,7 @@ public class CrossValidateAttributeModel
params.add("--train-dir"); params.add(AssertionConst.trainingDirectories.get(attribute));
params.add("--models-dir"); params.add(AssertionConst.modelDirectory);
params.add("--cross-validation"); params.add("5");
- params.add("--feature-selection"); params.add("c");
+// params.add("--feature-selection"); params.add("c");
// Build up an "ignore" string
for (String ignoreAttribute : AssertionConst.annotationTypes) {
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTrain.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTrain.java?rev=1518282&r1=1518281&r2=1518282&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTrain.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityCotrainingTrain.java Wed Aug 28 16:39:50 2013
@@ -16,15 +16,15 @@ import scala.actors.threadpool.Arrays;
public class PolarityCotrainingTrain {
protected final static String SHARP_TRAIN = AssertionConst.DATA_DIR + "preprocessed_data/sharp/train";
protected final static String I2B2_TRAIN = AssertionConst.DATA_DIR + "preprocessed_data/i2b2/train";
- protected final static String MIPACQ_TRAIN = AssertionConst.DATA_DIR + "preprocessed_data/mipacq/cTAKES-xmi"; // actually test
+ protected final static String MIPACQ_TRAIN = AssertionConst.DATA_DIR + "preprocessed_data/mipacq/train";
protected final static String NEGEX_TRAIN = AssertionConst.DATA_DIR + "preprocessed_data/negex"; // actually test
protected final static String SHARP_MODEL = "../ctakes-assertion-res/resources/model/sharptrain";
protected final static String I2B2_MODEL = "../ctakes-assertion-res/resources/model/i2b2train";
- protected final static String MIPACQ_MODEL = "../ctakes-assertion-res/resources/model/mipacqtest";
+ protected final static String MIPACQ_MODEL = "../ctakes-assertion-res/resources/model/mipacqtrain";
protected final static String NEGEX_MODEL = "../ctakes-assertion-res/resources/model/negextest";
protected final static String SHARP_I2B2_MODEL = "../ctakes-assertion-res/resources/model/sharptrain+i2b2train";
- protected final static String SHARP_MIPACQ_MODEL = "../ctakes-assertion-res/resources/model/sharptrain+mipacqtest";
+ protected final static String SHARP_MIPACQ_MODEL = "../ctakes-assertion-res/resources/model/sharptrain+mipacqtrain";
protected final static String SHARP_NEGEX_MODEL = "../ctakes-assertion-res/resources/model/sharptrain+negextest";
protected final static String SHARP_I2B2_MIPACQ_NEGEX_MODEL = "../ctakes-assertion-res/resources/model/sharpi2b2mipacqnegex";
@@ -48,11 +48,8 @@ public class PolarityCotrainingTrain {
ArrayList<String> params = new ArrayList<String>();
params.add("--train-dir"); params.add(oneTrain.getKey());
-// params.add("--test-dir"); params.add("sharp_data/dev");
params.add("--models-dir"); params.add(oneTrain.getValue());
-// params.add("--evaluation-output-dir"); params.add(AssertionConst.evalOutputDir);
params.add("--train-only");
-// params.add("--feature-selection"); params.add("1.0");
// Build up an "ignore" string
for (String ignoreAttribute : AssertionConst.allAnnotationTypes) {
@@ -67,8 +64,6 @@ public class PolarityCotrainingTrain {
}
String[] paramList = params.toArray(new String[]{});
-// System.out.println(Arrays.asList(paramList).toString());
-
// Run the actual assertion training on just one attribute
AssertionEvaluation.main( paramList );
}
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TestAttributeModels.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TestAttributeModels.java?rev=1518282&r1=1518281&r2=1518282&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TestAttributeModels.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TestAttributeModels.java Wed Aug 28 16:39:50 2013
@@ -21,10 +21,10 @@ public class TestAttributeModels {
for (String attribute : AssertionConst.annotationTypes) {
ArrayList<String> params = new ArrayList<String>();
-// AssertionEvaluation.useEvaluationLogFile = true;
+ AssertionEvaluation.useEvaluationLogFile = true;
params.add("--test-dir"); params.add(AssertionConst.testDirectories.get(attribute));
-// params.add("--models-dir"); params.add(AssertionConst.modelDirectory);
+ params.add("--models-dir"); params.add(AssertionConst.modelDirectory);
// params.add("--ytex-negation");
params.add("--evaluation-output-dir"); params.add(AssertionConst.evalOutputDir);
params.add("--test-only");
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java?rev=1518282&r1=1518281&r2=1518282&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java Wed Aug 28 16:39:50 2013
@@ -13,7 +13,7 @@ public class AssertionConst {
// Locally-stored data models
// Note that on Windows, by default, /sharp-home/assertion/ the same as C:/sharp-home/assertion/
- public static final String BASE_DIRECTORY = "/sharp-home/assertion/"; // "/usr/data/work/data/assertion/";// "/Users/m081914/"; // "/usr/data/work/data/assertion/"; // "/usr/data";
+ public static final String BASE_DIRECTORY = "/projects/data/assertion/"; // "/usr/data/work/data/assertion/";// "/Users/m081914/"; // "/usr/data/work/data/assertion/"; // "/usr/data";
static {
if (!BASE_DIRECTORY.endsWith("/") && !BASE_DIRECTORY.endsWith("\\")) {
throw new RuntimeException("BASE_DIRECTORY should end with a slash");
@@ -32,10 +32,10 @@ public class AssertionConst {
public static final String I2B2_2010_TEST_CORPUS = DATA_DIR + "gold_standard/i2b2Challenge2010/Data/Test/reports/";
// expects subdirectories called exported-xml and text
- public static final String MiPACQ_CORPUS = DATA_DIR + "gold_standard/copies-of-just-clinical-knowtator-xml-and-text/";
+ public static final String MiPACQ_CORPUS = DATA_DIR + "gold_standard/mipacq/";
public static final String NEGEX_CORPUS = DATA_DIR + "gold_standard/negex/Annotations-1-120-random.txt";
- public static final String NEGEX_CORPUS_PREPROCESSED = DATA_DIR + "preprocessed_data/negex/";
+ public static final String NEGEX_CORPUS_PREPROCESSED = DATA_DIR + "preprocessed_data/negex/all";
// Just plaintext files, which will be run through cTAKES, to generate XMI - attributes will then be judged
// This in input for cTAKES; the output (evalOutputDir) can then be the input of the judge step.
@@ -44,46 +44,51 @@ public class AssertionConst {
// specify the model to write (train/crossvalidate) or read (test/crossvalidate).
// please rename for different configurations of training data
- public static String modelDirectory = "../ctakes-assertion-res/resources/model/sharp-sprint-train";
-// public static String modelDirectory = "../ctakes-assertion-res/resources/model/sharptrain-xval";
+// public static String modelDirectory = "../ctakes-assertion-res/resources/model/sharp-sprint-train";
+// public static String modelDirectory = "../ctakes-assertion-res/resources/model/sharptrain-xval-fs";
// public static String modelDirectory = "../ctakes-assertion-res/resources/model/sharptrain";
// public static String modelDirectory = "../ctakes-assertion-res/resources/model/sharptrain+i2b2train";
-// public static String modelDirectory = "../ctakes-assertion-res/resources/model/i2b2train";
+ public static String modelDirectory = "../ctakes-assertion-res/resources/model/mipacqtrain";
+// public static String modelDirectory = "../ctakes-assertion-res/resources/model/negextest";
// Specify training directories for each attribute in a (semi)colon-separated list, e.g., "preprocessed_data/dev:preprocessed_data/train"
public static HashMap<String,String> trainingDirectories = new HashMap<String,String>();
static {
// trainingDirectories.put("polarity","sharp_data/train");
-// trainingDirectories.put("polarity","i2b2_data/train");
- trainingDirectories.put("polarity", DATA_DIR + "preprocessed_data/train");
- trainingDirectories.put("conditional", DATA_DIR + "preprocessed_data/train");
- trainingDirectories.put("uncertainty", DATA_DIR + "preprocessed_data/train");
- trainingDirectories.put("subject", DATA_DIR + "preprocessed_data/train");
- trainingDirectories.put("generic", DATA_DIR + "preprocessed_data/train");
- trainingDirectories.put("historyOf", DATA_DIR + "preprocessed_data/train");
+// trainingDirectories.put("polarity", DATA_DIR + "preprocessed_data/i2b2/train");
+ trainingDirectories.put("polarity", DATA_DIR + "preprocessed_data/mipacq/train");
+// trainingDirectories.put("polarity", DATA_DIR + "preprocessed_data/sharp/train");
+// trainingDirectories.put("polarity", DATA_DIR + "preprocessed_data/sharp/train"+":"
+// +DATA_DIR+"preprocessed_data/i2b2/train");
+ trainingDirectories.put("conditional", DATA_DIR + "preprocessed_data/sharp/train");
+ trainingDirectories.put("uncertainty", DATA_DIR + "preprocessed_data/sharp/train");
+ trainingDirectories.put("subject", DATA_DIR + "preprocessed_data/sharp/train");
+ trainingDirectories.put("generic", DATA_DIR + "preprocessed_data/sharp/train");
+ trainingDirectories.put("historyOf", DATA_DIR + "preprocessed_data/sharp/train");
}
public static HashMap<String,String> testDirectories = new HashMap<String,String>();
static {
-// testDirectories.put("polarity","i2b2_data/test");
- testDirectories.put("polarity", DATA_DIR + "preprocessed_data/test");
- testDirectories.put("conditional", DATA_DIR + "preprocessed_data/test");
- testDirectories.put("uncertainty", DATA_DIR + "preprocessed_data/test");
- testDirectories.put("subject", DATA_DIR + "preprocessed_data/test");
- testDirectories.put("generic", DATA_DIR + "preprocessed_data/test");
- testDirectories.put("historyOf", DATA_DIR + "preprocessed_data/test");
+// testDirectories.put("polarity", DATA_DIR + "preprocessed_data/i2b2/test");
+ testDirectories.put("polarity", DATA_DIR + "preprocessed_data/sharp/test");
+// testDirectories.put("polarity", DATA_DIR + "preprocessed_data/sharp/dev");
+// testDirectories.put("conditional", DATA_DIR + "preprocessed_data/sharp/test");
+// testDirectories.put("uncertainty", DATA_DIR + "preprocessed_data/sharp/test");
+// testDirectories.put("subject", DATA_DIR + "preprocessed_data/sharp/test");
+// testDirectories.put("generic", DATA_DIR + "preprocessed_data/sharp/test");
+// testDirectories.put("historyOf", DATA_DIR + "preprocessed_data/sharp/test");
}
// If you don't want to train/cross-validate everything, comment these out
public static ArrayList<String> annotationTypes = new ArrayList<String>();
static {
annotationTypes.add("polarity");
- annotationTypes.add("conditional");
- annotationTypes.add("uncertainty");
- annotationTypes.add("subject");
- annotationTypes.add("generic");
- annotationTypes.add("historyOf");
+// annotationTypes.add("conditional");
+// annotationTypes.add("uncertainty");
+// annotationTypes.add("subject");
+// annotationTypes.add("generic");
+// annotationTypes.add("historyOf");
}
@@ -92,25 +97,27 @@ public class AssertionConst {
// Specify input and output data locations for preprocessing. Results will be used for model training
public static HashMap<String,String> preprocessRootDirectory = new HashMap<String,String>();
static {
- preprocessRootDirectory.put(SHARP_SEED_CORPUS + "Mayo/UMLS_CEM", DATA_DIR + "preprocessed_data/train");
- preprocessRootDirectory.put(SHARP_SEED_CORPUS + "Seattle Group Health/UMLS_CEM", DATA_DIR + "preprocessed_data/train");
- //preprocessRootDirectory.put(I2B2_2010_CORPUS, DATA_DIR + "i2b2_data/train");
- //preprocessRootDirectory.put(I2B2_2010_TEST_CORPUS, DATA_DIR + "i2b2_data/test");
+// preprocessRootDirectory.put(SHARP_SEED_CORPUS + "Mayo/UMLS_CEM", DATA_DIR + "preprocessed_data/sharp/train");
+// preprocessRootDirectory.put(SHARP_SEED_CORPUS + "Seattle Group Health/UMLS_CEM", DATA_DIR + "preprocessed_data/sharp/train");
+// preprocessRootDirectory.put(I2B2_2010_CORPUS, DATA_DIR + "preprocessed_data/i2b2/train");
+// preprocessRootDirectory.put(I2B2_2010_TEST_CORPUS, DATA_DIR + "preprocessed_data/i2b2/test");
+ preprocessRootDirectory.put(MiPACQ_CORPUS, DATA_DIR + "preprocessed_data/mipacq/train");
+// preprocessRootDirectory.put(DATA_DIR + "gold_standard/negex", DATA_DIR + "preprocessed_data/negex");
// If one of the preprocessRootDirectory entries above is commented out, warn user with a popup
- if (preprocessRootDirectory.keySet().size()<4) {
- JFrame frame = new JFrame("DialogDemo");
- frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
-
- //Create and set up the content pane.
- JOptionPane.showMessageDialog(frame, "Commented out one or more data dir(s) for now.. add back before using for real.");
- frame.dispose();
- //frame.setContentPane(newContentPane);
-
- //Display the window.
- //frame.pack();
- //frame.setVisible(true);
- }
+// if (preprocessRootDirectory.keySet().size()<4) {
+// JFrame frame = new JFrame("DialogDemo");
+// frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
+//
+// //Create and set up the content pane.
+// JOptionPane.showMessageDialog(frame, "Commented out one or more data dir(s) for now.. add back before using for real.");
+// frame.dispose();
+// //frame.setContentPane(newContentPane);
+//
+// //Display the window.
+// //frame.pack();
+// //frame.setVisible(true);
+// }
}
@@ -118,16 +125,18 @@ public class AssertionConst {
// The map maps input dir to output dir
public static HashMap<String,String> preprocessForTest = new HashMap<String,String>();
static {
- preprocessForTest.put(SHARP_SEED_CORPUS + "Mayo/UMLS_CEM", DATA_DIR + "preprocessed_data/test");
- preprocessForTest.put(SHARP_SEED_CORPUS + "Seattle Group Health/UMLS_CEM", DATA_DIR + "preprocessed_data/test");
+ preprocessForTest.put(SHARP_SEED_CORPUS + "Mayo/UMLS_CEM", DATA_DIR + "preprocessed_data/sharp/test");
+ preprocessForTest.put(SHARP_SEED_CORPUS + "Seattle Group Health/UMLS_CEM", DATA_DIR + "preprocessed_data/sharp/test");
+ preprocessForTest.put(MiPACQ_CORPUS, DATA_DIR + "preprocessed_data/mipacq/test");
}
// Specify input and output data locations for preprocessing. Results will be used for model dev
// The map maps input dir to output dir
public static HashMap<String,String> preprocessForDev = new HashMap<String,String>();
static {
- preprocessForDev.put(SHARP_SEED_CORPUS + "Mayo/UMLS_CEM", DATA_DIR + "preprocessed_data/dev");
- preprocessForDev.put(SHARP_SEED_CORPUS + "Seattle Group Health/UMLS_CEM", DATA_DIR + "preprocessed_data/dev");
+ preprocessForDev.put(SHARP_SEED_CORPUS + "Mayo/UMLS_CEM", DATA_DIR + "preprocessed_data/sharp/dev");
+ preprocessForDev.put(SHARP_SEED_CORPUS + "Seattle Group Health/UMLS_CEM", DATA_DIR + "preprocessed_data/sharp/dev");
+ preprocessForDev.put(MiPACQ_CORPUS, DATA_DIR + "preprocessed_data/mipacq/dev");
}
public static String evalOutputDir = DATA_DIR + "processing_output_aka_eval_output";