You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ja...@apache.org on 2013/07/10 17:25:46 UTC

svn commit: r1501796 - /ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java

Author: james-masanz
Date: Wed Jul 10 15:25:46 2013
New Revision: 1501796

URL: http://svn.apache.org/r1501796
Log:
Use a fresh set of directory names for testing so that we don't confuse new files with ones created yesterday or earlier.

Modified:
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java?rev=1501796&r1=1501795&r2=1501796&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java Wed Jul 10 15:25:46 2013
@@ -12,54 +12,56 @@ public class AssertionConst {
 
 	// Locally-stored data models
 	
-	public static final String BASE_DIRECTORY = "/usr/data/work/data/assertion/";// "/Users/m081914/"; // "/usr/data/work/data/assertion/";  // "/usr/data";
+	// Note that on Windows, by default, /sharp-home/assertion/ is the same as C:/sharp-home/assertion/
+	public static final String BASE_DIRECTORY = "/sharp-home/assertion/"; // "/usr/data/work/data/assertion/";// "/Users/m081914/"; // "/usr/data/work/data/assertion/";  // "/usr/data";
 	static {
 		if (!BASE_DIRECTORY.endsWith("/") && !BASE_DIRECTORY.endsWith("\\")) {
 			throw new RuntimeException("BASE_DIRECTORY should end with a slash");
 		}
 	}
 
+	// raw and processed text, expects subdirectories for different sources, then subsubdirectories for train/test/dev
+	public static final String DATA_DIR = BASE_DIRECTORY + "data/"; // + "work/data/assertion/";
+	
+
 	// expects subdirectories: "Mayo/UMLS_CEM/*batch*/Knowtator*" "Seattle Group Health/UMLS_CEM/*batch*/Knowtator*"
-	public static final String SHARP_SEED_CORPUS = BASE_DIRECTORY + "work/data/sharp/Seed Corpus/";
+	public static final String SHARP_SEED_CORPUS = DATA_DIR + "gold_standard/sharp/Seed Corpus/";
 	// expects subdirectories: ast, txt 
-	public static final String I2B2_2010_CORPUS = BASE_DIRECTORY + "work/data/i2b2Challenge2010/Data/i2b2Challenge2010AllTrain/";
+	public static final String I2B2_2010_CORPUS = DATA_DIR + "gold_standard/i2b2Challenge2010/Data/i2b2Challenge2010AllTrain/";
 	// expects subdirectories: ast, txt
-	public static final String I2B2_2010_TEST_CORPUS = BASE_DIRECTORY + "work/data/i2b2Challenge2010/Data/Test/reports/";
+	public static final String I2B2_2010_TEST_CORPUS = DATA_DIR + "gold_standard/i2b2Challenge2010/Data/Test/reports/";
 
-	// raw and processed text, expects subdirectories for different sources, then subsubdirectories for train/test/dev
-	public static final String DATA_DIR = BASE_DIRECTORY; // + "work/data/assertion/";
-	
 	// specify the model to write (train/crossvalidate) or read (test/crossvalidate).
 	//  please rename for different configurations of training data 
-	public static String modelDirectory = "../ctakes-assertion-res/resources/model/sharptrain-xval";
+	public static String modelDirectory = "../ctakes-assertion-res/resources/model/sharp-sprint-train";
 //	public static String modelDirectory = "../ctakes-assertion-res/resources/model/sharptrain-xval";
 //	public static String modelDirectory = "../ctakes-assertion-res/resources/model/sharptrain";
 //	public static String modelDirectory = "../ctakes-assertion-res/resources/model/sharptrain+i2b2train";
 //	public static String modelDirectory = "../ctakes-assertion-res/resources/model/i2b2train";
 
 	
-	// Specify training directories for each attribute in a (semi)colon-separated list, e.g., "sharp_data/dev:sharp_data/train"
+	// Specify training directories for each attribute in a (semi)colon-separated list, e.g., "preprocessed_data/dev:preprocessed_data/train"
 	public static HashMap<String,String> trainingDirectories = new HashMap<String,String>();
 	static { 
-		trainingDirectories.put("polarity", DATA_DIR + "fromtim_sharp_data/train");
 //		trainingDirectories.put("polarity","sharp_data/train");
 //		trainingDirectories.put("polarity","i2b2_data/train");
-		trainingDirectories.put("conditional", DATA_DIR + "fromtim_sharp_data/train");
-		trainingDirectories.put("uncertainty", DATA_DIR + "fromtim_sharp_data/train");
-		trainingDirectories.put("subject", DATA_DIR + "fromtim_sharp_data/train");
-		trainingDirectories.put("generic", DATA_DIR + "fromtim_sharp_data/train");
-		trainingDirectories.put("historyOf", DATA_DIR + "fromtim_sharp_data/train");
+		trainingDirectories.put("polarity", DATA_DIR +  "preprocessed_data/train");
+		trainingDirectories.put("conditional", DATA_DIR +  "preprocessed_data/train");
+		trainingDirectories.put("uncertainty", DATA_DIR +  "preprocessed_data/train");
+		trainingDirectories.put("subject", DATA_DIR +  "preprocessed_data/train");
+		trainingDirectories.put("generic", DATA_DIR +  "preprocessed_data/train");
+		trainingDirectories.put("historyOf", DATA_DIR +  "preprocessed_data/train");
 	}
 		
 	public static HashMap<String,String> testDirectories = new HashMap<String,String>();
 	static { 
-		testDirectories.put("polarity", DATA_DIR + "fromtim_sharp_data/test");
 //		testDirectories.put("polarity","i2b2_data/test");
-		testDirectories.put("conditional", DATA_DIR + "fromtim_sharp_data/test");
-		testDirectories.put("uncertainty", DATA_DIR + "fromtim_sharp_data/test");
-		testDirectories.put("subject", DATA_DIR + "fromtim_sharp_data/test");
-		testDirectories.put("generic", DATA_DIR + "fromtim_sharp_data/test");
-		testDirectories.put("historyOf", DATA_DIR + "fromtim_sharp_data/test");
+		testDirectories.put("polarity", DATA_DIR +  "preprocessed_data/test");
+		testDirectories.put("conditional", DATA_DIR +  "preprocessed_data/test");
+		testDirectories.put("uncertainty", DATA_DIR +  "preprocessed_data/test");
+		testDirectories.put("subject", DATA_DIR +  "preprocessed_data/test");
+		testDirectories.put("generic", DATA_DIR +  "preprocessed_data/test");
+		testDirectories.put("historyOf", DATA_DIR +  "preprocessed_data/test");
 	}
 		
 	// If you don't want to train/cross-validate everything, comment these out
@@ -79,10 +81,10 @@ public class AssertionConst {
 	// Specify input and output data locations for preprocessing.  Results will be used for model training
 	public static HashMap<String,String> preprocessRootDirectory = new HashMap<String,String>();
 	static { 
-		preprocessRootDirectory.put(SHARP_SEED_CORPUS+"Mayo/UMLS_CEM", DATA_DIR + "sharp_data/train");
-		preprocessRootDirectory.put(SHARP_SEED_CORPUS+"Seattle Group Health/UMLS_CEM", DATA_DIR + "sharp_data/train");
-		preprocessRootDirectory.put(I2B2_2010_CORPUS, DATA_DIR + "i2b2_data/train");
-		preprocessRootDirectory.put(I2B2_2010_TEST_CORPUS, DATA_DIR + "i2b2_data/test");
+		preprocessRootDirectory.put(SHARP_SEED_CORPUS + "Mayo/UMLS_CEM", DATA_DIR + "preprocessed_data/train");
+		preprocessRootDirectory.put(SHARP_SEED_CORPUS + "Seattle Group Health/UMLS_CEM", DATA_DIR + "preprocessed_data/train");
+		//preprocessRootDirectory.put(I2B2_2010_CORPUS, DATA_DIR + "i2b2_data/train");
+		//preprocessRootDirectory.put(I2B2_2010_TEST_CORPUS, DATA_DIR + "i2b2_data/test");
 		
 		// If one of the preprocessRootDirectory entries above is commented out, warn user with a popup
 		if (preprocessRootDirectory.keySet().size()<4) {
@@ -102,20 +104,22 @@ public class AssertionConst {
 	
 	
 	// Specify input and output data locations for preprocessing.  Results will be used for model test
+	// The map maps input dir to output dir
 	public static HashMap<String,String> preprocessForTest = new HashMap<String,String>();
 	static { 
-		preprocessForTest.put(SHARP_SEED_CORPUS+"/Mayo/UMLS_CEM", DATA_DIR + "sharp_data/test");
-		preprocessForTest.put(SHARP_SEED_CORPUS+"/Seattle Group Health/UMLS_CEM", DATA_DIR + "sharp_data/test");
+		preprocessForTest.put(SHARP_SEED_CORPUS + "Mayo/UMLS_CEM", DATA_DIR + "preprocessed_data/test");
+		preprocessForTest.put(SHARP_SEED_CORPUS + "Seattle Group Health/UMLS_CEM", DATA_DIR + "preprocessed_data/test");
 	}
 
 	// Specify input and output data locations for preprocessing.  Results will be used for model dev
+	// The map maps input dir to output dir
 	public static HashMap<String,String> preprocessForDev = new HashMap<String,String>();
 	static { 
-		preprocessForDev.put(SHARP_SEED_CORPUS+"/Mayo/UMLS_CEM", DATA_DIR + "sharp_data/dev");
-		preprocessForDev.put(SHARP_SEED_CORPUS+"/Seattle Group Health/UMLS_CEM", DATA_DIR + "sharp_data/dev");
+		preprocessForDev.put(SHARP_SEED_CORPUS + "Mayo/UMLS_CEM", DATA_DIR + "preprocessed_data/dev");
+		preprocessForDev.put(SHARP_SEED_CORPUS + "Seattle Group Health/UMLS_CEM", DATA_DIR + "preprocessed_data/dev");
 	}
 		
-	public static String evalOutputDir = "sharp_data/output";
+	public static String evalOutputDir =  DATA_DIR + "processing_output_aka_eval_output";
 	
-	public static String instanceGatheringOutputDir = "sharp_data/output_instancegathering";
+	public static String instanceGatheringOutputDir =  DATA_DIR + "q_output_instance_gathering";
 }