You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by sw...@apache.org on 2013/06/26 22:58:28 UTC

svn commit: r1497093 - in /ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion: attributes/features/ attributes/subject/ eval/ medfacts/cleartk/ train/ util/

Author: swu
Date: Wed Jun 26 20:58:27 2013
New Revision: 1497093

URL: http://svn.apache.org/r1497093
Log:
Assertion module updated so that the subject, generic, and historyOf attributes actually use the correct feature extractors. Also includes some directory renaming and related changes in AssertionConst to make the module easier to run.

Modified:
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/SubjectFeaturesExtractor.java
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/subject/SubjectAttributeClassifier.java
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateAttributeModels.java
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/ReadAndPreprocessForAttributeModels.java
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TrainAttributeModels.java
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/SubjectFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/SubjectFeaturesExtractor.java?rev=1497093&r1=1497092&r2=1497093&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/SubjectFeaturesExtractor.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/SubjectFeaturesExtractor.java Wed Jun 26 20:58:27 2013
@@ -56,11 +56,12 @@ public class SubjectFeaturesExtractor im
 	    
 	    HashMap<String, Boolean> featsMap = SubjectAttributeClassifier.extract(jCas, arg);
 
-	    // Pull in all the features that were used for the rule-based module
-	    features.addAll( hashToFeatureList(featsMap) );
-	    // Pull in the result of the rule-based module as well
-	    features.add(new Feature("SUBJECT_CLASSIFIER_LOGIC", SubjectAttributeClassifier.classifyWithLogic(featsMap)));
-
+	    if (!featsMap.isEmpty()) {
+	    	// Pull in all the features that were used for the rule-based module
+	    	features.addAll( hashToFeatureList(featsMap) );
+	    	// Pull in the result of the rule-based module as well
+	    	features.add(new Feature("SUBJECT_CLASSIFIER_LOGIC", SubjectAttributeClassifier.classifyWithLogic(featsMap)));
+	    }
 	    
 	    return features;
 	}

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/subject/SubjectAttributeClassifier.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/subject/SubjectAttributeClassifier.java?rev=1497093&r1=1497092&r2=1497093&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/subject/SubjectAttributeClassifier.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/subject/SubjectAttributeClassifier.java Wed Jun 26 20:58:27 2013
@@ -121,7 +121,7 @@ public class SubjectAttributeClassifier 
 //				vfeat.put(feat, null);
 //			}
 //			return vfeat;
-			return null;
+			return new HashMap<String,Boolean>();
 		}
 				
 		// get any SRL arguments

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java?rev=1497093&r1=1497092&r2=1497093&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java Wed Jun 26 20:58:27 2013
@@ -256,8 +256,10 @@ protected static Options options = new O
     	String[] dirs = options.trainDirectory.split("[;:]");
     	for (String dir : dirs) {
     		File trainDir = new File(dir);
-    		trainFiles.addAll(Arrays.asList(trainDir.listFiles()));
-    		//    	System.out.println(trainFiles.toString());
+    		if (trainDir.listFiles()!=null) {
+    			trainFiles.addAll(Arrays.asList(trainDir.listFiles()));
+        		//    	System.out.println(trainFiles.toString());
+    		}
     	}
     }
     //File modelsDir = new File("models/modifier");

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java?rev=1497093&r1=1497092&r2=1497093&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java Wed Jun 26 20:58:27 2013
@@ -54,16 +54,17 @@ public class GenericCleartkAnalysisEngin
 
 	private void initialize_generic_extractor() throws ResourceInitializationException {
 		
-		if (this.contextFeatureExtractors==null) {
-			this.contextFeatureExtractors = new ArrayList<CleartkExtractor>();
-		}
-		this.contextFeatureExtractors.add( 
-				new CleartkExtractor(
-						IdentifiedAnnotation.class, new GenericFeaturesExtractor()) );
+//		if (this.contextFeatureExtractors==null) {
+//			this.contextFeatureExtractors = new ArrayList<CleartkExtractor>();
+//		}
+//		this.contextFeatureExtractors.add( 
+//				new CleartkExtractor(
+//						IdentifiedAnnotation.class, new GenericFeaturesExtractor()) );
 		if(this.entityFeatureExtractors == null){
 			this.entityFeatureExtractors = new ArrayList<SimpleFeatureExtractor>();
 		}
 		this.entityFeatureExtractors.add(new ContextWordWindowExtractor("org/apache/ctakes/assertion/models/generic.txt"));
+		this.entityFeatureExtractors.add(new GenericFeaturesExtractor());
 	}
 	
 	@Override

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java?rev=1497093&r1=1497092&r2=1497093&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java Wed Jun 26 20:58:27 2013
@@ -55,17 +55,18 @@ public class HistoryCleartkAnalysisEngin
 
 	private void initialize_history_extractor() throws ResourceInitializationException {
 		
-		if (this.contextFeatureExtractors==null) {
-			this.contextFeatureExtractors = new ArrayList<CleartkExtractor>();
-		}
-		this.contextFeatureExtractors.add( 
-				new CleartkExtractor(
-						IdentifiedAnnotation.class, new HistoryFeaturesExtractor()) );
-		
+//		if (this.contextFeatureExtractors==null) {
+//			this.contextFeatureExtractors = new ArrayList<CleartkExtractor>();
+//		}
+//		this.contextFeatureExtractors.add( 
+//				new CleartkExtractor(
+//						IdentifiedAnnotation.class, new HistoryFeaturesExtractor()) );
+//		
 		if(this.entityFeatureExtractors == null){
 			this.entityFeatureExtractors = new ArrayList<SimpleFeatureExtractor>();
 		}
 		this.entityFeatureExtractors.add(new ContextWordWindowExtractor("org/apache/ctakes/assertion/models/history.txt"));
+		this.entityFeatureExtractors.add(new HistoryFeaturesExtractor());
 	}
 	
 	@Override

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java?rev=1497093&r1=1497092&r2=1497093&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java Wed Jun 26 20:58:27 2013
@@ -54,13 +54,14 @@ public class SubjectCleartkAnalysisEngin
 
 	private void initialize_subject_extractor() {
 		
-		if (this.contextFeatureExtractors==null) {
-			this.contextFeatureExtractors = new ArrayList<CleartkExtractor>();
-		}
-		this.contextFeatureExtractors.add( 
-				new CleartkExtractor(
-						IdentifiedAnnotation.class, new SubjectFeaturesExtractor()) );
+//		if (this.contextFeatureExtractors==null) {
+//			this.contextFeatureExtractors = new ArrayList<CleartkExtractor>();
+//		}
+//		this.contextFeatureExtractors.add( 
+//				new CleartkExtractor(
+//						IdentifiedAnnotation.class, new SubjectFeaturesExtractor()) );
 				
+		this.entityFeatureExtractors.add( new SubjectFeaturesExtractor());
 	}
 	
 	@Override

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateAttributeModels.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateAttributeModels.java?rev=1497093&r1=1497092&r2=1497093&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateAttributeModels.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateAttributeModels.java Wed Jun 26 20:58:27 2013
@@ -17,7 +17,7 @@ public class CrossValidateAttributeModel
 			ArrayList<String> params = new ArrayList<String>();
 
 			params.add("--train-dir"); 			params.add(AssertionConst.trainingDirectories.get(attribute));
-			params.add("--models-dir"); 		params.add("sharp_data/model/eval.model");
+			params.add("--models-dir"); 		params.add(AssertionConst.modelDirectory);
 			params.add("--cross-validation"); 	params.add("5");
 			
 			// Build up an "ignore" string

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/ReadAndPreprocessForAttributeModels.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/ReadAndPreprocessForAttributeModels.java?rev=1497093&r1=1497092&r2=1497093&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/ReadAndPreprocessForAttributeModels.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/ReadAndPreprocessForAttributeModels.java Wed Jun 26 20:58:27 2013
@@ -1,5 +1,6 @@
 package org.apache.ctakes.assertion.train;
 
+import java.io.File;
 import java.util.ArrayList;
 import java.util.HashMap;
 
@@ -17,14 +18,26 @@ public class ReadAndPreprocessForAttribu
 			ArrayList<String> params = new ArrayList<String>();
 
 			// Always preprocess something to a main directory, usually for training
-			params.add("--train-dir"); 		params.add(AssertionConst.preprocessRootDirectory.get(source));
+			String froot = AssertionConst.preprocessRootDirectory.get(source);
+			if (!(new File(froot).exists())) {
+				(new File(froot)).createNewFile();
+			}
+			params.add("--train-dir"); 		params.add(froot);
 
 			// Some corpora (SHARP) may have predetermined dev/test splits. Check AssertionConst.
 			if (AssertionConst.preprocessForDev.containsKey(source) ) {
-				params.add("--dev-dir"); 	params.add(AssertionConst.preprocessForDev.get(source));
+				String fdev = AssertionConst.preprocessRootDirectory.get(source);
+				if (!(new File(fdev).exists())) {
+					(new File(fdev)).createNewFile();
+				}
+				params.add("--dev-dir"); 	params.add(fdev);
 			}
 			if (AssertionConst.preprocessForTest.containsKey(source) ) {
-				params.add("--test-dir"); 	params.add(AssertionConst.preprocessForTest.get(source));
+				String ftest = AssertionConst.preprocessRootDirectory.get(source);
+				if (!(new File(ftest).exists())) {
+					(new File(ftest)).createNewFile();
+				}
+				params.add("--test-dir"); 	params.add(ftest);
 			}
 			
 			// Specify preprocessing directory (See AssertionConst)

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TrainAttributeModels.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TrainAttributeModels.java?rev=1497093&r1=1497092&r2=1497093&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TrainAttributeModels.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TrainAttributeModels.java Wed Jun 26 20:58:27 2013
@@ -18,7 +18,7 @@ public class TrainAttributeModels {
 
 			params.add("--train-dir"); 	params.add(AssertionConst.trainingDirectories.get(attribute));
 //			params.add("--test-dir"); 	params.add("sharp_data/dev");
-			params.add("--models-dir"); params.add("sharp_data/model/eval.model");
+			params.add("--models-dir"); params.add(AssertionConst.modelDirectory);
 //			params.add("--evaluation-output-dir");	params.add("sharp_data/output"); 
 			params.add("--train-only"); 
 			

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java?rev=1497093&r1=1497092&r2=1497093&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java Wed Jun 26 20:58:27 2013
@@ -10,23 +10,35 @@ public class AssertionConst {
 	// Locally-stored data models
 	
 	// expects subdirectories: "Mayo/UMLS_CEM/*batch*/Knowtator*" "Seattle Group Health/UMLS_CEM/*batch*/Knowtator*"
-	public static final String SHARP_SEED_CORPUS = "/Users/m081914/work/data/sharp/Seed Corpus";
+	public static final String SHARP_SEED_CORPUS = "/Users/m081914/work/data/sharp/Seed Corpus/";
 	// expects subdirectories: ast, txt 
-	public static final String I2B2_2010_CORPUS = "/Users/m081914/work/data/i2b2Challenge2010/Data/i2b2Challenge2010AllTrain";
+	public static final String I2B2_2010_CORPUS = "/Users/m081914/work/data/i2b2Challenge2010/Data/i2b2Challenge2010AllTrain/";
 	// expects subdirectories: ast, txt
-	public static final String I2B2_2010_TEST_CORPUS = "/Users/m081914/work/data/i2b2Challenge2010/Data/Test/reports";
+	public static final String I2B2_2010_TEST_CORPUS = "/Users/m081914/work/data/i2b2Challenge2010/Data/Test/reports/";
 
+	// raw and processed text, expects subdirectories for different sources, then subsubdirectories for train/test/dev
+	public static final String DATA_DIR = "/Users/m081914/work/data/assertion/";
+	
+	// specify the model to write (train/crossvalidate) or read (test/crossvalidate).
+	//  please rename for different configurations of training data 
+	public static String modelDirectory = "../ctakes-assertion-res/resources/model/sharptrain-xval";
+//	public static String modelDirectory = "../ctakes-assertion-res/resources/model/sharptrain-xval";
+//	public static String modelDirectory = "../ctakes-assertion-res/resources/model/sharptrain";
+//	public static String modelDirectory = "../ctakes-assertion-res/resources/model/sharptrain+i2b2train";
+//	public static String modelDirectory = "../ctakes-assertion-res/resources/model/i2b2train";
+
+	
 	// Specify training directories for each attribute in a (semi)colon-separated list, e.g., "sharp_data/dev:sharp_data/train"
 	public static HashMap<String,String> trainingDirectories = new HashMap<String,String>();
 	static { 
-		trainingDirectories.put("polarity","sharp_data/train:i2b2_data/train");
+		trainingDirectories.put("polarity",DATA_DIR+"fromtim_sharp_data/train");
 //		trainingDirectories.put("polarity","sharp_data/train");
 //		trainingDirectories.put("polarity","i2b2_data/train");
-		trainingDirectories.put("conditional","sharp_data/train");
-		trainingDirectories.put("uncertainty","sharp_data/train");
-		trainingDirectories.put("subject","sharp_data/train");
-		trainingDirectories.put("generic","sharp_data/train");
-		trainingDirectories.put("historyOf","sharp_data/train");
+		trainingDirectories.put("conditional",DATA_DIR+"fromtim_sharp_data/train");
+		trainingDirectories.put("uncertainty",DATA_DIR+"fromtim_sharp_data/train");
+		trainingDirectories.put("subject",DATA_DIR+"fromtim_sharp_data/train");
+		trainingDirectories.put("generic",DATA_DIR+"fromtim_sharp_data/train");
+		trainingDirectories.put("historyOf",DATA_DIR+"fromtim_sharp_data/train");
 	}
 		
 	// If you don't want to train/cross-validate everything, comment these out
@@ -46,25 +58,24 @@ public class AssertionConst {
 	// Specify input and output data locations for preprocessing.  Results will be used for model training
 	public static HashMap<String,String> preprocessRootDirectory = new HashMap<String,String>();
 	static { 
-		preprocessRootDirectory.put(SHARP_SEED_CORPUS+"/Mayo/UMLS_CEM","sharp_data/train");
-		preprocessRootDirectory.put(SHARP_SEED_CORPUS+"/Seattle Group Health/UMLS_CEM","sharp_data/train");
-		preprocessRootDirectory.put(I2B2_2010_CORPUS,"i2b2_data/train");
-		preprocessRootDirectory.put(I2B2_2010_TEST_CORPUS,"i2b2_data/test");
+		preprocessRootDirectory.put(SHARP_SEED_CORPUS+"Mayo/UMLS_CEM", DATA_DIR+"sharp_data/train");
+		preprocessRootDirectory.put(SHARP_SEED_CORPUS+"Seattle Group Health/UMLS_CEM", DATA_DIR+"sharp_data/train");
+		preprocessRootDirectory.put(I2B2_2010_CORPUS, DATA_DIR+"i2b2_data/train");
+		preprocessRootDirectory.put(I2B2_2010_TEST_CORPUS, DATA_DIR+"i2b2_data/test");
 	}
 	
 	// Specify input and output data locations for preprocessing.  Results will be used for model test
 	public static HashMap<String,String> preprocessForTest = new HashMap<String,String>();
 	static { 
-		preprocessForTest.put(SHARP_SEED_CORPUS+"/Mayo/UMLS_CEM","sharp_data/test");
-		preprocessForTest.put(SHARP_SEED_CORPUS+"/Seattle Group Health/UMLS_CEM","sharp_data/test");
+		preprocessForTest.put(SHARP_SEED_CORPUS+"/Mayo/UMLS_CEM", DATA_DIR+"sharp_data/test");
+		preprocessForTest.put(SHARP_SEED_CORPUS+"/Seattle Group Health/UMLS_CEM", DATA_DIR+"sharp_data/test");
 	}
 
 	// Specify input and output data locations for preprocessing.  Results will be used for model dev
 	public static HashMap<String,String> preprocessForDev = new HashMap<String,String>();
 	static { 
-		preprocessForDev.put(SHARP_SEED_CORPUS+"/Mayo/UMLS_CEM","sharp_data/dev");
-		preprocessForDev.put(SHARP_SEED_CORPUS+"/Seattle Group Health/UMLS_CEM","sharp_data/dev");
+		preprocessForDev.put(SHARP_SEED_CORPUS+"/Mayo/UMLS_CEM", DATA_DIR+"sharp_data/dev");
+		preprocessForDev.put(SHARP_SEED_CORPUS+"/Seattle Group Health/UMLS_CEM", DATA_DIR+"sharp_data/dev");
 	}
-	
-	
+		
 }