You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by sw...@apache.org on 2013/06/26 22:58:28 UTC
svn commit: r1497093 - in
/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion:
attributes/features/ attributes/subject/ eval/ medfacts/cleartk/ train/ util/
Author: swu
Date: Wed Jun 26 20:58:27 2013
New Revision: 1497093
URL: http://svn.apache.org/r1497093
Log:
Assertion module updated so that the subject, generic, and historyOf attributes actually use the correct feature extractors. Also, some directory renaming, etc., in AssertionConst for easier running.
Modified:
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/SubjectFeaturesExtractor.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/subject/SubjectAttributeClassifier.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateAttributeModels.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/ReadAndPreprocessForAttributeModels.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TrainAttributeModels.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/SubjectFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/SubjectFeaturesExtractor.java?rev=1497093&r1=1497092&r2=1497093&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/SubjectFeaturesExtractor.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/SubjectFeaturesExtractor.java Wed Jun 26 20:58:27 2013
@@ -56,11 +56,12 @@ public class SubjectFeaturesExtractor im
HashMap<String, Boolean> featsMap = SubjectAttributeClassifier.extract(jCas, arg);
- // Pull in all the features that were used for the rule-based module
- features.addAll( hashToFeatureList(featsMap) );
- // Pull in the result of the rule-based module as well
- features.add(new Feature("SUBJECT_CLASSIFIER_LOGIC", SubjectAttributeClassifier.classifyWithLogic(featsMap)));
-
+ if (!featsMap.isEmpty()) {
+ // Pull in all the features that were used for the rule-based module
+ features.addAll( hashToFeatureList(featsMap) );
+ // Pull in the result of the rule-based module as well
+ features.add(new Feature("SUBJECT_CLASSIFIER_LOGIC", SubjectAttributeClassifier.classifyWithLogic(featsMap)));
+ }
return features;
}
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/subject/SubjectAttributeClassifier.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/subject/SubjectAttributeClassifier.java?rev=1497093&r1=1497092&r2=1497093&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/subject/SubjectAttributeClassifier.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/subject/SubjectAttributeClassifier.java Wed Jun 26 20:58:27 2013
@@ -121,7 +121,7 @@ public class SubjectAttributeClassifier
// vfeat.put(feat, null);
// }
// return vfeat;
- return null;
+ return new HashMap<String,Boolean>();
}
// get any SRL arguments
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java?rev=1497093&r1=1497092&r2=1497093&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java Wed Jun 26 20:58:27 2013
@@ -256,8 +256,10 @@ protected static Options options = new O
String[] dirs = options.trainDirectory.split("[;:]");
for (String dir : dirs) {
File trainDir = new File(dir);
- trainFiles.addAll(Arrays.asList(trainDir.listFiles()));
- // System.out.println(trainFiles.toString());
+ if (trainDir.listFiles()!=null) {
+ trainFiles.addAll(Arrays.asList(trainDir.listFiles()));
+ // System.out.println(trainFiles.toString());
+ }
}
}
//File modelsDir = new File("models/modifier");
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java?rev=1497093&r1=1497092&r2=1497093&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java Wed Jun 26 20:58:27 2013
@@ -54,16 +54,17 @@ public class GenericCleartkAnalysisEngin
private void initialize_generic_extractor() throws ResourceInitializationException {
- if (this.contextFeatureExtractors==null) {
- this.contextFeatureExtractors = new ArrayList<CleartkExtractor>();
- }
- this.contextFeatureExtractors.add(
- new CleartkExtractor(
- IdentifiedAnnotation.class, new GenericFeaturesExtractor()) );
+// if (this.contextFeatureExtractors==null) {
+// this.contextFeatureExtractors = new ArrayList<CleartkExtractor>();
+// }
+// this.contextFeatureExtractors.add(
+// new CleartkExtractor(
+// IdentifiedAnnotation.class, new GenericFeaturesExtractor()) );
if(this.entityFeatureExtractors == null){
this.entityFeatureExtractors = new ArrayList<SimpleFeatureExtractor>();
}
this.entityFeatureExtractors.add(new ContextWordWindowExtractor("org/apache/ctakes/assertion/models/generic.txt"));
+ this.entityFeatureExtractors.add(new GenericFeaturesExtractor());
}
@Override
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java?rev=1497093&r1=1497092&r2=1497093&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java Wed Jun 26 20:58:27 2013
@@ -55,17 +55,18 @@ public class HistoryCleartkAnalysisEngin
private void initialize_history_extractor() throws ResourceInitializationException {
- if (this.contextFeatureExtractors==null) {
- this.contextFeatureExtractors = new ArrayList<CleartkExtractor>();
- }
- this.contextFeatureExtractors.add(
- new CleartkExtractor(
- IdentifiedAnnotation.class, new HistoryFeaturesExtractor()) );
-
+// if (this.contextFeatureExtractors==null) {
+// this.contextFeatureExtractors = new ArrayList<CleartkExtractor>();
+// }
+// this.contextFeatureExtractors.add(
+// new CleartkExtractor(
+// IdentifiedAnnotation.class, new HistoryFeaturesExtractor()) );
+//
if(this.entityFeatureExtractors == null){
this.entityFeatureExtractors = new ArrayList<SimpleFeatureExtractor>();
}
this.entityFeatureExtractors.add(new ContextWordWindowExtractor("org/apache/ctakes/assertion/models/history.txt"));
+ this.entityFeatureExtractors.add(new HistoryFeaturesExtractor());
}
@Override
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java?rev=1497093&r1=1497092&r2=1497093&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java Wed Jun 26 20:58:27 2013
@@ -54,13 +54,14 @@ public class SubjectCleartkAnalysisEngin
private void initialize_subject_extractor() {
- if (this.contextFeatureExtractors==null) {
- this.contextFeatureExtractors = new ArrayList<CleartkExtractor>();
- }
- this.contextFeatureExtractors.add(
- new CleartkExtractor(
- IdentifiedAnnotation.class, new SubjectFeaturesExtractor()) );
+// if (this.contextFeatureExtractors==null) {
+// this.contextFeatureExtractors = new ArrayList<CleartkExtractor>();
+// }
+// this.contextFeatureExtractors.add(
+// new CleartkExtractor(
+// IdentifiedAnnotation.class, new SubjectFeaturesExtractor()) );
+ this.entityFeatureExtractors.add( new SubjectFeaturesExtractor());
}
@Override
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateAttributeModels.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateAttributeModels.java?rev=1497093&r1=1497092&r2=1497093&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateAttributeModels.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateAttributeModels.java Wed Jun 26 20:58:27 2013
@@ -17,7 +17,7 @@ public class CrossValidateAttributeModel
ArrayList<String> params = new ArrayList<String>();
params.add("--train-dir"); params.add(AssertionConst.trainingDirectories.get(attribute));
- params.add("--models-dir"); params.add("sharp_data/model/eval.model");
+ params.add("--models-dir"); params.add(AssertionConst.modelDirectory);
params.add("--cross-validation"); params.add("5");
// Build up an "ignore" string
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/ReadAndPreprocessForAttributeModels.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/ReadAndPreprocessForAttributeModels.java?rev=1497093&r1=1497092&r2=1497093&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/ReadAndPreprocessForAttributeModels.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/ReadAndPreprocessForAttributeModels.java Wed Jun 26 20:58:27 2013
@@ -1,5 +1,6 @@
package org.apache.ctakes.assertion.train;
+import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
@@ -17,14 +18,26 @@ public class ReadAndPreprocessForAttribu
ArrayList<String> params = new ArrayList<String>();
// Always preprocess something to a main directory, usually for training
- params.add("--train-dir"); params.add(AssertionConst.preprocessRootDirectory.get(source));
+ String froot = AssertionConst.preprocessRootDirectory.get(source);
+ if (!(new File(froot).exists())) {
+ (new File(froot)).createNewFile();
+ }
+ params.add("--train-dir"); params.add(froot);
// Some corpora (SHARP) may have predetermined dev/test splits. Check AssertionConst.
if (AssertionConst.preprocessForDev.containsKey(source) ) {
- params.add("--dev-dir"); params.add(AssertionConst.preprocessForDev.get(source));
+ String fdev = AssertionConst.preprocessRootDirectory.get(source);
+ if (!(new File(fdev).exists())) {
+ (new File(fdev)).createNewFile();
+ }
+ params.add("--dev-dir"); params.add(fdev);
}
if (AssertionConst.preprocessForTest.containsKey(source) ) {
- params.add("--test-dir"); params.add(AssertionConst.preprocessForTest.get(source));
+ String ftest = AssertionConst.preprocessRootDirectory.get(source);
+ if (!(new File(ftest).exists())) {
+ (new File(ftest)).createNewFile();
+ }
+ params.add("--test-dir"); params.add(ftest);
}
// Specify preprocessing directory (See AssertionConst)
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TrainAttributeModels.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TrainAttributeModels.java?rev=1497093&r1=1497092&r2=1497093&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TrainAttributeModels.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TrainAttributeModels.java Wed Jun 26 20:58:27 2013
@@ -18,7 +18,7 @@ public class TrainAttributeModels {
params.add("--train-dir"); params.add(AssertionConst.trainingDirectories.get(attribute));
// params.add("--test-dir"); params.add("sharp_data/dev");
- params.add("--models-dir"); params.add("sharp_data/model/eval.model");
+ params.add("--models-dir"); params.add(AssertionConst.modelDirectory);
// params.add("--evaluation-output-dir"); params.add("sharp_data/output");
params.add("--train-only");
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java?rev=1497093&r1=1497092&r2=1497093&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionConst.java Wed Jun 26 20:58:27 2013
@@ -10,23 +10,35 @@ public class AssertionConst {
// Locally-stored data models
// expects subdirectories: "Mayo/UMLS_CEM/*batch*/Knowtator*" "Seattle Group Health/UMLS_CEM/*batch*/Knowtator*"
- public static final String SHARP_SEED_CORPUS = "/Users/m081914/work/data/sharp/Seed Corpus";
+ public static final String SHARP_SEED_CORPUS = "/Users/m081914/work/data/sharp/Seed Corpus/";
// expects subdirectories: ast, txt
- public static final String I2B2_2010_CORPUS = "/Users/m081914/work/data/i2b2Challenge2010/Data/i2b2Challenge2010AllTrain";
+ public static final String I2B2_2010_CORPUS = "/Users/m081914/work/data/i2b2Challenge2010/Data/i2b2Challenge2010AllTrain/";
// expects subdirectories: ast, txt
- public static final String I2B2_2010_TEST_CORPUS = "/Users/m081914/work/data/i2b2Challenge2010/Data/Test/reports";
+ public static final String I2B2_2010_TEST_CORPUS = "/Users/m081914/work/data/i2b2Challenge2010/Data/Test/reports/";
+ // raw and processed text, expects subdirectories for different sources, then subsubdirectories for train/test/dev
+ public static final String DATA_DIR = "/Users/m081914/work/data/assertion/";
+
+ // specify the model to write (train/crossvalidate) or read (test/crossvalidate).
+ // please rename for different configurations of training data
+ public static String modelDirectory = "../ctakes-assertion-res/resources/model/sharptrain-xval";
+// public static String modelDirectory = "../ctakes-assertion-res/resources/model/sharptrain-xval";
+// public static String modelDirectory = "../ctakes-assertion-res/resources/model/sharptrain";
+// public static String modelDirectory = "../ctakes-assertion-res/resources/model/sharptrain+i2b2train";
+// public static String modelDirectory = "../ctakes-assertion-res/resources/model/i2b2train";
+
+
// Specify training directories for each attribute in a (semi)colon-separated list, e.g., "sharp_data/dev:sharp_data/train"
public static HashMap<String,String> trainingDirectories = new HashMap<String,String>();
static {
- trainingDirectories.put("polarity","sharp_data/train:i2b2_data/train");
+ trainingDirectories.put("polarity",DATA_DIR+"fromtim_sharp_data/train");
// trainingDirectories.put("polarity","sharp_data/train");
// trainingDirectories.put("polarity","i2b2_data/train");
- trainingDirectories.put("conditional","sharp_data/train");
- trainingDirectories.put("uncertainty","sharp_data/train");
- trainingDirectories.put("subject","sharp_data/train");
- trainingDirectories.put("generic","sharp_data/train");
- trainingDirectories.put("historyOf","sharp_data/train");
+ trainingDirectories.put("conditional",DATA_DIR+"fromtim_sharp_data/train");
+ trainingDirectories.put("uncertainty",DATA_DIR+"fromtim_sharp_data/train");
+ trainingDirectories.put("subject",DATA_DIR+"fromtim_sharp_data/train");
+ trainingDirectories.put("generic",DATA_DIR+"fromtim_sharp_data/train");
+ trainingDirectories.put("historyOf",DATA_DIR+"fromtim_sharp_data/train");
}
// If you don't want to train/cross-validate everything, comment these out
@@ -46,25 +58,24 @@ public class AssertionConst {
// Specify input and output data locations for preprocessing. Results will be used for model training
public static HashMap<String,String> preprocessRootDirectory = new HashMap<String,String>();
static {
- preprocessRootDirectory.put(SHARP_SEED_CORPUS+"/Mayo/UMLS_CEM","sharp_data/train");
- preprocessRootDirectory.put(SHARP_SEED_CORPUS+"/Seattle Group Health/UMLS_CEM","sharp_data/train");
- preprocessRootDirectory.put(I2B2_2010_CORPUS,"i2b2_data/train");
- preprocessRootDirectory.put(I2B2_2010_TEST_CORPUS,"i2b2_data/test");
+ preprocessRootDirectory.put(SHARP_SEED_CORPUS+"Mayo/UMLS_CEM", DATA_DIR+"sharp_data/train");
+ preprocessRootDirectory.put(SHARP_SEED_CORPUS+"Seattle Group Health/UMLS_CEM", DATA_DIR+"sharp_data/train");
+ preprocessRootDirectory.put(I2B2_2010_CORPUS, DATA_DIR+"i2b2_data/train");
+ preprocessRootDirectory.put(I2B2_2010_TEST_CORPUS, DATA_DIR+"i2b2_data/test");
}
// Specify input and output data locations for preprocessing. Results will be used for model test
public static HashMap<String,String> preprocessForTest = new HashMap<String,String>();
static {
- preprocessForTest.put(SHARP_SEED_CORPUS+"/Mayo/UMLS_CEM","sharp_data/test");
- preprocessForTest.put(SHARP_SEED_CORPUS+"/Seattle Group Health/UMLS_CEM","sharp_data/test");
+ preprocessForTest.put(SHARP_SEED_CORPUS+"/Mayo/UMLS_CEM", DATA_DIR+"sharp_data/test");
+ preprocessForTest.put(SHARP_SEED_CORPUS+"/Seattle Group Health/UMLS_CEM", DATA_DIR+"sharp_data/test");
}
// Specify input and output data locations for preprocessing. Results will be used for model dev
public static HashMap<String,String> preprocessForDev = new HashMap<String,String>();
static {
- preprocessForDev.put(SHARP_SEED_CORPUS+"/Mayo/UMLS_CEM","sharp_data/dev");
- preprocessForDev.put(SHARP_SEED_CORPUS+"/Seattle Group Health/UMLS_CEM","sharp_data/dev");
+ preprocessForDev.put(SHARP_SEED_CORPUS+"/Mayo/UMLS_CEM", DATA_DIR+"sharp_data/dev");
+ preprocessForDev.put(SHARP_SEED_CORPUS+"/Seattle Group Health/UMLS_CEM", DATA_DIR+"sharp_data/dev");
}
-
-
+
}