You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by sw...@apache.org on 2013/06/18 21:30:10 UTC
svn commit: r1494265 - in
/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion:
eval/ pipelines/ train/
Author: swu
Date: Tue Jun 18 19:30:10 2013
New Revision: 1494265
URL: http://svn.apache.org/r1494265
Log:
Make it possible to train assertion models with different data sources
Added:
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateAttributeModels.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TrainAttributeModels.java
Modified:
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java?rev=1494265&r1=1494264&r2=1494265&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java Tue Jun 18 19:30:10 2013
@@ -237,7 +237,8 @@ protected static Options options = new O
public static void main(String[] args) throws Exception {
//Options options = new Options();
- options.parseOptions(args);
+ resetOptions();
+ options.parseOptions(args);
// System.err.println("forcing skipping of subject processing!!!");
// options.runSubject = false;
@@ -342,21 +343,48 @@ protected static Options options = new O
CollectionReader trainCollectionReader = evaluation.getCollectionReader(trainFiles);
evaluation.train(trainCollectionReader, modelsDir);
}
- if (testFiles==null || testFiles.size()==0) {
- throw new RuntimeException("testFiles = " + testFiles + " testFiles.size() = " + (testFiles==null ? "null": testFiles.size())) ;
- }
- logger.debug("testFiles.size() = " + testFiles.size());
- CollectionReader testCollectionReader = evaluation.getCollectionReader(testFiles);
- Map<String, AnnotationStatistics> stats = evaluation.test(testCollectionReader, modelsDir);
- AssertionEvaluation.printScore(stats, modelsDir.getAbsolutePath());
+ if (!options.trainOnly) {
+ if (testFiles==null || testFiles.size()==0) {
+ throw new RuntimeException("testFiles = " + testFiles + " testFiles.size() = " + (testFiles==null ? "null": testFiles.size())) ;
+ }
+ logger.debug("testFiles.size() = " + testFiles.size());
+ CollectionReader testCollectionReader = evaluation.getCollectionReader(testFiles);
+ Map<String, AnnotationStatistics> stats = evaluation.test(testCollectionReader, modelsDir);
+
+ AssertionEvaluation.printScore(stats, modelsDir.getAbsolutePath());
+ }
}
System.out.println("Finished assertion module.");
}
- private static void printOptionsForDebugging(Options options)
+ /**
+  * Clears the shared static {@code options} object: every flag listed below becomes
+  * {@code false} and every directory / fold setting becomes {@code null}.
+  * {@code main} calls this right before {@code options.parseOptions(args)}, so values
+  * parsed by an earlier {@code main} call in the same JVM are not carried over.
+  */
+ private static void resetOptions() {
+   // input / output locations and fold count
+   options.trainDirectory = null;
+   options.testDirectory = null;
+   options.devDirectory = null;
+   options.modelsDirectory = null;
+   options.preprocessDir = null;
+   options.evaluationOutputDirectory = null;
+   options.crossValidationFolds = null;
+
+   // run-mode switches
+   options.trainOnly = false;
+   options.testOnly = false;
+   options.evalOnly = false;
+   options.noCleartk = false;
+   options.printErrors = false;
+
+   // per-attribute ignore switches
+   options.ignorePolarity = false;
+   options.ignoreConditional = false;
+   options.ignoreUncertainty = false;
+   options.ignoreSubject = false;
+   options.ignoreGeneric = false;
+   options.ignoreHistory = false;
+ }
+
+private static void printOptionsForDebugging(Options options)
{
System.out.format(
"training dir: %s%n" +
@@ -442,6 +470,7 @@ public static void printScore(Map<String
trainDir = new File(options.trainDirectory);
}
if (preprocessDir.getName().contains("i2b2")) {
+ GoldEntityAndAttributeReaderPipelineForSeedCorpus.readI2B2Challenge2010(preprocessDir, trainDir);
} else {
GoldEntityAndAttributeReaderPipelineForSeedCorpus.readSharpUmlsCem(
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java?rev=1494265&r1=1494264&r2=1494265&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java Tue Jun 18 19:30:10 2013
@@ -37,6 +37,7 @@ import org.uimafit.factory.TypeSystemDes
import org.uimafit.pipeline.SimplePipeline;
import org.apache.ctakes.assertion.cr.GoldEntityAndAttributeReader;
+import org.apache.ctakes.assertion.cr.I2B2Challenge2010CollectionReader;
import org.apache.ctakes.core.ae.SHARPKnowtatorXMLReader;
import org.apache.ctakes.core.cr.FilesInDirectoryCollectionReader;
@@ -207,4 +208,46 @@ public class GoldEntityAndAttributeReade
logger.info("Finished!");
}
+
+ /**
+  * Runs {@link I2B2Challenge2010CollectionReader} over {@code parentDirectory}, applies the
+  * "desc/analysis_engine/AttributeDiscoveryPreprocessor" descriptor to every document and,
+  * when {@code trainDirectory} is not null, appends an {@link XWriter} that writes into it.
+  *
+  * @param parentDirectory handed to the collection reader as its "inputDir" parameter
+  * @param trainDirectory XWriter output directory; {@code null} means no XWriter is added
+  * @throws ResourceInitializationException declared by the description factories used below
+  * @throws UIMAException declared by the pipeline run
+  * @throws IOException declared by the pipeline run
+  */
+ public static void readI2B2Challenge2010(File parentDirectory, File trainDirectory)
+     throws ResourceInitializationException, UIMAException, IOException {
+
+   // use the uimafit method of finding available type system
+   // descriptor via META-INF/org.uimafit/types.txt
+   // (found in ctakes-type-system/src/main/resources)
+   TypeSystemDescription types = TypeSystemDescriptionFactory.createTypeSystemDescription();
+
+   AggregateBuilder pipeline = new AggregateBuilder();
+
+   CollectionReaderDescription reader = CollectionReaderFactory.createDescription(
+       I2B2Challenge2010CollectionReader.class, types, "inputDir", parentDirectory);
+
+   // fill in other values that are necessary for preprocessing
+   pipeline.add(AnalysisEngineFactory.createAnalysisEngineDescription(
+       "desc/analysis_engine/AttributeDiscoveryPreprocessor"));
+
+   // the writer stage is optional: it is only added when an output directory was given
+   if (trainDirectory != null) {
+     pipeline.add(AnalysisEngineFactory.createPrimitiveDescription(
+         XWriter.class,
+         types,
+         XWriter.PARAM_OUTPUT_DIRECTORY_NAME, trainDirectory,
+         XWriter.PARAM_FILE_NAMER_CLASS_NAME, CtakesFileNamer.class.getName()));
+   }
+
+   SimplePipeline.runPipeline(reader, pipeline.createAggregateDescription());
+   logger.info("Finished!");
+ }
+
}
Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateAttributeModels.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateAttributeModels.java?rev=1494265&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateAttributeModels.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateAttributeModels.java Tue Jun 18 19:30:10 2013
@@ -0,0 +1,64 @@
+package org.apache.ctakes.assertion.train;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+
+import org.apache.ctakes.assertion.eval.AssertionEvaluation;
+
+import scala.actors.threadpool.Arrays;
+
+/**
+ * Cross-validates each assertion attribute on its own: {@link AssertionEvaluation#main(String[])}
+ * is called once per attribute, with an {@code --ignore-*} flag for every other attribute.
+ */
+public class CrossValidateAttributeModels {
+
+  /** Attributes to evaluate, in the order the runs are launched. */
+  private static final ArrayList<String> annotationTypes = new ArrayList<String>();
+  static {
+    annotationTypes.add("polarity");
+    annotationTypes.add("conditional");
+    annotationTypes.add("uncertainty");
+    annotationTypes.add("subject");
+    annotationTypes.add("generic");
+    annotationTypes.add("historyOf");
+  }
+
+  // Specify training directories for each attribute in a (semi)colon-separated list, e.g., "sharp_data/dev:sharp_data/train"
+  private static final HashMap<String,String> trainingDirectories = new HashMap<String,String>();
+  static {
+    trainingDirectories.put("polarity","sharp_data/dev");
+    trainingDirectories.put("conditional","sharp_data/dev");
+    trainingDirectories.put("uncertainty","sharp_data/dev");
+    trainingDirectories.put("subject","sharp_data/dev");
+    trainingDirectories.put("generic","sharp_data/dev");
+    trainingDirectories.put("historyOf","sharp_data/dev");
+  }
+
+  /** Launches one {@code --cross-validation 5} run per attribute; {@code args} is ignored. */
+  public static void main(String[] args) throws Exception {
+    for (String attribute : annotationTypes) {
+      ArrayList<String> params = new ArrayList<String>();
+      params.add("--train-dir"); params.add(trainingDirectories.get(attribute));
+      params.add("--models-dir"); params.add("sharp_data/model/eval.model");
+      params.add("--cross-validation"); params.add("5");
+
+      // Pass an --ignore-* flag for every attribute except the current one.
+      for (String other : annotationTypes) {
+        if (!other.equals(attribute)) {
+          params.add(ignoreFlagFor(other));
+        }
+      }
+
+      // ArrayList#toString already renders "[a, b, c]"; no Arrays helper is needed.
+      System.out.println(params);
+      // Run the actual assertion evaluation on just one attribute
+      AssertionEvaluation.main(params.toArray(new String[0]));
+    }
+  }
+
+  /** Builds the ignore flag for an attribute; "historyOf" maps to "--ignore-history". */
+  private static String ignoreFlagFor(String attribute) {
+    return "--ignore-" + ("historyOf".equals(attribute) ? "history" : attribute);
+  }
+}
Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TrainAttributeModels.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TrainAttributeModels.java?rev=1494265&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TrainAttributeModels.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TrainAttributeModels.java Tue Jun 18 19:30:10 2013
@@ -0,0 +1,66 @@
+package org.apache.ctakes.assertion.train;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+
+import org.apache.ctakes.assertion.eval.AssertionEvaluation;
+
+import scala.actors.threadpool.Arrays;
+
+/**
+ * Trains each assertion attribute model on its own: {@link AssertionEvaluation#main(String[])}
+ * is called once per attribute with {@code --train-only} and an {@code --ignore-*} flag for
+ * every other attribute, so each attribute can be given its own training directories.
+ */
+public class TrainAttributeModels {
+
+  /** Attributes to train, in the order the runs are launched. */
+  private static final ArrayList<String> annotationTypes = new ArrayList<String>();
+  static {
+    annotationTypes.add("polarity");
+    annotationTypes.add("conditional");
+    annotationTypes.add("uncertainty");
+    annotationTypes.add("subject");
+    annotationTypes.add("generic");
+    annotationTypes.add("historyOf");
+  }
+
+  // Specify training directories for each attribute in a (semi)colon-separated list, e.g., "sharp_data/dev:sharp_data/train"
+  private static final HashMap<String,String> trainingDirectories = new HashMap<String,String>();
+  static {
+    trainingDirectories.put("polarity","sharp_data/train:i2b2_data/train");
+    trainingDirectories.put("conditional","sharp_data/train");
+    trainingDirectories.put("uncertainty","sharp_data/train");
+    trainingDirectories.put("subject","sharp_data/train");
+    trainingDirectories.put("generic","sharp_data/train");
+    trainingDirectories.put("historyOf","sharp_data/train");
+  }
+
+  /** Launches one {@code --train-only} run per attribute; {@code args} is ignored. */
+  public static void main(String[] args) throws Exception {
+    for (String attribute : annotationTypes) {
+      ArrayList<String> params = new ArrayList<String>();
+      params.add("--train-dir"); params.add(trainingDirectories.get(attribute));
+      params.add("--models-dir"); params.add("sharp_data/model/eval.model");
+      params.add("--train-only");
+      // --test-dir sharp_data/dev and --evaluation-output-dir sharp_data/output are left unset here.
+
+      // Pass an --ignore-* flag for every attribute except the current one.
+      for (String other : annotationTypes) {
+        if (!other.equals(attribute)) {
+          params.add(ignoreFlagFor(other));
+        }
+      }
+
+      // ArrayList#toString already renders "[a, b, c]"; no Arrays helper is needed.
+      System.out.println(params);
+      // Run the actual assertion training on just one attribute
+      AssertionEvaluation.main(params.toArray(new String[0]));
+    }
+  }
+
+  /** Builds the ignore flag for an attribute; "historyOf" maps to "--ignore-history". */
+  private static String ignoreFlagFor(String attribute) {
+    return "--ignore-" + ("historyOf".equals(attribute) ? "history" : attribute);
+  }
+}