You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by sw...@apache.org on 2013/08/19 20:17:09 UTC
svn commit: r1515543 - in
/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion:
eval/AssertionEvaluation.java
pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java
Author: swu
Date: Mon Aug 19 18:17:09 2013
New Revision: 1515543
URL: http://svn.apache.org/r1515543
Log:
trying to get mipacq and negex test set to read and train/test correctly
Modified:
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java?rev=1515543&r1=1515542&r2=1515543&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java Mon Aug 19 18:17:09 2013
@@ -544,20 +544,26 @@ public static void printScore(Map<String
paths);
}
- public static void preprocess(File preprocessDir ) throws ResourceInitializationException, UIMAException, IOException {
+ public static void preprocess(File rawDir ) throws ResourceInitializationException, UIMAException, IOException {
// File devDirectory = new File(options.trainDirectory.getParentFile() + File.separator + "dev");
- File trainDir = null;
+ File preprocessedDir = null;
if (options.trainDirectory.split("[;:]").length>1) {
throw new IOException("Assertion preprocess wants to write to one train directory, but you've supplied multiple: " + options.trainDirectory);
} else {
- trainDir = new File(options.trainDirectory);
+ preprocessedDir = new File(options.trainDirectory);
}
- if (preprocessDir.getAbsolutePath().contains("i2b2")) {
- GoldEntityAndAttributeReaderPipelineForSeedCorpus.readI2B2Challenge2010(preprocessDir, trainDir);
+ if (rawDir.getAbsolutePath().contains("i2b2")) {
+ GoldEntityAndAttributeReaderPipelineForSeedCorpus.readI2B2Challenge2010(rawDir, preprocessedDir);
- } else {
+ } else if (rawDir.getAbsolutePath().contains("mipacq")) {
+ GoldEntityAndAttributeReaderPipelineForSeedCorpus.readMiPACQ(rawDir, preprocessedDir);
+
+ } else if (rawDir.getAbsolutePath().contains("negex")) {
+ GoldEntityAndAttributeReaderPipelineForSeedCorpus.readNegexTestSet(rawDir, preprocessedDir);
+
+ } else{
GoldEntityAndAttributeReaderPipelineForSeedCorpus.readSharpUmlsCem(
- preprocessDir, trainDir, options.testDirectory, options.devDirectory);
+ rawDir, preprocessedDir, options.testDirectory, options.devDirectory);
}
}
@@ -1182,8 +1188,8 @@ private static void printErrors(JCas jCa
} else {
if(!goldLabel.equals(systemLabel)){
if(trueCategory == null){
- // used for multi-class case:
- System.out.println(classifierType+" Incorrectly labeled as " + systemLabel + " when the example was " + goldLabel + ": " + formatError(jCas, goldAnnotation));
+ // used for multi-class case. Incorrect_system_label(Correct_label):
+ System.out.println(classifierType+ " "+ systemLabel + "(" + goldLabel + "): " + formatError(jCas, systemAnnotation));
}else if(systemLabel.equals(trueCategory)){
System.out.println(classifierType+" FP: " + typeId + " " + formatError(jCas, systemAnnotation) + "| gold:|" + formatError(jCas, goldAnnotation) + " " + documentId);
}else{
@@ -1192,6 +1198,7 @@ private static void printErrors(JCas jCa
}else{
if(trueCategory == null){
// multi-class case -- probably don't want to print anything?
+ System.out.println(classifierType+ " "+ systemLabel + "(" + goldLabel + "): " + formatError(jCas, systemAnnotation));
}else if(systemLabel.equals(trueCategory)){
System.out.println(classifierType+" TP: " + typeId + " " + formatError(jCas, systemAnnotation) + "| gold:|" + formatError(jCas, goldAnnotation) + " " + documentId);
}else{
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java?rev=1515543&r1=1515542&r2=1515543&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java Mon Aug 19 18:17:09 2013
@@ -38,6 +38,8 @@ import org.uimafit.pipeline.SimplePipeli
import org.apache.ctakes.assertion.cr.GoldEntityAndAttributeReader;
import org.apache.ctakes.assertion.cr.I2B2Challenge2010CollectionReader;
+import org.apache.ctakes.assertion.cr.MiPACQKnowtatorXMLReader;
+import org.apache.ctakes.assertion.cr.NegExCorpusReader;
import org.apache.ctakes.core.ae.SHARPKnowtatorXMLReader;
import org.apache.ctakes.core.cr.FilesInDirectoryCollectionReader;
@@ -209,7 +211,7 @@ public class GoldEntityAndAttributeReade
logger.info("Finished!");
}
- public static void readI2B2Challenge2010(File parentDirectory, File trainDirectory)
+ public static void readI2B2Challenge2010(File parentDirectory, File preprocessedDirectory)
throws ResourceInitializationException, UIMAException, IOException {
TypeSystemDescription typeSystemDescription =
@@ -233,12 +235,96 @@ public class GoldEntityAndAttributeReade
);
aggregate.add(preprocessAnnotator);
- if (trainDirectory!=null) {
+ if (preprocessedDirectory!=null) {
AnalysisEngineDescription xWriter2 = AnalysisEngineFactory.createPrimitiveDescription(
XWriter.class,
typeSystemDescription,
XWriter.PARAM_OUTPUT_DIRECTORY_NAME,
- trainDirectory,
+ preprocessedDirectory,
+ XWriter.PARAM_FILE_NAMER_CLASS_NAME,
+ CtakesFileNamer.class.getName()
+ );
+ aggregate.add(xWriter2);
+ // SimplePipeline.runPipeline(collectionReader, goldAnnotator, xWriter, xWriter2);
+ }
+
+ SimplePipeline.runPipeline(collectionReader, aggregate.createAggregateDescription());
+ logger.info("Finished!");
+ }
+
+ public static void readNegexTestSet(File inputFile, File preprocessedDirectory)
+ throws ResourceInitializationException, UIMAException, IOException {
+
+ TypeSystemDescription typeSystemDescription =
+ TypeSystemDescriptionFactory.createTypeSystemDescription();
+
+ AggregateBuilder aggregate = new AggregateBuilder();
+
+ // input dir is hard-coded in AssertionConst
+ CollectionReaderDescription collectionReader = CollectionReaderFactory.createDescription(
+ NegExCorpusReader.class,
+ typeSystemDescription
+ );
+
+ // fill in other values that are necessary for preprocessing
+ AnalysisEngineDescription preprocessAnnotator = AnalysisEngineFactory.createAnalysisEngineDescription(
+ "desc/analysis_engine/AttributeDiscoveryPreprocessor"
+ );
+ aggregate.add(preprocessAnnotator);
+
+ if (preprocessedDirectory!=null) {
+ AnalysisEngineDescription xWriter2 = AnalysisEngineFactory.createPrimitiveDescription(
+ XWriter.class,
+ typeSystemDescription,
+ XWriter.PARAM_OUTPUT_DIRECTORY_NAME,
+ preprocessedDirectory,
+ XWriter.PARAM_FILE_NAMER_CLASS_NAME,
+ CtakesFileNamer.class.getName()
+ );
+ aggregate.add(xWriter2);
+ // SimplePipeline.runPipeline(collectionReader, goldAnnotator, xWriter, xWriter2);
+ }
+
+ SimplePipeline.runPipeline(collectionReader, aggregate.createAggregateDescription());
+ logger.info("Finished!");
+ }
+
+ public static void readMiPACQ(File inputDirectory, File preprocessedDirectory)
+ throws ResourceInitializationException, UIMAException, IOException {
+
+ TypeSystemDescription typeSystemDescription =
+ TypeSystemDescriptionFactory.createTypeSystemDescription();
+
+ AggregateBuilder aggregate = new AggregateBuilder();
+
+ CollectionReaderDescription collectionReader = CollectionReaderFactory.createDescription(
+ FilesInDirectoryCollectionReader.class,
+ typeSystemDescription,
+ "InputDirectory",
+ inputDirectory
+ );
+
+ // read the MiPACQ gold annotations from Knowtator XML
+ AnalysisEngineDescription goldAnnotator = AnalysisEngineFactory.createPrimitiveDescription(
+ MiPACQKnowtatorXMLReader.class,
+ typeSystemDescription,
+ MiPACQKnowtatorXMLReader.PARAM_TEXT_DIRECTORY,
+ preprocessedDirectory
+ );
+
+ aggregate.add(goldAnnotator);
+ // fill in other values that are necessary for preprocessing
+ AnalysisEngineDescription preprocessAnnotator = AnalysisEngineFactory.createAnalysisEngineDescription(
+ "desc/analysis_engine/AttributeDiscoveryPreprocessor"
+ );
+ aggregate.add(preprocessAnnotator);
+
+ if (preprocessedDirectory!=null) {
+ AnalysisEngineDescription xWriter2 = AnalysisEngineFactory.createPrimitiveDescription(
+ XWriter.class,
+ typeSystemDescription,
+ XWriter.PARAM_OUTPUT_DIRECTORY_NAME,
+ preprocessedDirectory,
XWriter.PARAM_FILE_NAMER_CLASS_NAME,
CtakesFileNamer.class.getName()
);