You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2014/10/14 00:27:57 UTC
svn commit: r1631590 - /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfMetaTimeExpressionExtractor.java
Author: tmill
Date: Mon Oct 13 22:27:56 2014
New Revision: 1631590
URL: http://svn.apache.org/r1631590
Log:
CTAKES-82: Minor upgrades to meta time evaluator, including fixing indentation.
Modified:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfMetaTimeExpressionExtractor.java
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfMetaTimeExpressionExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfMetaTimeExpressionExtractor.java?rev=1631590&r1=1631589&r2=1631590&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfMetaTimeExpressionExtractor.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfMetaTimeExpressionExtractor.java Mon Oct 13 22:27:56 2014
@@ -48,23 +48,30 @@ import org.cleartk.ml.jar.JarClassifierB
import com.google.common.collect.Maps;
import com.lexicalscope.jewel.cli.CliFactory;
+import com.lexicalscope.jewel.cli.Option;
public class EvaluationOfMetaTimeExpressionExtractor extends EvaluationOfAnnotationSpans_ImplBase {
public static int nFolds = 5;
private List<Integer> allTrain = null;
+ private boolean skipTrainComponents = false;
+ interface MetaOptions extends Options {
+ @Option
+ boolean getSkipTrainComponents();
+ }
+
public EvaluationOfMetaTimeExpressionExtractor(File baseDirectory,
File rawTextDirectory, File xmlDirectory,
org.apache.ctakes.temporal.eval.Evaluation_ImplBase.XMLFormat xmlFormat,
File xmiDirectory, File treebankDirectory,
List<Integer> allTrain, Class<? extends Annotation> annotationClass) {
super(baseDirectory, rawTextDirectory, xmlDirectory, xmlFormat, xmiDirectory,
- treebankDirectory, annotationClass);
+ treebankDirectory, null, annotationClass);
this.allTrain = allTrain;
}
public static void main(String[] args) throws Exception {
- Options options = CliFactory.parseArguments(Options.class, args);
+ MetaOptions options = CliFactory.parseArguments(MetaOptions.class, args);
List<Integer> patientSets = options.getPatients().getList();
List<Integer> trainItems = null;
List<Integer> devItems = null;
@@ -98,70 +105,78 @@ public class EvaluationOfMetaTimeExpress
options.getTreebankDirectory(),
allTrain,
TimeMention.class);
+ if(options.getSkipTrainComponents()) eval.setSkipTrainComponents(true);
if(options.getI2B2Output()!=null) eval.setI2B2Output(options.getI2B2Output());
+ if(options.getPrintOverlappingSpans()) eval.printOverlapping = true;
AnnotationStatistics<String> stats = eval.trainAndTest(allTrain, allTest);
System.out.println(stats.toString());
}
+ private void setSkipTrainComponents(boolean skip) {
+ this.skipTrainComponents = skip;
+ }
+
@Override
protected void train(CollectionReader collectionReader, File directory)
throws Exception {
- Class<? extends JCasAnnotator_ImplBase>[] annotatorClasses = MetaTimeAnnotator.getComponents();
-
- // add more annotator types?
- Map<Class<? extends JCasAnnotator_ImplBase>, String[]> annotatorTrainingArguments = Maps.newHashMap();
- annotatorTrainingArguments.put(BackwardsTimeAnnotator.class, new String[]{"-c", "0.3"});
- annotatorTrainingArguments.put(TimeAnnotator.class, new String[]{"-c", "0.1"});
- annotatorTrainingArguments.put(ConstituencyBasedTimeAnnotator.class, new String[]{"-c", "0.3"});
- annotatorTrainingArguments.put(CRFTimeAnnotator.class, new String[]{"-p", "c2=0.03"});
-
- JCasIterator[] casIters = new JCasIterator[nFolds];
- for (int fold = 0; fold < nFolds; ++fold) {
- List<Integer> xfoldTrain = selectTrainItems(allTrain, nFolds, fold);
- List<Integer> xfoldTest = selectTestItems(allTrain, nFolds, fold);
- AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
- File modelDirectory = getModelDirectory(new File("target/eval/time-spans/fold_"+fold));
- for (Class<? extends JCasAnnotator_ImplBase> annotatorClass : annotatorClasses) {
- EvaluationOfTimeSpans evaluation = new EvaluationOfTimeSpans(
- new File("target/eval/time-spans/" ),
- this.rawTextDirectory,
- this.xmlDirectory,
- this.xmlFormat,
- this.xmiDirectory,
- this.treebankDirectory,
- 1,
- 0,
- annotatorClass,
- false,
- annotatorTrainingArguments.get(annotatorClass));
- evaluation.prepareXMIsFor(allTrain);
- String name = String.format("%s.errors", annotatorClass.getSimpleName());
- evaluation.setLogging(Level.FINE, new File("target/eval", name));
-
- // train on 4 of the folds of the training data:
- evaluation.train(evaluation.getCollectionReader(xfoldTrain), modelDirectory);
- if(fold == 0){
- // train the main model as well:
- evaluation.train(evaluation.getCollectionReader(allTrain), directory);
+ if(!this.skipTrainComponents){
+ Class<? extends JCasAnnotator_ImplBase>[] annotatorClasses = MetaTimeAnnotator.getComponents();
+
+ // add more annotator types?
+ Map<Class<? extends JCasAnnotator_ImplBase>, String[]> annotatorTrainingArguments = Maps.newHashMap();
+ annotatorTrainingArguments.put(BackwardsTimeAnnotator.class, new String[]{"-c", "0.1"});
+ annotatorTrainingArguments.put(TimeAnnotator.class, new String[]{"-c", "0.1"});
+ annotatorTrainingArguments.put(ConstituencyBasedTimeAnnotator.class, new String[]{"-c", "0.3"});
+ annotatorTrainingArguments.put(CRFTimeAnnotator.class, new String[]{"-p", "c2=0.3"});
+
+ JCasIterator[] casIters = new JCasIterator[nFolds];
+ for (int fold = 0; fold < nFolds; ++fold) {
+ List<Integer> xfoldTrain = selectTrainItems(allTrain, nFolds, fold);
+ List<Integer> xfoldTest = selectTestItems(allTrain, nFolds, fold);
+ AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
+ File modelDirectory = getModelDirectory(new File("target/eval/time-spans/fold_"+fold));
+ for (Class<? extends JCasAnnotator_ImplBase> annotatorClass : annotatorClasses) {
+ EvaluationOfTimeSpans evaluation = new EvaluationOfTimeSpans(
+ new File("target/eval/time-spans/" ),
+ this.rawTextDirectory,
+ this.xmlDirectory,
+ this.xmlFormat,
+ this.xmiDirectory,
+ this.treebankDirectory,
+ 1,
+ 0,
+ annotatorClass,
+ false,
+ annotatorTrainingArguments.get(annotatorClass));
+ evaluation.prepareXMIsFor(allTrain);
+ String name = String.format("%s.errors", annotatorClass.getSimpleName());
+ evaluation.setLogging(Level.FINE, new File("target/eval", name));
+
+ // train on 4 of the folds of the training data:
+ evaluation.train(evaluation.getCollectionReader(xfoldTrain), modelDirectory);
+ if(fold == 0){
+ // train the main model as well:
+ evaluation.train(evaluation.getCollectionReader(allTrain), directory);
+ }
+
}
-
+ casIters[fold] = new JCasIterator(getCollectionReader(xfoldTest), aggregateBuilder.createAggregate());
}
- casIters[fold] = new JCasIterator(getCollectionReader(xfoldTest), aggregateBuilder.createAggregate());
- }
- // run meta data-writer for this fold:
- AggregateBuilder writerBuilder = new AggregateBuilder();
- writerBuilder.add(CopyFromGold.getDescription(TimeMention.class));
- writerBuilder.add(this.getDataWriterDescription(directory));
- AnalysisEngine writer = writerBuilder.createAggregate();
- for(JCasIterator casIter : casIters){
- while(casIter.hasNext()){
- JCas jcas = casIter.next();
- SimplePipeline.runPipeline(jcas, writer);
+ // run meta data-writer for this fold:
+ AggregateBuilder writerBuilder = new AggregateBuilder();
+ writerBuilder.add(CopyFromGold.getDescription(TimeMention.class));
+ writerBuilder.add(this.getDataWriterDescription(directory));
+ AnalysisEngine writer = writerBuilder.createAggregate();
+ for(JCasIterator casIter : casIters){
+ while(casIter.hasNext()){
+ JCas jcas = casIter.next();
+ SimplePipeline.runPipeline(jcas, writer);
+ }
}
+ writer.collectionProcessComplete();
}
- writer.collectionProcessComplete();
- JarClassifierBuilder.trainAndPackage(getModelDirectory(directory), new String[]{"-p", "c2=0.3"});
+ JarClassifierBuilder.trainAndPackage(getModelDirectory(directory), new String[]{"-p", "c2=3.0"});
}
private static List<Integer> selectTrainItems(List<Integer> items, int numFolds, int fold) {
@@ -193,6 +208,7 @@ public class EvaluationOfMetaTimeExpress
@Override
protected void trainAndPackage(File directory) throws Exception {
+ System.err.println("\n\n***\n\n\nChanging the classifier setup\n\n\n");
JarClassifierBuilder.trainAndPackage(getModelDirectory(directory), "-p", "c2=0.3");
}