You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2014/04/09 00:06:24 UTC

svn commit: r1585848 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal: ae/MetaTimeAnnotator.java eval/EvaluationOfMetaTimeExpressionExtractor.java

Author: tmill
Date: Tue Apr  8 22:06:24 2014
New Revision: 1585848

URL: http://svn.apache.org/r1585848
Log:
CTAKES-82: Meta-time expression extractor fixed, made a separate class.

Added:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfMetaTimeExpressionExtractor.java
Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/MetaTimeAnnotator.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/MetaTimeAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/MetaTimeAnnotator.java?rev=1585848&r1=1585847&r2=1585848&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/MetaTimeAnnotator.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/MetaTimeAnnotator.java Tue Apr  8 22:06:24 2014
@@ -25,6 +25,7 @@ import org.cleartk.classifier.chunking.B
 import org.cleartk.classifier.jar.DefaultSequenceDataWriterFactory;
 import org.cleartk.classifier.jar.DirectoryDataWriterFactory;
 import org.cleartk.classifier.jar.GenericJarClassifierFactory;
+import org.uimafit.component.JCasAnnotator_ImplBase;
 import org.uimafit.component.ViewCreatorAnnotator;
 import org.uimafit.factory.AggregateBuilder;
 import org.uimafit.factory.AnalysisEngineFactory;
@@ -34,8 +35,13 @@ public class MetaTimeAnnotator extends T
 
   private BIOChunking<BaseToken, TimeMention> timeChunking;
 
-  static Class<?>[] components = new Class<?>[]{ BackwardsTimeAnnotator.class, TimeAnnotator.class, ConstituencyBasedTimeAnnotator.class, CRFTimeAnnotator.class };
+  @SuppressWarnings("unchecked")
+  static Class<? extends JCasAnnotator_ImplBase>[] components = new Class[]{ BackwardsTimeAnnotator.class, TimeAnnotator.class, ConstituencyBasedTimeAnnotator.class, CRFTimeAnnotator.class };
   
+  public static Class<? extends JCasAnnotator_ImplBase>[] getComponents() {
+    return components;
+  }
+
   public static AnalysisEngineDescription getDataWriterDescription(
       Class<? extends SequenceDataWriter<String>> dataWriterClass,
       File directory) throws ResourceInitializationException {
@@ -58,13 +64,6 @@ public class MetaTimeAnnotator extends T
         new File(directory, CRFTimeAnnotator.class.getSimpleName()), 
         CRFTimeAnnotator.class.getSimpleName()));
     
-//    builder.add(AnalysisEngineFactory.createPrimitiveDescription(MetaTimeAnnotator.class, 
-//        CleartkAnnotator.PARAM_IS_TRAINING,
-//        true,
-//        DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
-//        dataWriterClass,
-//        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
-//        new File(directory, MetaTimeAnnotator.class.getSimpleName())));
     builder.add(AnalysisEngineFactory.createPrimitiveDescription(MetaTimeAnnotator.class,
           CleartkSequenceAnnotator.PARAM_IS_TRAINING,
           true,
@@ -106,7 +105,7 @@ public class MetaTimeAnnotator extends T
   public void initialize(UimaContext context) throws ResourceInitializationException {
     super.initialize(context);
     // define chunking
-    this.timeChunking = new BIOChunking<BaseToken, TimeMention>(BaseToken.class, TimeMention.class);
+    this.timeChunking = new BIOChunking<>(BaseToken.class, TimeMention.class);
   }
   
   @Override
@@ -114,7 +113,7 @@ public class MetaTimeAnnotator extends T
       throws AnalysisEngineProcessException {
     // classify tokens within each sentence
     for (Sentence sentence : JCasUtil.selectCovered(jCas, Sentence.class, segment)) {
-      List<List<Feature>> sequenceFeatures = new ArrayList<List<Feature>>();
+      List<List<Feature>> sequenceFeatures = new ArrayList<>();
       List<BaseToken> tokens = JCasUtil.selectCovered(jCas, BaseToken.class, sentence);
       // during training, the list of all outcomes for the tokens
       List<String> outcomes;
@@ -125,10 +124,10 @@ public class MetaTimeAnnotator extends T
       }
       // during prediction, the list of outcomes predicted so far
       else {
-        outcomes = new ArrayList<String>();
+        outcomes = new ArrayList<>();
       }
       
-      List<List<String>> componentOutcomes = new ArrayList<List<String>>();
+      List<List<String>> componentOutcomes = new ArrayList<>();
       for(Class<?> component : components){
         JCas componentView;
         try {
@@ -150,7 +149,7 @@ public class MetaTimeAnnotator extends T
       }
       
       for(int tokenIndex = 0; tokenIndex < tokens.size(); tokenIndex++){
-        List<Feature> features = new ArrayList<Feature>();
+        List<Feature> features = new ArrayList<>();
         
         for(int componentNum = 0; componentNum < componentOutcomes.size(); componentNum++){
           String outcome = componentOutcomes.get(componentNum).get(tokenIndex);
@@ -162,9 +161,9 @@ public class MetaTimeAnnotator extends T
           if(tokenIndex < tokens.size() -1){
             features.add(new Feature(String.format("Component%d_NextLabel", componentNum), componentOutcomes.get(componentNum).get(tokenIndex+1)));
           }
-          if(!outcome.equals("O")){
-            features.add(new Feature(String.format("Component%d_IsTime", componentNum)));
-          }
+//          if(!outcome.equals("O")){
+//            features.add(new Feature(String.format("Component%d_IsTime", componentNum)));
+//          }
         }
         
 //        if (this.isTraining()) {

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfMetaTimeExpressionExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfMetaTimeExpressionExtractor.java?rev=1585848&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfMetaTimeExpressionExtractor.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfMetaTimeExpressionExtractor.java Tue Apr  8 22:06:24 2014
@@ -0,0 +1,192 @@
+package org.apache.ctakes.temporal.eval;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.logging.Level;
+
+import org.apache.ctakes.temporal.ae.BackwardsTimeAnnotator;
+import org.apache.ctakes.temporal.ae.CRFTimeAnnotator;
+import org.apache.ctakes.temporal.ae.ConstituencyBasedTimeAnnotator;
+import org.apache.ctakes.temporal.ae.MetaTimeAnnotator;
+import org.apache.ctakes.temporal.ae.TimeAnnotator;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.classifier.crfsuite.CRFSuiteStringOutcomeDataWriter;
+import org.cleartk.classifier.jar.JarClassifierBuilder;
+import org.cleartk.eval.AnnotationStatistics;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.pipeline.JCasIterable;
+import org.uimafit.pipeline.SimplePipeline;
+
+import com.google.common.collect.Maps;
+import com.lexicalscope.jewel.cli.CliFactory;
+
+public class EvaluationOfMetaTimeExpressionExtractor extends EvaluationOfAnnotationSpans_ImplBase {
+  public static int nFolds = 2;
+  private List<Integer> allTrain = null;
+  
+  public EvaluationOfMetaTimeExpressionExtractor(File baseDirectory,
+      File rawTextDirectory, File xmlDirectory,
+      org.apache.ctakes.temporal.eval.Evaluation_ImplBase.XMLFormat xmlFormat,
+      File xmiDirectory, File treebankDirectory,
+      List<Integer> allTrain, Class<? extends Annotation> annotationClass) {
+    super(baseDirectory, rawTextDirectory, xmlDirectory, xmlFormat, xmiDirectory,
+        treebankDirectory, annotationClass);
+    this.allTrain = allTrain;
+  }
+
+  public static void main(String[] args) throws Exception {
+    Options options = CliFactory.parseArguments(Options.class, args);
+    List<Integer> patientSets = options.getPatients().getList();
+    List<Integer> trainItems = THYMEData.getTrainPatientSets(patientSets);
+    List<Integer> devItems = THYMEData.getDevPatientSets(patientSets);
+    List<Integer> testItems = THYMEData.getTestPatientSets(patientSets);
+    List<Integer> allTrain = new ArrayList<>(trainItems);
+    List<Integer> allTest = null;
+    
+    if(options.getTest()){
+      allTrain.addAll(devItems);
+      allTest = new ArrayList<>(testItems);
+    }else{
+      allTest = new ArrayList<>(devItems);
+    }
+
+    EvaluationOfMetaTimeExpressionExtractor eval = new
+        EvaluationOfMetaTimeExpressionExtractor(
+            new File("target/eval/time-spans"), 
+            options.getRawTextDirectory(), 
+            options.getXMLDirectory(), 
+            options.getXMLFormat(),
+            options.getXMIDirectory(), 
+            options.getTreebankDirectory(),
+            allTrain,
+            TimeMention.class);
+    AnnotationStatistics<String> stats = eval.trainAndTest(allTrain, allTest);
+    System.out.println(stats.toString());
+  }
+
+  @Override
+  protected void train(CollectionReader collectionReader, File directory)
+      throws Exception {
+    
+    Class<? extends JCasAnnotator_ImplBase>[] annotatorClasses = MetaTimeAnnotator.getComponents();
+    
+    // add more annotator types?
+    Map<Class<? extends JCasAnnotator_ImplBase>, String[]> annotatorTrainingArguments = Maps.newHashMap();
+    annotatorTrainingArguments.put(BackwardsTimeAnnotator.class, new String[]{"-c", "0.3"});
+    annotatorTrainingArguments.put(TimeAnnotator.class, new String[]{"-c", "0.1"});
+    annotatorTrainingArguments.put(ConstituencyBasedTimeAnnotator.class, new String[]{"-c", "0.3"});
+    annotatorTrainingArguments.put(CRFTimeAnnotator.class, new String[]{"-p", "c2=0.03"});
+    
+    JCasIterable[] casIters = new JCasIterable[nFolds];
+    for (int fold = 0; fold < nFolds; ++fold) {
+      List<Integer> xfoldTrain = selectTrainItems(allTrain, nFolds, fold);
+      List<Integer> xfoldTest = selectTestItems(allTrain, nFolds, fold);
+      AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
+      File modelDirectory = getModelDirectory(new File("target/eval/time-spans/fold_"+fold));
+      for (Class<? extends JCasAnnotator_ImplBase> annotatorClass : annotatorClasses) {
+        EvaluationOfTimeSpans evaluation = new EvaluationOfTimeSpans(
+            new File("target/eval/time-spans/" ),
+            this.rawTextDirectory,
+            this.xmlDirectory,
+            this.xmlFormat,
+            this.xmiDirectory,
+            this.treebankDirectory,
+            1,
+            0,
+            annotatorClass,
+            false,
+            annotatorTrainingArguments.get(annotatorClass));
+        evaluation.prepareXMIsFor(allTrain);
+        String name = String.format("%s.errors", annotatorClass.getSimpleName());
+        evaluation.setLogging(Level.FINE, new File("target/eval", name));
+
+        // train on 4 of the folds of the training data:
+        evaluation.train(evaluation.getCollectionReader(xfoldTrain), modelDirectory);
+        if(fold == 0){
+          // train the main model as well:
+          evaluation.train(evaluation.getCollectionReader(allTrain), directory);
+        }
+        
+      }
+      casIters[fold] = new JCasIterable(getCollectionReader(xfoldTest), aggregateBuilder.createAggregate());
+    }
+    // run meta data-writer for this fold:
+    AggregateBuilder writerBuilder = new AggregateBuilder();
+    writerBuilder.add(CopyFromGold.getDescription(TimeMention.class));
+    writerBuilder.add(this.getDataWriterDescription(directory));
+    AnalysisEngine writer = writerBuilder.createAggregate();
+    for(JCasIterable casIter : casIters){
+      for(JCas jcas : casIter){
+        SimplePipeline.runPipeline(jcas, writer);
+      }
+    }
+    writer.collectionProcessComplete();
+    JarClassifierBuilder.trainAndPackage(getModelDirectory(directory), new String[]{"-p", "c2=0.3"});
+  }
+  
+  private static List<Integer> selectTrainItems(List<Integer> items, int numFolds, int fold) {
+    List<Integer> trainItems = new ArrayList<>();
+    for (int i = 0; i < items.size(); ++i) {
+      if (i % numFolds != fold) {
+        trainItems.add(items.get(i));
+      }
+    }
+    return trainItems;
+  }
+  
+  private static List<Integer> selectTestItems(List<Integer> items, int numFolds, int fold) {
+    List<Integer> trainItems = new ArrayList<>();
+    for (int i = 0; i < items.size(); ++i) {
+      if (i % numFolds == fold) {
+        trainItems.add(items.get(i));
+      }
+    }
+    return trainItems;
+  }
+  
+
+  @Override
+  protected AnalysisEngineDescription getDataWriterDescription(File directory)
+      throws ResourceInitializationException {
+    return MetaTimeAnnotator.getDataWriterDescription(CRFSuiteStringOutcomeDataWriter.class, directory);          
+  }
+
+  @Override
+  protected void trainAndPackage(File directory) throws Exception {
+    JarClassifierBuilder.trainAndPackage(getModelDirectory(directory), "-p", "c2=0.3");
+  }
+
+  @Override
+  protected AnalysisEngineDescription getAnnotatorDescription(File directory)
+      throws ResourceInitializationException {
+    return MetaTimeAnnotator.getAnnotatorDescription(directory);
+  }
+
+  @Override
+  protected Collection<? extends Annotation> getGoldAnnotations(JCas jCas,
+      Segment segment) {
+    return selectExact(jCas, TimeMention.class, segment);
+  }
+
+  @Override
+  protected Collection<? extends Annotation> getSystemAnnotations(JCas jCas,
+      Segment segment) {
+    return selectExact(jCas, TimeMention.class, segment);
+  }
+
+  private static File getModelDirectory(File directory) {
+    return new File(directory, "MetaTimeAnnotator");
+  }
+
+}