You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2013/06/27 18:17:44 UTC

svn commit: r1497420 - /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/MetaTimeAnnotator.java

Author: tmill
Date: Thu Jun 27 16:17:44 2013
New Revision: 1497420

URL: http://svn.apache.org/r1497420
Log:
Added MetaTimeAnnotator class.

Added:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/MetaTimeAnnotator.java   (with props)

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/MetaTimeAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/MetaTimeAnnotator.java?rev=1497420&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/MetaTimeAnnotator.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/MetaTimeAnnotator.java Thu Jun 27 16:17:44 2013
@@ -0,0 +1,188 @@
+package org.apache.ctakes.temporal.ae;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.temporal.eval.THYMEData;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.CasCopier;
+import org.cleartk.classifier.CleartkAnnotator;
+import org.cleartk.classifier.DataWriter;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.Instance;
+import org.cleartk.classifier.chunking.BIOChunking;
+import org.cleartk.classifier.jar.DefaultDataWriterFactory;
+import org.cleartk.classifier.jar.DirectoryDataWriterFactory;
+import org.cleartk.classifier.jar.GenericJarClassifierFactory;
+import org.uimafit.component.ViewCreatorAnnotator;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.util.JCasUtil;
+
+/**
+ * Time-expression annotator that learns from the output of other time annotators.
+ *
+ * <p>The aggregate descriptions built by {@link #getDataWriterDescription} and
+ * {@link #getAnnotatorDescription} first create one view per entry of {@link #components}
+ * (named after the class's simple name), then run each component annotator with
+ * {@code TimeAnnotator.TIMEX_VIEW} paired with that view name, and finally run this
+ * annotator. For every token, this annotator turns the BIO labels derived from each
+ * component view's {@link TimeMention}s into features; in training mode it writes them with
+ * the gold label, otherwise it classifies them and creates chunks from the predictions.
+ */
+public class MetaTimeAnnotator extends TemporalEntityAnnotator_ImplBase {
+
+  /** Converts between per-token BIO outcomes and {@link TimeMention} spans. */
+  private BIOChunking<BaseToken, TimeMention> timeChunking;
+
+  /**
+   * Component annotators whose output feeds this annotator. The array order fixes the
+   * {@code Component%d_*} feature numbering used in
+   * {@link #process(JCas, Segment)}, so do not reorder it without retraining.
+   */
+  static final Class<?>[] components = new Class<?>[]{ BackwardsTimeAnnotator.class, TimeAnnotator.class, ConstituencyBasedTimeAnnotator.class, CRFTimeAnnotator.class };
+
+  /**
+   * Builds the aggregate used for training: the component annotators followed by this
+   * annotator configured with {@code PARAM_IS_TRAINING = true}.
+   *
+   * @param dataWriterClass the data writer class passed as
+   *        {@code DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME}
+   * @param directory parent directory; each component's description is created from the
+   *        sub-directory named after its class, and training data for this annotator is
+   *        written to the {@code MetaTimeAnnotator} sub-directory
+   * @return the aggregate analysis engine description
+   * @throws ResourceInitializationException if any description cannot be created
+   */
+  public static AnalysisEngineDescription getDataWriterDescription(
+      Class<? extends DataWriter<String>> dataWriterClass,
+      File directory) throws ResourceInitializationException {
+    AggregateBuilder builder = new AggregateBuilder();
+    addComponentAnnotators(builder, directory);
+    builder.add(AnalysisEngineFactory.createPrimitiveDescription(MetaTimeAnnotator.class,
+        CleartkAnnotator.PARAM_IS_TRAINING,
+        true,
+        DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
+        dataWriterClass,
+        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+        new File(directory, MetaTimeAnnotator.class.getSimpleName())));
+    return builder.createAggregateDescription();
+  }
+
+  /**
+   * Builds the aggregate used for prediction: the component annotators followed by this
+   * annotator configured with {@code PARAM_IS_TRAINING = false}.
+   *
+   * @param directory parent directory; each component's description is created from the
+   *        sub-directory named after its class, and this annotator's classifier is read
+   *        from {@code MetaTimeAnnotator/model.jar} beneath it
+   * @return the aggregate analysis engine description
+   * @throws ResourceInitializationException if any description cannot be created
+   */
+  public static AnalysisEngineDescription getAnnotatorDescription(File directory) throws ResourceInitializationException{
+    AggregateBuilder builder = new AggregateBuilder();
+    addComponentAnnotators(builder, directory);
+    builder.add(AnalysisEngineFactory.createPrimitiveDescription(
+        MetaTimeAnnotator.class,
+        CleartkAnnotator.PARAM_IS_TRAINING,
+        false,
+        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+        new File(directory, MetaTimeAnnotator.class.getSimpleName() + File.separator + "model.jar")));
+    return builder.createAggregateDescription();
+  }
+
+  /**
+   * Adds the steps shared by the training and prediction aggregates: one view creator per
+   * component, then each component annotator with {@code TimeAnnotator.TIMEX_VIEW} paired
+   * with the view named after that component.
+   */
+  private static void addComponentAnnotators(AggregateBuilder builder, File directory)
+      throws ResourceInitializationException {
+    for (Class<?> component : components) {
+      builder.add(AnalysisEngineFactory.createPrimitiveDescription(
+          ViewCreatorAnnotator.class,
+          ViewCreatorAnnotator.PARAM_VIEW_NAME,
+          component.getSimpleName()));
+    }
+
+    // The order below is the order in which the component annotators were originally added.
+    builder.add(TimeAnnotator.createAnnotatorDescription(
+                      new File(directory, TimeAnnotator.class.getSimpleName())),
+                      TimeAnnotator.TIMEX_VIEW, TimeAnnotator.class.getSimpleName());
+    builder.add(BackwardsTimeAnnotator.createAnnotatorDescription(
+                      new File(directory, BackwardsTimeAnnotator.class.getSimpleName())),
+                      TimeAnnotator.TIMEX_VIEW, BackwardsTimeAnnotator.class.getSimpleName());
+    builder.add(ConstituencyBasedTimeAnnotator.createAnnotatorDescription(
+                      new File(directory, ConstituencyBasedTimeAnnotator.class.getSimpleName())),
+                      TimeAnnotator.TIMEX_VIEW, ConstituencyBasedTimeAnnotator.class.getSimpleName());
+    builder.add(CRFTimeAnnotator.createAnnotatorDescription(
+                      new File(directory, CRFTimeAnnotator.class.getSimpleName())),
+                      TimeAnnotator.TIMEX_VIEW, CRFTimeAnnotator.class.getSimpleName());
+  }
+
+  /**
+   * Runs the superclass initialization and creates the token/time-mention BIO chunking.
+   *
+   * @param context the UIMA context handed to the superclass
+   * @throws ResourceInitializationException if the superclass initialization fails
+   */
+  @Override
+  public void initialize(UimaContext context) throws ResourceInitializationException {
+    super.initialize(context);
+    // define chunking
+    this.timeChunking = new BIOChunking<BaseToken, TimeMention>(BaseToken.class, TimeMention.class);
+  }
+
+  /**
+   * Processes every sentence covered by {@code segment}: builds per-token features from
+   * the component views' labels, then either writes training instances or classifies the
+   * tokens and creates chunks from the predicted outcomes.
+   *
+   * @param jCas the view holding the sentences, tokens and (when training) gold time mentions
+   * @param segment the segment whose sentences are processed
+   * @throws AnalysisEngineProcessException if a component view cannot be retrieved, or if
+   *         chunking, writing or classification fails
+   */
+  @Override
+  public void process(JCas jCas, Segment segment)
+      throws AnalysisEngineProcessException {
+    // classify tokens within each sentence
+    for (Sentence sentence : JCasUtil.selectCovered(jCas, Sentence.class, segment)) {
+      List<BaseToken> tokens = JCasUtil.selectCovered(jCas, BaseToken.class, sentence);
+
+      // during training, the list of all outcomes for the tokens;
+      // during prediction, the list of outcomes predicted so far
+      List<String> outcomes;
+      if (this.isTraining()) {
+        List<TimeMention> times = JCasUtil.selectCovered(jCas, TimeMention.class, sentence);
+        outcomes = this.timeChunking.createOutcomes(jCas, tokens, times);
+      } else {
+        outcomes = new ArrayList<String>();
+      }
+
+      List<List<String>> componentOutcomes = this.collectComponentOutcomes(jCas, sentence, tokens);
+
+      for (int tokenIndex = 0; tokenIndex < tokens.size(); tokenIndex++) {
+        List<Feature> features =
+            extractTokenFeatures(componentOutcomes, outcomes, tokenIndex, tokens.size());
+
+        if (this.isTraining()) {
+          String outcome = outcomes.get(tokenIndex);
+          this.dataWriter.write(new Instance<String>(outcome, features));
+        }
+        // if predicting, add prediction to outcomes
+        else {
+          outcomes.add(this.classifier.classify(features));
+        }
+      }
+
+      if (!this.isTraining()) {
+        this.timeChunking.createChunks(jCas, tokens, outcomes);
+      }
+    }
+  }
+
+  /**
+   * For each component, copies the sentence's tokens into that component's view and returns
+   * the BIO outcomes computed there from the view's time mentions (one list per component,
+   * in {@link #components} order).
+   */
+  private List<List<String>> collectComponentOutcomes(
+      JCas jCas, Sentence sentence, List<BaseToken> tokens)
+      throws AnalysisEngineProcessException {
+    // Looked up once per sentence; the value does not depend on the component.
+    org.apache.uima.cas.Feature sofaFeature =
+        jCas.getTypeSystem().getFeatureByFullName(CAS.FEATURE_FULL_NAME_SOFA);
+
+    List<List<String>> componentOutcomes = new ArrayList<List<String>>();
+    for (Class<?> component : components) {
+      try {
+        JCas componentView = jCas.getView(component.getSimpleName());
+        CasCopier casCopy = new CasCopier(jCas.getCas(), componentView.getCas());
+        for (BaseToken token : tokens) {
+          // Re-home the copied token onto the component view's sofa before indexing it there.
+          BaseToken fs = (BaseToken) casCopy.copyFs(token);
+          fs.setFeatureValue(sofaFeature, componentView.getSofa());
+          fs.addToIndexes(componentView);
+        }
+        List<BaseToken> viewTokens = JCasUtil.selectCovered(
+            componentView, BaseToken.class, sentence.getBegin(), sentence.getEnd());
+        List<TimeMention> times = JCasUtil.selectCovered(componentView, TimeMention.class, sentence);
+        componentOutcomes.add(this.timeChunking.createOutcomes(componentView, viewTokens, times));
+      } catch (CASException e) {
+        // The cause is carried by the rethrown exception; no separate stack-trace print.
+        throw new AnalysisEngineProcessException(e);
+      }
+    }
+    return componentOutcomes;
+  }
+
+  /**
+   * Builds the feature list for the token at {@code tokenIndex}: for every component, its
+   * previous/current/next label and an {@code IsTime} flag when the current label is not
+   * {@code "O"}, plus the previous outcome of this annotator.
+   */
+  private static List<Feature> extractTokenFeatures(
+      List<List<String>> componentOutcomes,
+      List<String> outcomes,
+      int tokenIndex,
+      int tokenCount) {
+    List<Feature> features = new ArrayList<Feature>();
+
+    for (int componentNum = 0; componentNum < componentOutcomes.size(); componentNum++) {
+      List<String> labels = componentOutcomes.get(componentNum);
+      String outcome = labels.get(tokenIndex);
+      if (tokenIndex > 0) {
+        // NOTE(review): "PreviousOutcome" is added once per component, so it appears
+        // componentOutcomes.size() times. Kept as-is because changing it alters the feature
+        // vectors of already-trained models -- confirm intent before de-duplicating.
+        features.add(new Feature("PreviousOutcome", outcomes.get(tokenIndex-1)));
+        features.add(new Feature(String.format("Component%d_PreviousLabel", componentNum), labels.get(tokenIndex-1)));
+      }
+      features.add(new Feature(String.format("Component%d_Label", componentNum), outcome));
+      if (tokenIndex < tokenCount - 1) {
+        features.add(new Feature(String.format("Component%d_NextLabel", componentNum), labels.get(tokenIndex+1)));
+      }
+      if (!outcome.equals("O")) {
+        features.add(new Feature(String.format("Component%d_IsTime", componentNum)));
+      }
+    }
+    return features;
+  }
+
+  /**
+   * Processes every segment of {@code jCas} whose id is not listed in
+   * {@code THYMEData.SEGMENTS_TO_SKIP}.
+   *
+   * @param jCas the CAS view to annotate
+   * @throws AnalysisEngineProcessException if processing any segment fails
+   */
+  @Override
+  public void process(JCas jCas) throws AnalysisEngineProcessException {
+    for (Segment segment : JCasUtil.select(jCas, Segment.class)) {
+      if (!THYMEData.SEGMENTS_TO_SKIP.contains(segment.getId())) {
+        this.process(jCas, segment);
+      }
+    }
+  }
+
+}

Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/MetaTimeAnnotator.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain