You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2013/05/03 17:50:15 UTC

svn commit: r1478850 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal: ae/ConstituencyBasedTimeAnnotator.java eval/EvaluationOfTimeSpans.java

Author: tmill
Date: Fri May  3 15:50:14 2013
New Revision: 1478850

URL: http://svn.apache.org/r1478850
Log:
Checked in some code for potentially combining 2 annotators (TimeAnnotator and ConstituencyBasedTimeAnnotator) for time spans.

Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConstituencyBasedTimeAnnotator.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConstituencyBasedTimeAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConstituencyBasedTimeAnnotator.java?rev=1478850&r1=1478849&r2=1478850&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConstituencyBasedTimeAnnotator.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConstituencyBasedTimeAnnotator.java Fri May  3 15:50:14 2013
@@ -2,7 +2,9 @@ package org.apache.ctakes.temporal.ae;
 
 import java.io.File;
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
 import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode;
@@ -20,7 +22,7 @@ import org.cleartk.classifier.DataWriter
 import org.cleartk.classifier.Feature;
 import org.cleartk.classifier.Instance;
 import org.cleartk.classifier.feature.extractor.CleartkExtractor;
-import static org.cleartk.classifier.feature.extractor.CleartkExtractor.*;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor.Bag;
 import org.cleartk.classifier.feature.extractor.CleartkExtractor.Covered;
 import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor;
 import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor.PatternType;
@@ -89,12 +91,20 @@ TemporalEntityAnnotator_ImplBase {
   public void process(JCas jCas, Segment segment)
       throws AnalysisEngineProcessException {
 
+    HashSet<TimeMention> mentions = new HashSet<TimeMention>(JCasUtil.selectCovered(TimeMention.class, segment));
+	  
     for(TopTreebankNode root : JCasUtil.selectCovered(TopTreebankNode.class, segment)){
-      processNode(jCas, root.getChildren(0));
+      recursivelyProcessNode(jCas, root.getChildren(0), NON_MENTION, mentions);
     }
+//    if(mentions.size() > 0){
+//      System.out.println("Remaining mentions:");
+//      for(TimeMention mention : mentions){
+//        System.out.println(mention.getCoveredText());
+//      }
+//    }
   }
 
-  private void processNode(JCas jCas, TreebankNode node) throws CleartkProcessingException {
+  private void recursivelyProcessNode(JCas jCas, TreebankNode node, String parentCategory, Set<TimeMention> mentions) throws CleartkProcessingException {
     // accumulate features:
     ArrayList<Feature> features = new ArrayList<Feature>();
     String category = NON_MENTION;
@@ -103,6 +113,7 @@ TemporalEntityAnnotator_ImplBase {
     if(node.getParent().getParent() == null) features.add(new Feature("IS_ROOT"));
     features.add(new Feature("NODE_LABEL", node.getNodeType()));
     features.add(new Feature("PARENT_LABEL", node.getParent().getNodeType()));
+    features.add(new Feature("PARENT_CAT", parentCategory));
     
     if(node.getLeaf()){
       features.add(new Feature("IS_LEAF"));
@@ -111,7 +122,9 @@ TemporalEntityAnnotator_ImplBase {
       for(int i = 0; i < node.getChildren().size(); i++){
         buffer.append(node.getChildren(i).getNodeType());
         buffer.append("_");
+        features.add(new Feature("CHILD_BAG", node.getChildren(i).getNodeType()));
       }
+//      features.add(new Feature("NUM_TOKENS", JCasUtil.selectCovered(BaseToken.class, node).size()));
       features.add(new Feature("PRODUCTION", buffer.toString()));
     }
     
@@ -119,12 +132,13 @@ TemporalEntityAnnotator_ImplBase {
     for(SimpleFeatureExtractor extractor : featureExtractors){
       features.addAll(extractor.extract(jCas, node));
     }
-    
+      
     if(this.isTraining()){
       List<TimeMention> goldMentions = JCasUtil.selectCovered(TimeMention.class, node);
       for(TimeMention mention : goldMentions){
         if(mention.getBegin() == node.getBegin() && mention.getEnd() == node.getEnd()){
           category = MENTION;
+          mentions.remove(mention);
         }
       }
       this.dataWriter.write(new Instance<String>(category, features));
@@ -142,8 +156,7 @@ TemporalEntityAnnotator_ImplBase {
 
     for(int i = 0; i < node.getChildren().size(); i++){
       TreebankNode child = node.getChildren(i);
-      processNode(jCas, child);
+      recursivelyProcessNode(jCas, child, category, mentions);
     }
   }
-
 }

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java?rev=1478850&r1=1478849&r2=1478850&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java Fri May  3 15:50:14 2013
@@ -24,6 +24,7 @@ import java.util.List;
 import java.util.logging.Level;
 
 import org.apache.ctakes.temporal.ae.ConstituencyBasedTimeAnnotator;
+import org.apache.ctakes.temporal.ae.TimeAnnotator;
 import org.apache.ctakes.typesystem.type.textsem.TimeMention;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
 import org.apache.uima.jcas.JCas;
@@ -32,6 +33,7 @@ import org.apache.uima.resource.Resource
 import org.cleartk.classifier.jar.JarClassifierBuilder;
 import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter;
 import org.cleartk.eval.AnnotationStatistics;
+import org.uimafit.factory.AnalysisEngineFactory;
 
 import com.lexicalscope.jewel.cli.CliFactory;
 
@@ -77,6 +79,10 @@ public class EvaluationOfTimeSpans exten
   @Override
   protected AnalysisEngineDescription getAnnotatorDescription(File directory)
       throws ResourceInitializationException {
+//    return AnalysisEngineFactory.createAggregateDescription(
+//        TimeAnnotator.createAnnotatorDescription(directory),
+//        AnalysisEngineFactory.createPrimitiveDescription(RemoveTreeAlignedMentions.class),
+//        ConstituencyBasedTimeAnnotator.createAnnotatorDescription(directory));
     return ConstituencyBasedTimeAnnotator.createAnnotatorDescription(directory);
   }