You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2014/03/13 20:04:24 UTC

svn commit: r1577288 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae: PreserveCertainEventEventRelationsInGold.java feature/duration/DurationEventEventFeatureExtractor.java feature/duration/Utils.java

Author: dligach
Date: Thu Mar 13 19:04:24 2014
New Revision: 1577288

URL: http://svn.apache.org/r1577288
Log:
Code related to evaluating the effect of adding duration features for event-event relations

Added:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/PreserveCertainEventEventRelationsInGold.java   (with props)
Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/Utils.java

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/PreserveCertainEventEventRelationsInGold.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/PreserveCertainEventEventRelationsInGold.java?rev=1577288&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/PreserveCertainEventEventRelationsInGold.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/PreserveCertainEventEventRelationsInGold.java Thu Mar 13 19:04:24 2014
@@ -0,0 +1,82 @@
+package org.apache.ctakes.temporal.ae;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.ctakes.temporal.ae.feature.duration.Utils;
+import org.apache.ctakes.temporal.ae.feature.duration.Utils.Callback;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.jcas.JCas;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.util.JCasUtil;
+
+import com.google.common.base.Charsets;
+import com.google.common.collect.Lists;
+import com.google.common.io.Files;
+
+/** Annotator that filters the view named by GOLD_VIEW_NAME: an event-event relation is kept only when the
+ * lowercased covered text of both of its event arguments is a key of the duration lookup read from
+ * Utils.durationDistributionPath; relations that are not event-event are left alone; events without data are removed. */
+public class PreserveCertainEventEventRelationsInGold extends JCasAnnotator_ImplBase {
+  // name of the CAS view whose relations and event mentions process() filters
+  public static final String GOLD_VIEW_NAME = "GoldView";
+  // Filters the gold view of jCas in place; a CASException from getView() is rethrown as AnalysisEngineProcessException.
+  @Override
+  public void process(JCas jCas) throws AnalysisEngineProcessException {
+    // the lookup is read from disk on every call; presumably it maps event text to a per-time-unit distribution (TODO confirm in Utils.Callback)
+    Map<String, Map<String, Float>> textToDistribution = null;
+    try {
+      textToDistribution = Files.readLines(new File(Utils.durationDistributionPath), Charsets.UTF_8, new Callback());
+    } catch(IOException e) {
+      e.printStackTrace(); // NOTE(review): the read failure is only printed, not propagated
+      return; // the CAS is left completely unfiltered in that case
+    }
+
+    JCas goldView;
+    try {
+      goldView = jCas.getView(GOLD_VIEW_NAME);
+    } catch (CASException e) {
+      throw new AnalysisEngineProcessException(e);
+    }
+
+    // remove event-event relations where one or both arguments have no duration data (the loop runs over a copy of the selection)
+    for(BinaryTextRelation relation : Lists.newArrayList(JCasUtil.select(goldView, BinaryTextRelation.class))) {
+      RelationArgument arg1 = relation.getArg1();
+      RelationArgument arg2 = relation.getArg2();
+
+      String event2Text;
+      String event1Text;
+      if(arg1.getArgument() instanceof EventMention && arg2.getArgument() instanceof EventMention) {
+        event1Text = arg1.getArgument().getCoveredText().toLowerCase();
+        event2Text = arg2.getArgument().getCoveredText().toLowerCase();
+      } else {
+        // this is not an event-event relation, so it is skipped and stays in the indexes
+        continue;
+      }
+
+      if(textToDistribution.containsKey(event1Text) && textToDistribution.containsKey(event2Text)) {
+        // we have duration distributions for both arguments, so keep the relation
+        continue;
+      }
+      // at least one argument has no duration data: drop both RelationArgument objects and the relation itself
+      arg1.removeFromIndexes();
+      arg2.removeFromIndexes();
+      relation.removeFromIndexes();
+    }
+
+    // remove every event mention whose lowercased text has no duration data, whether or not it took part in a relation
+    for(EventMention mention : Lists.newArrayList(JCasUtil.select(goldView, EventMention.class))) {
+      if(textToDistribution.containsKey(mention.getCoveredText().toLowerCase())) {
+        // the lookup has an entry for this mention, so it is kept
+        continue;
+      }
+
+      mention.removeFromIndexes();
+    }
+  }
+}
\ No newline at end of file

Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/PreserveCertainEventEventRelationsInGold.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java?rev=1577288&r1=1577287&r2=1577288&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java Thu Mar 13 19:04:24 2014
@@ -40,13 +40,14 @@ public class DurationEventEventFeatureEx
       throws AnalysisEngineProcessException {
 
     List<Feature> features = new ArrayList<Feature>();
-    File durationLookup = new File(Utils.durationDistributionPath);
     String arg1text = arg1.getCoveredText().toLowerCase();
     String arg2text = arg2.getCoveredText().toLowerCase();
+    Float expectedDuration1;
+    Float expectedDuration2;
     
     Map<String, Map<String, Float>> textToDistribution = null;
     try {
-      textToDistribution = Files.readLines(durationLookup, Charsets.UTF_8, new Utils.Callback());
+      textToDistribution = Files.readLines(new File(Utils.durationDistributionPath), Charsets.UTF_8, new Utils.Callback());
     } catch(IOException e) {
       e.printStackTrace();
       return features;
@@ -55,25 +56,19 @@ public class DurationEventEventFeatureEx
     Map<String, Float> arg1Distribution = textToDistribution.get(arg1text);
     if(arg1Distribution == null) {
       features.add(new Feature("arg1_no_duration_info"));
-    } else {
-//      float expectation1 = DurationExpectationFeatureExtractor.expectedDuration(arg1Distribution);
-//      features.add(new Feature("arg1_expected_duration", expectation1));
-      for(String timeUnit : arg1Distribution.keySet()) {
-        features.add(new Feature("duration_" + timeUnit, arg1Distribution.get(timeUnit)));  
-      }
-    }
+      return features;
+    } 
+    
+    expectedDuration1 = Utils.expectedDuration(arg1Distribution);
     
     Map<String, Float> arg2Distribution = textToDistribution.get(arg2text);
     if(arg2Distribution == null) {
       features.add(new Feature("arg2_no_duration_info"));
-    } else {
-//      float expectation2 = DurationExpectationFeatureExtractor.expectedDuration(arg2Distribution);
-//      features.add(new Feature("arg_expected_duration", expectation2));
-      for(String timeUnit : arg2Distribution.keySet()) {
-        features.add(new Feature("duration_" + timeUnit, arg2Distribution.get(timeUnit)));  
-      }
+      return features;
     }
     
+    expectedDuration2 = Utils.expectedDuration(arg2Distribution);
+    features.add(new Feature("expected_duration_difference", expectedDuration1 - expectedDuration2));
     return features;
   }
 }
\ No newline at end of file

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/Utils.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/Utils.java?rev=1577288&r1=1577287&r2=1577288&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/Utils.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/Utils.java Thu Mar 13 19:04:24 2014
@@ -26,7 +26,7 @@ import com.google.common.io.LineProcesso
  */
 public class Utils {
 
-  public static final String durationDistributionPath = "/Users/dima/Boston/Thyme/Duration/Data/SignSymptom/Distribution/distribution.txt";
+  public static final String durationDistributionPath = "/Users/dima/Boston/Thyme/Duration/Data/Combined/Distribution/mimic.txt";
   
   /**
    * Compute expected duration in seconds. Normalize by number of seconds in a year.