You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2013/09/26 18:12:03 UTC

svn commit: r1526592 - /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java

Author: dligach
Date: Thu Sep 26 16:12:03 2013
New Revision: 1526592

URL: http://svn.apache.org/r1526592
Log:
added a method to output the duration distribution in a machine/human readable format

Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java?rev=1526592&r1=1526591&r2=1526592&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java Thu Sep 26 16:12:03 2013
@@ -4,8 +4,6 @@ import java.io.File;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
@@ -30,13 +28,10 @@ import org.uimafit.factory.CollectionRea
 import org.uimafit.pipeline.SimplePipeline;
 import org.uimafit.util.JCasUtil;
 
-import com.google.common.base.Function;
-import com.google.common.base.Functions;
 import com.google.common.base.Joiner;
 import com.google.common.collect.HashMultiset;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Multiset;
-import com.google.common.collect.Ordering;
 
 /**
  * Extract durations of signs/symptoms.
@@ -87,6 +82,16 @@ public class SignSymptomDurations {
         .put("yr", "year")
         .build(); 
     
+    // unique temporal bins; all time mentions will be classified into one of them
+    private final static List<String> BINS = Arrays.asList(
+        "second",
+        "minute",
+        "hour",
+        "day",
+        "week",
+        "month",
+        "year");
+    
     // max distance between an event and the time mention that defines the event's duration
     private final static int MAXDISTANCE = 2;
 
@@ -124,8 +129,7 @@ public class SignSymptomDurations {
       }
 
       if(durationDistribution.size() > 0) { 
-        // System.out.println(signSymptomText + "," + convertToString(durationDistribution));
-        System.out.println(signSymptomText + ": " + durationDistribution);
+        System.out.println(formatDistribution(signSymptomText, durationDistribution, ", ", true));
       }
     }
     
@@ -181,6 +185,7 @@ public class SignSymptomDurations {
       return nearestTimeMention;
     }
     
+    @SuppressWarnings("unused")
     private static String getAnnotationContext(Annotation annotation, int maxContextWindowSize) {
       
       String text = annotation.getCAS().getDocumentText();
@@ -190,7 +195,8 @@ public class SignSymptomDurations {
       return text.substring(begin, end).replaceAll("[\r\n]", " ");
     }
     
-    private static String convertToString(Multiset<String> durationDistribution) {
+    @SuppressWarnings("unused")
+    private static String formatDistribution(Multiset<String> durationDistribution) {
       
       List<String> durationBins = Arrays.asList("second", "minute", "hour", "day", "week", "month", "year");
       List<Integer> durationValues = new LinkedList<Integer>();
@@ -202,6 +208,40 @@ public class SignSymptomDurations {
       Joiner joiner = Joiner.on(',');
       return joiner.join(durationValues);
     }
+    
+    /**
+     * Convert duration distribution multiset to a format that's easy to parse automatically.
+     * Format: <sign/symptom><separator><time bin>:<value><separator>... with one entry per bin in BINS.
+     * Example (separator ", ", normalize false): apnea, second:5, minute:1, hour:5, day:10, week:1, month:0, year:0
+     *
+     * @param signSymptomText label emitted as the first field
+     * @param durationDistribution multiset queried for the count of each bin in BINS
+     * @param separator string placed between consecutive fields
+     * @param normalize if true, emit count/total over BINS with 3 decimals (0.000 when total is 0); else raw counts
+     * @return all fields joined with the separator
+     */
+    private static String formatDistribution(String signSymptomText, Multiset<String> durationDistribution,
+        String separator, boolean normalize) {
+      List<String> distribution = new LinkedList<String>();
+      distribution.add(signSymptomText);
+
+      double total = 0;
+      if(normalize) {
+        for(String bin : BINS) {
+          total += durationDistribution.count(bin);
+        }
+      }
+
+      for(String bin : BINS) {
+        int count = durationDistribution.count(bin);
+        // 0/0 would print NaN, so an empty total yields 0.0; Locale.ROOT keeps '.' as the decimal point
+        String value = normalize
+            ? String.format(java.util.Locale.ROOT, "%.3f", total > 0 ? count / total : 0.0)
+            : String.format(java.util.Locale.ROOT, "%d", count);
+        distribution.add(bin + ":" + value);
+      }
+      return Joiner.on(separator).join(distribution);
+    }
   }
   
   private static CollectionReader getCollectionReader(List<File> items) throws Exception {