You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2013/08/27 18:49:45 UTC

svn commit: r1517879 - /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java

Author: dligach
Date: Tue Aug 27 16:49:45 2013
New Revision: 1517879

URL: http://svn.apache.org/r1517879
Log:
add calc for frequencies over various time granularities

Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java?rev=1517879&r1=1517878&r2=1517879&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java Tue Aug 27 16:49:45 2013
@@ -8,6 +8,8 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.apache.ctakes.relationextractor.eval.XMIReader;
 import org.apache.ctakes.typesystem.type.structured.DocumentID;
@@ -28,11 +30,15 @@ import org.uimafit.util.JCasUtil;
 
 import com.google.common.base.Function;
 import com.google.common.base.Functions;
+import com.google.common.collect.HashMultiset;
+import com.google.common.collect.Multiset;
 import com.google.common.collect.Ordering;
 
 /**
  * Extract durations of signs/symptoms.
  * 
+ * TODO: check drinking.txt; fewer day durations are captured than exist in data.
+ * 
  * @author dmitriy dligach
  */
 public class SignSymptomDurations {
@@ -64,6 +70,9 @@ public class SignSymptomDurations {
 
     // max distance between a time and an event
     final int maxDistance = 2;
+
+    // regex to match different time granularities
+    Pattern pattern = Pattern.compile("(second|minute|hour|day|week|month|year)", Pattern.CASE_INSENSITIVE);
     
     @Override
     public void process(JCas jCas) throws AnalysisEngineProcessException {
@@ -72,6 +81,9 @@ public class SignSymptomDurations {
       String fileName = ids.iterator().next().getDocumentID();
       String signSymptomText = fileName.split("\\.")[0]; // e.g. "smoker.txt"
 
+      // counts of different time granularities for this sign/symptom
+      Multiset<String> durationDistribution = HashMultiset.create();
+      
       for(SignSymptomMention signSymptomMention : JCasUtil.select(jCas, SignSymptomMention.class)) {
 
         if(signSymptomMention.getCoveredText().equals(signSymptomText)) {
@@ -89,10 +101,19 @@ public class SignSymptomDurations {
           Collections.sort(sortedTimeMentions, Ordering.natural().onResultOf(getValue));
 
           if(sortedTimeMentions.size() > 0 && distances.get(sortedTimeMentions.get(0)) <= maxDistance) {
-            System.out.println(signSymptomMention.getCoveredText() + ": " + sortedTimeMentions.get(0).getCoveredText());
+
+            String timex = sortedTimeMentions.get(0).getCoveredText();
+            Matcher matcher = pattern.matcher(timex);
+            while(matcher.find()) {
+              durationDistribution.add(matcher.group());
+            }
           }
         }
       }
+
+      if(durationDistribution.size() > 0) { 
+        System.out.println(signSymptomText + ": " + durationDistribution);
+      }
     }
   }