You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2014/02/25 17:13:22 UTC

svn commit: r1571722 - /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/EventDurationDistribution.java

Author: dligach
Date: Tue Feb 25 16:13:22 2014
New Revision: 1571722

URL: http://svn.apache.org/r1571722
Log:
Refactored so that it can now be used for signs/symptoms as well as diseases/disorders (and other event mentions).

Added:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/EventDurationDistribution.java
      - copied, changed from r1571720, ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java

Copied: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/EventDurationDistribution.java (from r1571720, ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java)
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/EventDurationDistribution.java?p2=ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/EventDurationDistribution.java&p1=ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java&r1=1571720&r2=1571722&rev=1571722&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/EventDurationDistribution.java Tue Feb 25 16:13:22 2014
@@ -14,6 +14,7 @@ import java.util.regex.Pattern;
 import org.apache.ctakes.relationextractor.eval.XMIReader;
 import org.apache.ctakes.typesystem.type.structured.DocumentID;
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
 import org.apache.ctakes.typesystem.type.textsem.SignSymptomMention;
 import org.apache.ctakes.typesystem.type.textsem.TimeMention;
 import org.apache.uima.analysis_engine.AnalysisEngine;
@@ -35,12 +36,14 @@ import com.google.common.collect.Immutab
 import com.google.common.collect.Multiset;
 
 /**
- * Extract durations of signs/symptoms.
+ * Extract durations of event mentions (e.g. sign/symptom or disease/disorder).
  * 
  * @author dmitriy dligach
  */
-public class SignSymptomDurations {
+public class EventDurationDistribution {
 
+  private static Class<? extends EventMention> targetClass = SignSymptomMention.class;
+  
   public static class Options  {
 
     @Option(
@@ -108,18 +111,18 @@ public class SignSymptomDurations {
 
       Collection<DocumentID> ids = JCasUtil.select(jCas, DocumentID.class);
       String fileName = ids.iterator().next().getDocumentID();
-      String signSymptomText = fileName.split("\\.")[0]; // e.g. "smoker.txt"
+      String mentionText = fileName.split("\\.")[0]; // e.g. "smoker.txt"
 
       // counts of different time granularities for this sign/symptom
       Multiset<String> durationDistribution = HashMultiset.create();
 
-      for(SignSymptomMention signSymptomMention : JCasUtil.select(jCas, SignSymptomMention.class)) {
-        if(signSymptomMention.getCoveredText().equals(signSymptomText)) {
-          if(isNegated(jCas, signSymptomMention) || isMedicationPattern(jCas, signSymptomMention)) {
+      for(EventMention mention : JCasUtil.select(jCas, targetClass)) {
+        if(mention.getCoveredText().equals(mentionText)) {
+          if(isNegated(jCas, mention) || isMedicationPattern(jCas, mention)) {
             continue;
           }
           
-          TimeMention nearestTimeMention = getNearestTimeMention(jCas, signSymptomMention);
+          TimeMention nearestTimeMention = getNearestTimeMention(jCas, mention);
           if(nearestTimeMention != null) {
             Matcher matcher = pattern.matcher(nearestTimeMention.getCoveredText());
 
@@ -134,9 +137,9 @@ public class SignSymptomDurations {
       }
 
       if(durationDistribution.size() > 0) { 
-        System.out.println(formatDistribution(signSymptomText, durationDistribution, ", ", true) + "[" + durationDistribution.size() + " instances]");
+        System.out.println(formatDistribution(mentionText, durationDistribution, ", ", true) + "[" + durationDistribution.size() + " instances]");
       }else{
-        System.out.println(signSymptomText + ": No duration information found.");
+        System.out.println(mentionText + ": No duration information found.");
       }
     }
     
@@ -144,9 +147,9 @@ public class SignSymptomDurations {
      * Return true if sign/symptom is negated.
      * TODO: using rules for now; switch to using a negation module
      */
-    private static boolean isNegated(JCas jCas, SignSymptomMention signSymptomMention) {
+    private static boolean isNegated(JCas jCas, EventMention mention) {
       
-      for(BaseToken token : JCasUtil.selectPreceding(jCas, BaseToken.class, signSymptomMention, 3)) {
+      for(BaseToken token : JCasUtil.selectPreceding(jCas, BaseToken.class, mention, 3)) {
         if(token.getCoveredText().equals("no")) {
           return true;
         }
@@ -159,9 +162,9 @@ public class SignSymptomDurations {
     * Return true if this is a medication pattern. 
      * E.g. five (5) ml po qid  (4 times a day) as needed for heartburn for 2 weeks.
      */
-    private static boolean isMedicationPattern(JCas jCas, SignSymptomMention signSymptomMention) {
+    private static boolean isMedicationPattern(JCas jCas, EventMention mention) {
       
-      for(BaseToken token : JCasUtil.selectPreceding(jCas, BaseToken.class, signSymptomMention, 1)) {
+      for(BaseToken token : JCasUtil.selectPreceding(jCas, BaseToken.class, mention, 1)) {
         if(token.getCoveredText().equals("for")) {
           return true;
         }
@@ -174,9 +177,9 @@ public class SignSymptomDurations {
      * Find nearest time mention that is within allowable distance. 
      * Return null if none found.
      */
-    private static TimeMention getNearestTimeMention(JCas jCas, SignSymptomMention signSymptomMention) {
+    private static TimeMention getNearestTimeMention(JCas jCas, EventMention mention) {
 
-      List<TimeMention> timeMentions = JCasUtil.selectFollowing(jCas, TimeMention.class, signSymptomMention, 1);
+      List<TimeMention> timeMentions = JCasUtil.selectFollowing(jCas, TimeMention.class, mention, 1);
       if(timeMentions.size() < 1) {
         return null;
       }
@@ -184,7 +187,7 @@ public class SignSymptomDurations {
       assert timeMentions.size() == 1;
       
       TimeMention nearestTimeMention = timeMentions.get(0);
-      int distance = JCasUtil.selectBetween(jCas, BaseToken.class, signSymptomMention, nearestTimeMention).size();
+      int distance = JCasUtil.selectBetween(jCas, BaseToken.class, mention, nearestTimeMention).size();
       if(distance > MAXDISTANCE) {
         return null;
       }
@@ -222,13 +225,13 @@ public class SignSymptomDurations {
      * Example: apnea, second:5, minute:1, hour:5, day:10, week:1, month:0, year:0
      */
     private static String formatDistribution(
-        String signSymptomText, 
+        String mentionText, 
         Multiset<String> durationDistribution, 
         String separator,
         boolean normalize) {
       
       List<String> distribution = new LinkedList<String>();
-      distribution.add(signSymptomText);
+      distribution.add(mentionText);
 
       double total = 0;
       if(normalize) {