You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2013/08/27 18:49:45 UTC
svn commit: r1517879 -
/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java
Author: dligach
Date: Tue Aug 27 16:49:45 2013
New Revision: 1517879
URL: http://svn.apache.org/r1517879
Log:
add calc for frequencies over various time granularities
Modified:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java?rev=1517879&r1=1517878&r2=1517879&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java Tue Aug 27 16:49:45 2013
@@ -8,6 +8,8 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import org.apache.ctakes.relationextractor.eval.XMIReader;
import org.apache.ctakes.typesystem.type.structured.DocumentID;
@@ -28,11 +30,15 @@ import org.uimafit.util.JCasUtil;
import com.google.common.base.Function;
import com.google.common.base.Functions;
+import com.google.common.collect.HashMultiset;
+import com.google.common.collect.Multiset;
import com.google.common.collect.Ordering;
/**
* Extract durations of signs/symptoms.
*
+ * TODO: check drinking.txt; fewer day durations are captured than exist in data.
+ *
* @author dmitriy dligach
*/
public class SignSymptomDurations {
@@ -64,6 +70,9 @@ public class SignSymptomDurations {
// max distance between a time and an event
final int maxDistance = 2;
+
+ // regex to match different time granularities
+ Pattern pattern = Pattern.compile("(second|minute|hour|day|week|month|year)", Pattern.CASE_INSENSITIVE);
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
@@ -72,6 +81,9 @@ public class SignSymptomDurations {
String fileName = ids.iterator().next().getDocumentID();
String signSymptomText = fileName.split("\\.")[0]; // e.g. "smoker.txt" -> "smoker"
+ // counts of different time granularities for this sign/symptom
+ Multiset<String> durationDistribution = HashMultiset.create();
+
for(SignSymptomMention signSymptomMention : JCasUtil.select(jCas, SignSymptomMention.class)) {
if(signSymptomMention.getCoveredText().equals(signSymptomText)) {
@@ -89,10 +101,19 @@ public class SignSymptomDurations {
Collections.sort(sortedTimeMentions, Ordering.natural().onResultOf(getValue));
if(sortedTimeMentions.size() > 0 && distances.get(sortedTimeMentions.get(0)) <= maxDistance) {
- System.out.println(signSymptomMention.getCoveredText() + ": " + sortedTimeMentions.get(0).getCoveredText());
+
+ String timex = sortedTimeMentions.get(0).getCoveredText();
+ Matcher matcher = pattern.matcher(timex);
+ while(matcher.find()) {
+ durationDistribution.add(matcher.group());
+ }
}
}
}
+
+ if(durationDistribution.size() > 0) {
+ System.out.println(signSymptomText + ": " + durationDistribution);
+ }
}
}