You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2014/02/25 17:13:22 UTC
svn commit: r1571722 -
/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/EventDurationDistribution.java
Author: dligach
Date: Tue Feb 25 16:13:22 2014
New Revision: 1571722
URL: http://svn.apache.org/r1571722
Log:
refactored so that it can now be used for sign/symptoms as well as disease/disorders (and other event mentions)
Added:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/EventDurationDistribution.java
- copied, changed from r1571720, ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java
Copied: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/EventDurationDistribution.java (from r1571720, ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java)
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/EventDurationDistribution.java?p2=ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/EventDurationDistribution.java&p1=ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java&r1=1571720&r2=1571722&rev=1571722&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/EventDurationDistribution.java Tue Feb 25 16:13:22 2014
@@ -14,6 +14,7 @@ import java.util.regex.Pattern;
import org.apache.ctakes.relationextractor.eval.XMIReader;
import org.apache.ctakes.typesystem.type.structured.DocumentID;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textsem.SignSymptomMention;
import org.apache.ctakes.typesystem.type.textsem.TimeMention;
import org.apache.uima.analysis_engine.AnalysisEngine;
@@ -35,12 +36,14 @@ import com.google.common.collect.Immutab
import com.google.common.collect.Multiset;
/**
- * Extract durations of signs/symptoms.
+ * Extract durations of event mentions (e.g. sign/symptom or disease/disorder).
*
* @author dmitriy dligach
*/
-public class SignSymptomDurations {
+public class EventDurationDistribution {
+ private static Class<? extends EventMention> targetClass = SignSymptomMention.class;
+
public static class Options {
@Option(
@@ -108,18 +111,18 @@ public class SignSymptomDurations {
Collection<DocumentID> ids = JCasUtil.select(jCas, DocumentID.class);
String fileName = ids.iterator().next().getDocumentID();
- String signSymptomText = fileName.split("\\.")[0]; // e.g. "smoker.txt"
+ String mentionText = fileName.split("\\.")[0]; // e.g. "smoker.txt"
// counts of different time granularities for this event mention
Multiset<String> durationDistribution = HashMultiset.create();
- for(SignSymptomMention signSymptomMention : JCasUtil.select(jCas, SignSymptomMention.class)) {
- if(signSymptomMention.getCoveredText().equals(signSymptomText)) {
- if(isNegated(jCas, signSymptomMention) || isMedicationPattern(jCas, signSymptomMention)) {
+ for(EventMention mention : JCasUtil.select(jCas, targetClass)) {
+ if(mention.getCoveredText().equals(mentionText)) {
+ if(isNegated(jCas, mention) || isMedicationPattern(jCas, mention)) {
continue;
}
- TimeMention nearestTimeMention = getNearestTimeMention(jCas, signSymptomMention);
+ TimeMention nearestTimeMention = getNearestTimeMention(jCas, mention);
if(nearestTimeMention != null) {
Matcher matcher = pattern.matcher(nearestTimeMention.getCoveredText());
@@ -134,9 +137,9 @@ public class SignSymptomDurations {
}
if(durationDistribution.size() > 0) {
- System.out.println(formatDistribution(signSymptomText, durationDistribution, ", ", true) + "[" + durationDistribution.size() + " instances]");
+ System.out.println(formatDistribution(mentionText, durationDistribution, ", ", true) + "[" + durationDistribution.size() + " instances]");
}else{
- System.out.println(signSymptomText + ": No duration information found.");
+ System.out.println(mentionText + ": No duration information found.");
}
}
@@ -144,9 +147,9 @@ public class SignSymptomDurations {
* Return true if the event mention is negated.
* TODO: using rules for now; switch to using a negation module
*/
- private static boolean isNegated(JCas jCas, SignSymptomMention signSymptomMention) {
+ private static boolean isNegated(JCas jCas, EventMention mention) {
- for(BaseToken token : JCasUtil.selectPreceding(jCas, BaseToken.class, signSymptomMention, 3)) {
+ for(BaseToken token : JCasUtil.selectPreceding(jCas, BaseToken.class, mention, 3)) {
if(token.getCoveredText().equals("no")) {
return true;
}
@@ -159,9 +162,9 @@ public class SignSymptomDurations {
* Return true if this is a medication pattern.
* E.g. five (5) ml po qid (4 times a day) as needed for heartburn for 2 weeks.
*/
- private static boolean isMedicationPattern(JCas jCas, SignSymptomMention signSymptomMention) {
+ private static boolean isMedicationPattern(JCas jCas, EventMention mention) {
- for(BaseToken token : JCasUtil.selectPreceding(jCas, BaseToken.class, signSymptomMention, 1)) {
+ for(BaseToken token : JCasUtil.selectPreceding(jCas, BaseToken.class, mention, 1)) {
if(token.getCoveredText().equals("for")) {
return true;
}
@@ -174,9 +177,9 @@ public class SignSymptomDurations {
* Find nearest time mention that is within allowable distance.
* Return null if none found.
*/
- private static TimeMention getNearestTimeMention(JCas jCas, SignSymptomMention signSymptomMention) {
+ private static TimeMention getNearestTimeMention(JCas jCas, EventMention mention) {
- List<TimeMention> timeMentions = JCasUtil.selectFollowing(jCas, TimeMention.class, signSymptomMention, 1);
+ List<TimeMention> timeMentions = JCasUtil.selectFollowing(jCas, TimeMention.class, mention, 1);
if(timeMentions.size() < 1) {
return null;
}
@@ -184,7 +187,7 @@ public class SignSymptomDurations {
assert timeMentions.size() == 1;
TimeMention nearestTimeMention = timeMentions.get(0);
- int distance = JCasUtil.selectBetween(jCas, BaseToken.class, signSymptomMention, nearestTimeMention).size();
+ int distance = JCasUtil.selectBetween(jCas, BaseToken.class, mention, nearestTimeMention).size();
if(distance > MAXDISTANCE) {
return null;
}
@@ -222,13 +225,13 @@ public class SignSymptomDurations {
* Example: apnea, second:5, minute:1, hour:5, day:10, week:1, month:0, year:0
*/
private static String formatDistribution(
- String signSymptomText,
+ String mentionText,
Multiset<String> durationDistribution,
String separator,
boolean normalize) {
List<String> distribution = new LinkedList<String>();
- distribution.add(signSymptomText);
+ distribution.add(mentionText);
double total = 0;
if(normalize) {