You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2014/04/01 23:20:49 UTC
svn commit: r1583791 - in
/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration:
EventDurationDistribution.java Utils.java
Author: dligach
Date: Tue Apr 1 21:20:48 2014
New Revision: 1583791
URL: http://svn.apache.org/r1583791
Log:
switch event duration extractor to using bethard timex normalizer
Modified:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/EventDurationDistribution.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/EventDurationDistribution.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/EventDurationDistribution.java?rev=1583791&r1=1583790&r2=1583791&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/EventDurationDistribution.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/EventDurationDistribution.java Tue Apr 1 21:20:48 2014
@@ -5,6 +5,7 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
+import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
@@ -72,10 +73,10 @@ public class EventDurationDistribution {
public static class TemporalDurationExtractor extends JCasAnnotator_ImplBase {
// regular expression to match temporal durations in time mention annotations
- private final static String REGEX = "(sec|min|hour|hrs|day|week|wk|month|year|yr|decade)";
+ private final static String regex = "(sec|min|hour|hrs|day|week|wk|month|year|yr|decade)";
- // mapping between temporal durations and their normalized forms
- private final static Map<String, String> MAPPING = ImmutableMap.<String, String>builder()
+ // mapping between time units and their normalized forms
+ private final static Map<String, String> abbreviationToTimeUnit = ImmutableMap.<String, String>builder()
.put("sec", "second")
.put("min", "minute")
.put("hour", "hour")
@@ -89,22 +90,11 @@ public class EventDurationDistribution {
.put("decade", "decade")
.build();
- // unique temporal bins; all time mentions will be classified into one of them
- private final static List<String> BINS = Arrays.asList(
- "second",
- "minute",
- "hour",
- "day",
- "week",
- "month",
- "year",
- "decade");
-
// max distance between an event and the time mention that defines the event's duration
private final static int MAXDISTANCE = 2;
- // regex to match different time granularities (e.g. 'day', 'month')
- Pattern pattern = Pattern.compile(REGEX, Pattern.CASE_INSENSITIVE);
+ // regex to match different time units (e.g. 'day', 'month')
+ Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
@@ -113,7 +103,7 @@ public class EventDurationDistribution {
String fileName = ids.iterator().next().getDocumentID();
String mentionText = fileName.split("\\.")[0]; // e.g. "smoker.txt"
- // counts of different time granularities for this sign/symptom
+ // counts of different time units for this sign/symptom
Multiset<String> durationDistribution = HashMultiset.create();
for(EventMention mention : JCasUtil.select(jCas, targetClass)) {
@@ -123,26 +113,33 @@ public class EventDurationDistribution {
}
TimeMention nearestTimeMention = getNearestTimeMention(jCas, mention);
- if(nearestTimeMention != null) {
+ if(nearestTimeMention == null) {
+ continue;
+ }
+
+ // try to parse this timex with Bethard normalizer
+ HashSet<String> timeUnits = Utils.getTimeUnits(nearestTimeMention.getCoveredText());
+ if(timeUnits.size() > 0) {
+ for(String timeUnit : timeUnits) {
+ durationDistribution.add(timeUnit);
+ }
+ } else {
+ // could be an abbreviation e.g. "wks"
Matcher matcher = pattern.matcher(nearestTimeMention.getCoveredText());
-
- System.out.println(nearestTimeMention.getCoveredText());
-
- // need the loop to handle things like 'several days/weeks'
+ // need a loop to handle things like 'several days/weeks'
while(matcher.find()) {
- String matchedDuration = matcher.group(); // e.g. "wks"
- String normalizedDuration = MAPPING.get(matchedDuration);
- durationDistribution.add(normalizedDuration);
- }
+ String matchedTimeUnit = matcher.group(); // e.g. "wks"
+ String normalizedTimeUnit = abbreviationToTimeUnit.get(matchedTimeUnit);
+ System.out.println(nearestTimeMention.getCoveredText() + ": " + normalizedTimeUnit);
+ durationDistribution.add(normalizedTimeUnit);
+ }
}
}
}
if(durationDistribution.size() > 0) {
-// System.out.println(Utils.formatDistribution(mentionText, durationDistribution, ", ", true) + "[" + durationDistribution.size() + " instances]");
- }else{
-// System.out.println(mentionText + ": No duration information found.");
- }
+// System.out.println(Utils.formatDistribution(mentionText, durationDistribution, ", ", false));
+ }
}
/**
@@ -176,7 +173,7 @@ public class EventDurationDistribution {
}
/**
- * Find nearest time mention that is within allowable distance.
+ * Find nearest time mention on the right that is within allowable distance.
* Return null if none found.
*/
private static TimeMention getNearestTimeMention(JCas jCas, EventMention mention) {
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java?rev=1583791&r1=1583790&r2=1583791&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java Tue Apr 1 21:20:48 2014
@@ -52,12 +52,12 @@ public class Utils {
*/
public static HashSet<String> getTimeUnits(String timex) {
+ HashSet<String> timeUnits = new HashSet<>();
Set<TemporalUnit> units = runTimexParser(timex.toLowerCase());
if(units == null) {
- return null;
+ return timeUnits;
}
- HashSet<String> timeUnits = new HashSet<>();
scala.collection.Iterator<TemporalUnit> iterator = units.iterator();
while(iterator.hasNext()) {
TemporalUnit unit = iterator.next();