You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2013/09/18 17:04:43 UTC

svn commit: r1524439 - /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java

Author: dligach
Date: Wed Sep 18 15:04:42 2013
New Revision: 1524439

URL: http://svn.apache.org/r1524439
Log:
removed some duration patterns that are too general

Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java?rev=1524439&r1=1524438&r2=1524439&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java Wed Sep 18 15:04:42 2013
@@ -41,29 +41,10 @@ import com.google.common.collect.Orderin
 /**
  * Extract durations of signs/symptoms.
  * 
- * TODO: check drinking.txt; fewer day durations are captured than exist in data.
- * 
  * @author dmitriy dligach
  */
 public class SignSymptomDurations {
 
-  // regular expression to match temporal durations
-  public final static String REGEX = "(sec|min|hour|hr|day|week|wk|mo|year|yr)";
-  
-  // mapping between temporal durations and their normal forms
-  public final static Map<String, String> MAPPING = ImmutableMap.<String, String>builder()
-      .put("sec", "second")
-      .put("min", "minute")
-      .put("hour", "hour")
-      .put("hr", "hour")
-      .put("day", "day")
-      .put("week", "week")
-      .put("wk", "week")
-      .put("mo", "month")
-      .put("year", "year")
-      .put("yr", "year")
-      .build(); 
-
   public static class Options extends Options_ImplBase {
 
     @Option(
@@ -81,18 +62,35 @@ public class SignSymptomDurations {
 		List<File> trainFiles = Arrays.asList(options.inputDirectory.listFiles());
     CollectionReader collectionReader = getCollectionReader(trainFiles);
 		
-    AnalysisEngine durationPrinter = AnalysisEngineFactory.createPrimitive(
-    		DurationPrinter.class);
+    AnalysisEngine temporalDurationExtractor = AnalysisEngineFactory.createPrimitive(
+    		TemporalDurationExtractor.class);
     		
-		SimplePipeline.runPipeline(collectionReader, durationPrinter);
+		SimplePipeline.runPipeline(collectionReader, temporalDurationExtractor);
 	}
   
-  public static class DurationPrinter extends JCasAnnotator_ImplBase {
-
-    // max distance between a time and an evenet
-    final int MAXDISTANCE = 2;
+  public static class TemporalDurationExtractor extends JCasAnnotator_ImplBase {
+    
+    // regular expression to match temporal durations in time mention annotations
+    private final static String REGEX = "(sec|min|hour|hrs|day|week|wk|month|year|yr)";
+    
+    // mapping between temporal durations and their normalized forms
+    private final static Map<String, String> MAPPING = ImmutableMap.<String, String>builder()
+        .put("sec", "second")
+        .put("min", "minute")
+        .put("hour", "hour")
+        .put("hrs", "hour")
+        .put("day", "day")
+        .put("week", "week")
+        .put("wk", "week")
+        .put("month", "month")
+        .put("year", "year")
+        .put("yr", "year")
+        .build(); 
+    
+    // max distance between an event and the time mention that defines the event's duration
+    private final static int MAXDISTANCE = 2;
 
-    // regex to match different time granularities
+    // regex to match different time granularities (e.g. 'day', 'month')
     Pattern pattern = Pattern.compile(REGEX, Pattern.CASE_INSENSITIVE);
     
     @Override
@@ -104,15 +102,15 @@ public class SignSymptomDurations {
 
       // counts of different time granularities for this sign/symptom
       Multiset<String> durationDistribution = HashMultiset.create();
-      
-      for(SignSymptomMention signSymptomMention : JCasUtil.select(jCas, SignSymptomMention.class)) {
 
+      for(SignSymptomMention signSymptomMention : JCasUtil.select(jCas, SignSymptomMention.class)) {
         if(signSymptomMention.getCoveredText().equals(signSymptomText)) {
+      
           TimeMention nearestTimeMention = getNearestTimeMention(jCas, signSymptomMention);
-
           if(nearestTimeMention != null) {
             Matcher matcher = pattern.matcher(nearestTimeMention.getCoveredText());
 
+            // need the loop to handle things like 'several days/weeks'
             while(matcher.find()) {
               String matchedDuration = matcher.group(); // e.g. "wks"
               String normalizedDuration = MAPPING.get(matchedDuration);
@@ -123,7 +121,7 @@ public class SignSymptomDurations {
       }
 
       if(durationDistribution.size() > 0) { 
-        System.out.println(signSymptomText + "," + convertToString(durationDistribution));
+        // System.out.println(signSymptomText + "," + convertToString(durationDistribution));
         // System.out.println(signSymptomText + ": " + durationDistribution);
       }
     }
@@ -133,9 +131,6 @@ public class SignSymptomDurations {
      */
     private static TimeMention getNearestTimeMention(JCas jCas, SignSymptomMention signSymptomMention) {
       
-      // max distance between a time and an evenet
-      final int MAXDISTANCE = 2;
-      
       // distances to time expressions from this sign/symptom
       Map<TimeMention, Integer> distances = new HashMap<TimeMention, Integer>();