You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2014/04/01 22:13:17 UTC

svn commit: r1583774 - /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java

Author: dligach
Date: Tue Apr  1 20:13:17 2014
New Revision: 1583774

URL: http://svn.apache.org/r1583774
Log:
Wrote a wrapper for Bethard normalizer that takes a timex string and returns a set of time units

Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java?rev=1583774&r1=1583773&r2=1583774&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java Tue Apr  1 20:13:17 2014
@@ -45,33 +45,89 @@ public class Utils {
   public static final String[] bins = {"second", "minute", "hour", "day", "week", "month", "year", "decade"};
   
   /**
+   * Extract time unit(s) from a temporal expression.
+   * Extracted time units should be a subset of the bins above.
+   * Return an empty set if time units could not be extracted.
+   * E.g. July 5, 1984 -> day
+   */
+  public static HashSet<String> getTimeUnits(String timex) {
+    // The parser is given a lower-cased copy of the expression.
+    Set<TemporalUnit> units = runTimexParser(timex.toLowerCase());
+    if(units == null) {
+      return new HashSet<>(); // nothing extracted: hand back an empty set, never null
+    }
+    
+    HashSet<String> timeUnits = new HashSet<>();
+    scala.collection.Iterator<TemporalUnit> iterator = units.iterator();
+    while(iterator.hasNext()) {
+      TemporalUnit unit = iterator.next();
+      String bin = putInBin(unit.getName());
+      if(bin != null) { // a unit that putInBin cannot place in a bin is dropped
+        timeUnits.add(bin);
+      }
+    }
+    
+    return timeUnits;
+  }
+  
+  /**
+   * Use Bethard normalizer to map a temporal expression to a time unit.
+   */
+  public static Set<TemporalUnit> runTimexParser(String timex) {
+    // Returns null when parsing does not succeed or when the result is a TimeSpanSet (not handled yet).
+    URL grammarURL = DurationEventTimeFeatureExtractor.class.getResource("/info/bethard/timenorm/en.grammar");
+    TemporalExpressionParser parser = new TemporalExpressionParser(grammarURL); // NOTE(review): a new parser is built on every call
+    TimeSpan anchor = TimeSpan.of(2013, 12, 16); // fixed anchor handed to parse() below
+    Try<Temporal> result = parser.parse(timex, anchor);
+
+    Set<TemporalUnit> units = null;
+    if (result.isSuccess()) {
+      Temporal temporal = result.get();
+
+      if (temporal instanceof Period) {
+        units = ((Period) temporal).unitAmounts().keySet();
+      } else if (temporal instanceof PeriodSet) {
+        units = ((PeriodSet) temporal).period().unitAmounts().keySet();
+      } else if (temporal instanceof TimeSpan) {
+        units = ((TimeSpan) temporal).period().unitAmounts().keySet();
+      } else if (temporal instanceof TimeSpanSet) {
+        Set<TemporalField> fields = ((TimeSpanSet) temporal).fields().keySet(); // NOTE(review): currently unused
+        units = null; // fill units by calling .getBaseUnit() on each field
+      }
+    }
+    
+    return units;
+  }
+  
+  /**
    * Take the time unit from Bethard noramlizer
-   * and output a coarser time unit, i.e. one of the eight bins
+   * and return a coarser time unit, i.e. one of the eight bins.
+   * Return null, if this cannot be done. 
    */
-  public static String makeCoarse(String timeUnit) {
+  public static String putInBin(String timeUnit) {
     
-    HashSet<String> allowableTimeUnits = new HashSet<String>(Arrays.asList(bins));
+    HashSet<String> allowableTimeUnits = new HashSet<>(Arrays.asList(bins));
     
-    // map output of Behard's normalizer to coarser time units
+    // e.g. Years -> year
+    String singularAndLowercased = timeUnit.substring(0, timeUnit.length() - 1).toLowerCase();
+
+    // is this one of the bins already?
+    if(allowableTimeUnits.contains(singularAndLowercased)) {
+      return singularAndLowercased;
+    } 
+
+    // units that Bethard normalizer outputs mapped to one of the eight bins
     Map<String, String> mapping = ImmutableMap.<String, String>builder()
         .put("afternoon", "hour")
         .put("evening", "hour")
-        .put("fall", "month")
-        .put("winter", "month")
         .put("morning", "hour")
         .put("night", "hour")
-        .put("quarteryear", "month")
+        .put("fall", "month")
+        .put("winter", "month")
         .put("spring", "month")
         .put("summer", "month")
+        .put("quarteryear", "month")
         .build(); 
-
-    // e.g. Years -> year
-    String singularAndLowercased = timeUnit.substring(0, timeUnit.length() - 1).toLowerCase();
-
-    // is this one of the bins?
-    if(allowableTimeUnits.contains(singularAndLowercased)) {
-      return singularAndLowercased;
-    } 
     
     // it's not one of the bins; can we map to to a bin?
     if(mapping.get(singularAndLowercased) != null) {
@@ -106,35 +162,6 @@ public class Utils {
   
     return expectation / timeUnitInSeconds.get("decade");
   }
-
-  /*
-   * Use Bethard normalizer to map a temporal expression to a time unit.
-   */
-  public static Set<TemporalUnit> normalize(String timex) {
-
-    URL grammarURL = DurationEventTimeFeatureExtractor.class.getResource("/info/bethard/timenorm/en.grammar");
-    TemporalExpressionParser parser = new TemporalExpressionParser(grammarURL);
-    TimeSpan anchor = TimeSpan.of(2013, 12, 16);
-    Try<Temporal> result = parser.parse(timex, anchor);
-
-    Set<TemporalUnit> units = null;
-    if (result.isSuccess()) {
-      Temporal temporal = result.get();
-
-      if (temporal instanceof Period) {
-        units = ((Period) temporal).unitAmounts().keySet();
-      } else if (temporal instanceof PeriodSet) {
-        units = ((PeriodSet) temporal).period().unitAmounts().keySet();
-      } else if (temporal instanceof TimeSpan) {
-        units = ((TimeSpan) temporal).period().unitAmounts().keySet();
-      } else if (temporal instanceof TimeSpanSet) {
-        Set<TemporalField> fields = ((TimeSpanSet) temporal).fields().keySet();
-        units = null; // fill units by calling .getBaseUnit() on each field
-      }
-    }
-    
-    return units;
-  }
   
   /**
    * Take a time unit and return a probability distribution
@@ -236,7 +263,7 @@ public class Utils {
   
   public static void main(String[] args) throws IOException {
     
-    String lemma = lemmatize("left", "VBD");
-    System.out.println(lemma);
+    HashSet<String> timeUnits = getTimeUnits("three months"); // manual check: extract units from a sample expression
+    System.out.println(timeUnits);
   }
 }