You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2019/01/08 14:49:52 UTC

svn commit: r1850756 - in /ctakes/trunk/ctakes-temporal: ./ src/main/java/org/apache/ctakes/temporal/ae/ src/main/java/org/apache/ctakes/temporal/utils/

Author: seanfinan
Date: Tue Jan  8 14:49:52 2019
New Revision: 1850756

URL: http://svn.apache.org/viewvc?rev=1850756&view=rev
Log:
DocTimeApproximator - attempt to set doc time using latest pre-now date in document
CalendarUtil - some methods used by DocTimeApproximator and SimpleMedDatesFinder
SimpleMedDatesFinder - refactor to use CalendarUtil

Added:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeApproximator.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/utils/CalendarUtil.java
Modified:
    ctakes/trunk/ctakes-temporal/pom.xml
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java

Modified: ctakes/trunk/ctakes-temporal/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/pom.xml?rev=1850756&r1=1850755&r2=1850756&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/pom.xml (original)
+++ ctakes/trunk/ctakes-temporal/pom.xml Tue Jan  8 14:49:52 2019
@@ -78,6 +78,11 @@
 			<groupId>org.scala-lang</groupId>
 			<artifactId>scala-library</artifactId>
 		</dependency>
+		<dependency>
+			<groupId>com.rubiconproject.oss</groupId>
+			<artifactId>jchronic</artifactId>
+			<version>${jchronic.version}</version>
+		</dependency>
 	</dependencies>
 	<!-- The below is all necessary to unpack the UMLS resources since they 
 		can't be used from the classpath -->

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeApproximator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeApproximator.java?rev=1850756&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeApproximator.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeApproximator.java Tue Jan  8 14:49:52 2019
@@ -0,0 +1,108 @@
+package org.apache.ctakes.temporal.ae;
+
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.ctakes.core.util.SourceMetadataUtil;
+import org.apache.ctakes.temporal.utils.CalendarUtil;
+import org.apache.ctakes.typesystem.type.structured.SourceData;
+import org.apache.ctakes.typesystem.type.textsem.DateAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.log4j.Logger;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+
+import java.util.Calendar;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import static java.util.Calendar.DAY_OF_MONTH;
+import static org.apache.ctakes.core.pipeline.PipeBitInfo.TypeProduct.IDENTIFIED_ANNOTATION;
+import static org.apache.ctakes.core.pipeline.PipeBitInfo.TypeProduct.TIMEX;
+import static org.apache.ctakes.temporal.utils.CalendarUtil.NULL_CALENDAR;
+
+/**
+ * Fills in a missing document time with the latest past date found among the document's annotations.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 10/10/2018
+ */
+@PipeBitInfo(
+      name = "DocTimeApproximator",
+      description = "Sets the document time based upon the latest normalized date earlier than now.",
+      role = PipeBitInfo.Role.ANNOTATOR,
+      usables = { TIMEX, IDENTIFIED_ANNOTATION }
+)
+final public class DocTimeApproximator extends JCasAnnotator_ImplBase {
+
+   static private final Logger LOGGER = Logger.getLogger( "DocTimeApproximator" );
+
+   /**
+    * Keeps an existing source original date; otherwise approximates one from the
+    * {@link TimeMention}s and {@link DateAnnotation}s in the cas.
+    * {@inheritDoc}
+    */
+   @Override
+   public void process( final JCas jCas ) throws AnalysisEngineProcessException {
+      final SourceData sourceData = SourceMetadataUtil.getOrCreateSourceData( jCas );
+      final String docTime = sourceData.getSourceOriginalDate();
+      if ( docTime != null && !docTime.isEmpty() ) {
+         // An existing document time is never overwritten.
+         LOGGER.info( "Document Time is " + docTime );
+         return;
+      }
+
+      // The set collapses repeated dates; getLastCalendar discards any NULL_CALENDAR entries.
+      final Collection<Calendar> calendars = new HashSet<>();
+      for ( TimeMention timeMention : JCasUtil.select( jCas, TimeMention.class ) ) {
+         calendars.add( CalendarUtil.getCalendar( timeMention ) );
+      }
+      for ( DateAnnotation dateAnnotation : JCasUtil.select( jCas, DateAnnotation.class ) ) {
+         calendars.add( CalendarUtil.getCalendar( dateAnnotation ) );
+      }
+
+      final Calendar lastCalendar = getLastCalendar( calendars );
+      if ( NULL_CALENDAR.equals( lastCalendar ) ) {
+         LOGGER.info( "Could not parse Document Time." );
+         return;
+      }
+
+      setDocTime( sourceData, lastCalendar );
+   }
+
+   /**
+    * Picks the most recent calendar inside the accepted window in a single pass.
+    *
+    * @param calendars calendars in the document.
+    * @return the latest calendar after 1 Jan 1900 and over a day before now, or {@link CalendarUtil#NULL_CALENDAR}
+    */
+   static private Calendar getLastCalendar( final Collection<Calendar> calendars ) {
+      // Window bounds: strictly after 1 Jan 1900 and strictly before this time yesterday.
+      final Calendar year1900 = new Calendar.Builder().setDate( 1900, 0, 1 ).build();
+      final Calendar yesterday = Calendar.getInstance();
+      yesterday.add( DAY_OF_MONTH, -1 );
+      // Only the maximum is needed, so a single max() pass is used rather than a full sort.
+      return calendars.stream()
+                      .filter( c -> !NULL_CALENDAR.equals( c ) )
+                      .filter( c -> c.compareTo( year1900 ) > 0 )
+                      .filter( c -> c.compareTo( yesterday ) < 0 )
+                      .max( Calendar::compareTo )
+                      .orElse( NULL_CALENDAR );
+   }
+
+   /**
+    * Set the document time (source original date) to the calendar value.
+    *
+    * @param sourceData source data that receives the new original date.
+    * @param calendar   date to store, formatted by {@link CalendarUtil#createDigitDateText(Calendar)}.
+    */
+   static private void setDocTime( final SourceData sourceData, final Calendar calendar ) {
+      final String docTime = CalendarUtil.createDigitDateText( calendar );
+      sourceData.setSourceOriginalDate( docTime );
+      LOGGER.info( "Parsed Document Time is " + docTime );
+   }
+
+}

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java?rev=1850756&r1=1850755&r2=1850756&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java Tue Jan  8 14:49:52 2019
@@ -4,6 +4,7 @@ import org.apache.ctakes.core.pipeline.P
 import org.apache.ctakes.core.resource.FileLocator;
 import org.apache.ctakes.core.util.OntologyConceptUtil;
 import org.apache.ctakes.core.util.Pair;
+import org.apache.ctakes.temporal.utils.CalendarUtil;
 import org.apache.ctakes.typesystem.type.refsem.Date;
 import org.apache.ctakes.typesystem.type.textsem.DateAnnotation;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
@@ -27,7 +28,7 @@ import java.io.IOException;
 import java.io.InputStreamReader;
 import java.util.*;
 
-import static java.util.Calendar.DAY_OF_MONTH;
+import static org.apache.ctakes.temporal.utils.CalendarUtil.NULL_CALENDAR;
 
 /**
  * @author SPF , chip-nlp
@@ -82,9 +83,6 @@ final public class SimpleMedDatesFinder
    private String _cuiListPath;
 
 
-   static private final Calendar NULL_CALENDAR = new Calendar.Builder().setDate( 1, 1, 1 ).build();
-//   static private final Options PAST_OPTIONS = new Options( Pointer.PointerType.PAST );
-
    private Class<? extends Annotation> _lookupClass;
    private final Collection<String> _sectionList = new ArrayList<>();
    private final Collection<String> _cuiList = new ArrayList<>();
@@ -190,14 +188,14 @@ final public class SimpleMedDatesFinder
             spans.add( span );
             medMap.put( span, (MedicationEventMention)annotation );
          } else if ( annotation instanceof TimeMention ) {
-            final Calendar calendar = getCalendar( (TimeMention)annotation );
+            final Calendar calendar = CalendarUtil.getCalendar( (TimeMention)annotation );
             if ( !NULL_CALENDAR.equals( calendar ) ) {
                final Pair<Integer> span = createTextSpan( annotation, offset );
                spans.add( span );
                calendarMap.put( span, calendar );
             }
          } else if ( annotation instanceof DateAnnotation ) {
-            final Calendar calendar = getCalendar( (DateAnnotation)annotation );
+            final Calendar calendar = CalendarUtil.getCalendar( (DateAnnotation)annotation );
             if ( !NULL_CALENDAR.equals( calendar ) ) {
                final Pair<Integer> span = createTextSpan( annotation, offset );
                spans.add( span );
@@ -226,18 +224,12 @@ final public class SimpleMedDatesFinder
          if ( span.getValue1() > startIndex && span.getValue1() < startIndex + 5 ) {
             final Calendar start = calendarMap.get( span );
             if ( start != null ) {
-               startDate = new Date( jCas );
-               startDate.setDay( "" + start.get( DAY_OF_MONTH ) );
-               startDate.setMonth( "" + (start.get( Calendar.MONTH ) + 1) );
-               startDate.setYear( "" + start.get( Calendar.YEAR ) );
+               startDate = CalendarUtil.createTypeDate( jCas, start );
             }
          } else if ( span.getValue1() > stopIndex && span.getValue1() < stopIndex + 5 ) {
             final Calendar stop = calendarMap.get( span );
             if ( stop != null ) {
-               stopDate = new Date( jCas );
-               stopDate.setDay( "" + stop.get( DAY_OF_MONTH ) );
-               stopDate.setMonth( "" + (stop.get( Calendar.MONTH ) + 1) );
-               stopDate.setYear( "" + stop.get( Calendar.YEAR ) );
+               stopDate = CalendarUtil.createTypeDate( jCas, stop );
             }
          } else {
             final MedicationEventMention med = medMap.get( span );
@@ -288,66 +280,6 @@ final public class SimpleMedDatesFinder
    }
 
 
-   static private Calendar getCalendar( final TimeMention timeMention ) {
-      final org.apache.ctakes.typesystem.type.refsem.Date typeDate = timeMention.getDate();
-      if ( typeDate != null ) {
-         final int year = parseInt( typeDate.getYear() );
-         final int month = parseInt( typeDate.getMonth() );
-         final int day = parseInt( typeDate.getDay() );
-         if ( year == Integer.MIN_VALUE || month == Integer.MIN_VALUE || day == Integer.MIN_VALUE ) {
-            return NULL_CALENDAR;
-         }
-         LOGGER.debug( "TimeMention Date " + year + "" + month + "" + day );
-         return new Calendar.Builder().setDate( year, month - 1, day ).build();
-      }
-      return getCalendar( timeMention.getCoveredText() );
-   }
-
-   static private Calendar getCalendar( final DateAnnotation dateAnnotation ) {
-      return getCalendar( dateAnnotation.getCoveredText() );
-   }
-
-   static private Calendar getCalendar( final String text ) {
-      if ( isLousyDateText( text ) ) {
-         return NULL_CALENDAR;
-      }
-//      final Span span = Chronic.parse( text, PAST_OPTIONS );
-//      if ( span == null ) {
-//         return NULL_CALENDAR;
-//      }
-//      return span.getEndCalendar();
-
-      return NULL_CALENDAR;
-   }
-
-
-   static private boolean isLousyDateText( final String text ) {
-      if ( text.length() < 7 ) {
-         return true;
-      }
-      for ( char c : text.toCharArray() ) {
-         if ( Character.isDigit( c ) ) {
-            return false;
-         }
-      }
-      return true;
-   }
-
-   static private int parseInt( final String text ) {
-      if ( text == null || text.isEmpty() ) {
-         return Integer.MIN_VALUE;
-      }
-      for ( char c : text.toCharArray() ) {
-         if ( !Character.isDigit( c ) ) {
-            return Integer.MIN_VALUE;
-         }
-      }
-      try {
-         return Integer.parseInt( text );
-      } catch ( NumberFormatException nfE ) {
-         return Integer.MIN_VALUE;
-      }
-   }
 
 
    synchronized private void loadSections() throws ResourceInitializationException {

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/utils/CalendarUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/utils/CalendarUtil.java?rev=1850756&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/utils/CalendarUtil.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/utils/CalendarUtil.java Tue Jan  8 14:49:52 2019
@@ -0,0 +1,149 @@
+package org.apache.ctakes.temporal.utils;
+
+
+import com.mdimension.jchronic.Chronic;
+import com.mdimension.jchronic.Options;
+import com.mdimension.jchronic.tags.Pointer;
+import com.mdimension.jchronic.utils.Span;
+import org.apache.ctakes.typesystem.type.refsem.Date;
+import org.apache.ctakes.typesystem.type.textsem.DateAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.uima.jcas.JCas;
+
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.List;
+
+import static java.util.Calendar.*;
+
+
+/**
+ * Static helpers that convert type system dates and free text to {@link Calendar}s and back.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 1/8/2019
+ */
+final public class CalendarUtil {
+
+   private CalendarUtil() {
+   }
+
+   /** Sentinel returned when no date can be determined; test for it with {@code equals}. */
+   static public final Calendar NULL_CALENDAR = new Calendar.Builder().setDate( 1, 1, 1 ).build();
+   /** Options passed to Chronic for every free-text parse; built with a PAST pointer. */
+   static private final Options PAST_OPTIONS = new Options( Pointer.PointerType.PAST );
+
+   /**
+    * Prefers the structured date on the mention and falls back to parsing its covered text.
+    *
+    * @param timeMention mention to convert; may be null.
+    * @return Calendar for the mention's date or covered text, or {@link #NULL_CALENDAR}.
+    */
+   static public Calendar getCalendar( final TimeMention timeMention ) {
+      if ( timeMention == null ) {
+         return NULL_CALENDAR;
+      }
+      final Calendar typeCalendar = getCalendar( timeMention.getDate() );
+      if ( !NULL_CALENDAR.equals( typeCalendar ) ) {
+         return typeCalendar;
+      }
+      return getCalendar( timeMention.getCoveredText() );
+   }
+
+   /**
+    * Builds a calendar from whichever of year, month and day are all-digit integers.
+    *
+    * @param typeDate Type System Date, usually in a {@link TimeMention}; may be null.
+    * @return Calendar holding the parsed fields, or {@link #NULL_CALENDAR} if none parse.
+    */
+   static public Calendar getCalendar( final Date typeDate ) {
+      if ( typeDate == null ) {
+         return NULL_CALENDAR;
+      }
+      final int year = parseInt( typeDate.getYear() );
+      final int month = parseInt( typeDate.getMonth() );
+      final int day = parseInt( typeDate.getDay() );
+      if ( year == Integer.MIN_VALUE && month == Integer.MIN_VALUE && day == Integer.MIN_VALUE ) {
+         return NULL_CALENDAR;
+      }
+      final Calendar.Builder builder = new Calendar.Builder();
+      if ( year != Integer.MIN_VALUE ) {
+         builder.set( Calendar.YEAR, year );
+      }
+      if ( month != Integer.MIN_VALUE ) {
+         // Type system months are 1-based, Calendar months are 0-based.
+         builder.set( Calendar.MONTH, month - 1 );
+      }
+      if ( day != Integer.MIN_VALUE ) {
+         builder.set( Calendar.DAY_OF_MONTH, day );
+      }
+      return builder.build();
+   }
+
+   /**
+    * @param dateAnnotation annotation whose covered text is parsed; may be null.
+    * @return Calendar parsed from text, or {@link #NULL_CALENDAR}.
+    */
+   static public Calendar getCalendar( final DateAnnotation dateAnnotation ) {
+      if ( dateAnnotation == null ) {
+         return NULL_CALENDAR;
+      }
+      return getCalendar( dateAnnotation.getCoveredText() );
+   }
+
+   /**
+    * @param jCas     ye olde ...
+    * @param calendar some calendar with actual date information
+    * @return Type System Date with filled day, month, year values
+    */
+   static public Date createTypeDate( final JCas jCas, final Calendar calendar ) {
+      final Date date = new Date( jCas );
+      date.setDay( String.valueOf( calendar.get( Calendar.DAY_OF_MONTH ) ) );
+      date.setMonth( String.valueOf( calendar.get( Calendar.MONTH ) + 1 ) );
+      date.setYear( String.valueOf( calendar.get( Calendar.YEAR ) ) );
+      return date;
+   }
+
+   /**
+    * @param calendar -
+    * @return ugly format date consisting only of ASCII digits with twelve o'clock : YYYYMMDD1200
+    */
+   static public String createDigitDateText( final Calendar calendar ) {
+      // Locale.ROOT stops %d from emitting locale-specific digits under some default locales.
+      return String.format( java.util.Locale.ROOT, "%04d%02d%02d1200",
+            calendar.get( Calendar.YEAR ),
+            calendar.get( Calendar.MONTH ) + 1,
+            calendar.get( Calendar.DAY_OF_MONTH ) );
+   }
+
+   /**
+    * @param text free text that may describe a date; may be null or blank.
+    * @return Calendar parsed from text, or {@link #NULL_CALENDAR}.
+    */
+   static public Calendar getCalendar( final String text ) {
+      if ( text == null || text.trim().isEmpty() ) {
+         return NULL_CALENDAR;
+      }
+      final Span span = Chronic.parse( text, PAST_OPTIONS );
+      return span == null ? NULL_CALENDAR : span.getEndCalendar();
+   }
+
+   /**
+    * @param text -
+    * @return non-negative int value of text or {@link Integer#MIN_VALUE} if not possible.
+    */
+   static private int parseInt( final String text ) {
+      if ( text == null || text.isEmpty() || !text.chars().allMatch( Character::isDigit ) ) {
+         return Integer.MIN_VALUE;
+      }
+      try {
+         return Integer.parseInt( text );
+      } catch ( NumberFormatException nfE ) {
+         // All-digit text can still be too large for an int.
+         return Integer.MIN_VALUE;
+      }
+   }
+
+}
+