You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2019/01/18 01:35:34 UTC

svn commit: r1851594 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal: ae/SimpleMedDatesFinder.java utils/CalendarUtil.java

Author: seanfinan
Date: Fri Jan 18 01:35:33 2019
New Revision: 1851594

URL: http://svn.apache.org/viewvc?rev=1851594&view=rev
Log:
Fix for dash dates (2002-4-12), accept that chunks don't necessarily cover dates, attempt to account for short incorrect TimeMentions

Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/utils/CalendarUtil.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java?rev=1851594&r1=1851593&r2=1851594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java Fri Jan 18 01:35:33 2019
@@ -82,7 +82,8 @@ final public class SimpleMedDatesFinder
    private String _cuiListPath;
 
 
-   static private final Pattern PARTIAL_DATE = Pattern.compile( "[0-9]{0,2}/[0-9]{0,2}/[0-9]{4}" );
+   static private final Pattern SLASH_DATE = Pattern.compile( "[0-9]{0,2}/[0-9]{0,2}/[0-9]{2,4}" );
+   static private final Pattern DASH_DATE = Pattern.compile( "[0-9]{1,4}-[0-9]{1,2}-[0-9]{1,4}" );
 
    private Class<? extends Annotation> _lookupClass;
    private final Collection<String> _sectionList = new ArrayList<>();
@@ -191,7 +192,10 @@ final public class SimpleMedDatesFinder
             final Pair<Integer> span = createTextSpan( annotation, offset );
             spans.add( span );
             medMap.put( span, (EventMention)annotation );
-         } else if ( annotation instanceof TimeMention ) {
+         } else if ( annotation instanceof TimeMention
+                     && annotation.getCoveredText().length() >= 8
+                     && annotation.getCoveredText().length() <= 10
+                     && !spans.contains( createTextSpan( annotation, offset ) ) ) {
             final Calendar calendar = CalendarUtil.getCalendar( (TimeMention)annotation );
             if ( !NULL_CALENDAR.equals( calendar ) ) {
                final Pair<Integer> span = createTextSpan( annotation, offset );
@@ -199,6 +203,7 @@ final public class SimpleMedDatesFinder
                calendarMap.put( span, calendar );
             }
          } else if ( annotation instanceof DateAnnotation
+                     && annotation.getCoveredText().length() >= 8
                      && !spans.contains( createTextSpan( annotation, offset ) ) ) {
             final Calendar calendar = CalendarUtil.getCalendar( (DateAnnotation)annotation );
             if ( !NULL_CALENDAR.equals( calendar ) ) {
@@ -209,9 +214,16 @@ final public class SimpleMedDatesFinder
          } else if ( annotation instanceof Chunk
                      && annotation.getCoveredText().length() >= 6
                      && annotation.getCoveredText().length() <= 10
-                     && PARTIAL_DATE.matcher( annotation.getCoveredText() ).matches()
-                     && !spans.contains( createTextSpan( annotation, offset ) ) ) {
-            final Calendar calendar = CalendarUtil.getSlashCalendar( annotation.getCoveredText() );
+                     && (SLASH_DATE.matcher( annotation.getCoveredText() ).matches()
+                         || DASH_DATE.matcher( annotation.getCoveredText() ).matches()) ) {
+            // Chunks are not always reliable.  2005-03-06 is not a chunk ...
+//            LOGGER.warn( "Chunk " + createTextSpan( annotation, offset ) + " " + annotation.getCoveredText() );
+            final Calendar calendar;
+            if ( SLASH_DATE.matcher( annotation.getCoveredText() ).matches() ) {
+               calendar = CalendarUtil.getSlashCalendar( annotation.getCoveredText() );
+            } else {
+               calendar = CalendarUtil.getDashCalendar( annotation.getCoveredText() );
+            }
             if ( !NULL_CALENDAR.equals( calendar ) ) {
                final Pair<Integer> span = createTextSpan( annotation, offset );
                spans.add( span );
@@ -219,12 +231,16 @@ final public class SimpleMedDatesFinder
             }
          }
       }
-      processSpans( jCas, spans, medMap, calendarMap, startSpans, stopSpans );
+      startSpans.sort( Integer::compareTo );
+      stopSpans.sort( Integer::compareTo );
+      spans.sort( Comparator.comparingInt( Pair::getValue1 ) );
+      processSpans( jCas, offset, spans, medMap, calendarMap, startSpans, stopSpans );
    }
 
 
    static private void processSpans( final JCas jCas,
-                                     final List<Pair<Integer>> medDateSpans,
+                                     final int offset,
+                                     final List<Pair<Integer>> medOrDateSpans,
                                      final Map<Pair<Integer>, EventMention> medMap,
                                      final Map<Pair<Integer>, Calendar> calendarMap,
                                      final List<Integer> startSpans,
@@ -237,38 +253,12 @@ final public class SimpleMedDatesFinder
       TimeMention startMention = null;
       TimeMention stopMention = null;
 
-      for ( int z = medDateSpans.size() - 1; z >= 0; z-- ) {
-         final Pair<Integer> span = medDateSpans.get( z );
-         int startSpanBegin = startSpans.get( startIndex );
-         int stopSpanBegin = stopSpans.get( stopIndex );
-         if ( span.getValue1() > startSpanBegin && span.getValue1() < startSpanBegin + 15 ) {
-            final Calendar start = calendarMap.get( span );
-            if ( start != null ) {
-               startDate = CalendarUtil.createTypeDate( jCas, start );
-               startMention = new TimeMention( jCas, span.getValue1(), span.getValue2() );
-               startMention.setDate( startDate );
-               startIndex = Math.max( startIndex - 1, 0 );
-            }
-         } else if ( span.getValue1() > stopSpanBegin && span.getValue1() < stopSpanBegin + 15 ) {
-            final Calendar stop = calendarMap.get( span );
-            if ( stop != null ) {
-               stopDate = CalendarUtil.createTypeDate( jCas, stop );
-               stopMention = new TimeMention( jCas, span.getValue1(), span.getValue2() );
-               stopMention.setDate( stopDate );
-               stopIndex = Math.max( stopIndex - 1, 0 );
-               startDate = null;
-               startMention = null;
-            }
-         } else {
-            final EventMention med = medMap.get( span );
-            if ( med == null ) {
-               // possibly some interrupting date?  Reset the dates.
-               startDate = null;
-               startMention = null;
-               stopDate = null;
-               stopMention = null;
-               continue;
-            }
+      for ( int z = medOrDateSpans.size() - 1; z >= 0; z-- ) {
+         final Pair<Integer> medOrDateSpan = medOrDateSpans.get( z );
+         final int startSpanBegin = startIndex >= 0 ? startSpans.get( startIndex ) : Integer.MAX_VALUE;
+         final int stopSpanBegin = stopIndex >= 0 ? stopSpans.get( stopIndex ) : Integer.MAX_VALUE;
+         final EventMention med = medMap.get( medOrDateSpan );
+         if ( med != null ) {
             if ( med instanceof MedicationEventMention ) {
                if ( startDate != null ) {
                   ((MedicationEventMention)med).setStartDate( startDate );
@@ -284,6 +274,25 @@ final public class SimpleMedDatesFinder
                   ((MedicationMention)med).setEndDate( stopMention );
                }
             }
+         } else {
+            final Calendar calendar = calendarMap.get( medOrDateSpan );
+            if ( calendar == null ) {
+            } else if ( medOrDateSpan.getValue1() > startSpanBegin &&
+                        medOrDateSpan.getValue1() < startSpanBegin + 15 ) {
+               startDate = CalendarUtil.createTypeDate( jCas, calendar );
+               startMention = new TimeMention( jCas,
+                     offset + medOrDateSpan.getValue1(), offset + medOrDateSpan.getValue2() );
+               startMention.setDate( startDate );
+               startIndex--;
+            } else if ( medOrDateSpan.getValue1() > stopSpanBegin && medOrDateSpan.getValue1() < stopSpanBegin + 15 ) {
+               stopDate = CalendarUtil.createTypeDate( jCas, calendar );
+               stopMention = new TimeMention( jCas,
+                     offset + medOrDateSpan.getValue1(), offset + medOrDateSpan.getValue2() );
+               stopMention.setDate( stopDate );
+               stopIndex--;
+               startDate = null;
+               startMention = null;
+            }
          }
       }
    }

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/utils/CalendarUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/utils/CalendarUtil.java?rev=1851594&r1=1851593&r2=1851594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/utils/CalendarUtil.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/utils/CalendarUtil.java Fri Jan 18 01:35:33 2019
@@ -128,7 +128,7 @@ final public class CalendarUtil {
    }
 
    /**
-    * @param text something with 0-2 month digits, 0-2 day digits and 4 year digits all divided by slash
+    * @param text something with 0-2 month digits, 0-2 day digits and 2-4 year digits all divided by slash
     * @return Calendar parsed from text, or {@link #NULL_CALENDAR}.
     */
    static public Calendar getSlashCalendar( final String text ) {
@@ -144,7 +144,36 @@ final public class CalendarUtil {
          day = dayI;
       }
       final int year = parseInt( splits[ 2 ] );
-      return new GregorianCalendar( year, month - 1, day );
+      return createCalendar( month, day, year );
+   }
+
+   /**
+    * @param text three numbers divided by dash: month-day-year, or year-month-day when the first number has 4 digits
+    * @return Calendar parsed from text, or {@link #NULL_CALENDAR}.
+    */
+   static public Calendar getDashCalendar( final String text ) {
+      final String[] splits = StringUtil.fastSplit( text, '-' );
+      final int first = parseInt( splits[ 0 ] );
+      final int second = parseInt( splits[ 1 ] );
+      final int third = parseInt( splits[ 2 ] );
+      if ( splits[ 0 ].length() == 4 ) {
+         // A leading 4 digit number is the year: yyyy-mm-dd, so 2002-4-12 is April 12, 2002.
+         return createCalendar( second, third, first );
+      }
+      return createCalendar( first, second, third );
+   }
+
+   static private Calendar createCalendar( final int month, final int day, final int year ) {
+      int y2kYear = year;
+      if ( year < 100 ) {
+         // silly kludge for 2 digit years: below 30 becomes 20xx, 30 to 99 becomes 19xx
+         if ( year < 30 ) {
+            y2kYear += 2000;
+         } else {
+            y2kYear += 1900;
+         }
+      }
+      return new GregorianCalendar( y2kYear, month - 1, day ); // GregorianCalendar months are 0-based
+   }
 
    /**