You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2019/01/18 01:35:34 UTC
svn commit: r1851594 - in
/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal:
ae/SimpleMedDatesFinder.java utils/CalendarUtil.java
Author: seanfinan
Date: Fri Jan 18 01:35:33 2019
New Revision: 1851594
URL: http://svn.apache.org/viewvc?rev=1851594&view=rev
Log:
Fix for dash dates (2002-4-12), accept that chunks don't necessarily cover dates, attempt to account for short incorrect TimeMentions
Modified:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/utils/CalendarUtil.java
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java?rev=1851594&r1=1851593&r2=1851594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java Fri Jan 18 01:35:33 2019
@@ -82,7 +82,8 @@ final public class SimpleMedDatesFinder
private String _cuiListPath;
- static private final Pattern PARTIAL_DATE = Pattern.compile( "[0-9]{0,2}/[0-9]{0,2}/[0-9]{4}" );
+ static private final Pattern SLASH_DATE = Pattern.compile( "[0-9]{0,2}/[0-9]{0,2}/[0-9]{2,4}" );
+ static private final Pattern DASH_DATE = Pattern.compile( "[0-9]{1,4}-[0-9]{1,2}-[0-9]{1,4}" );
private Class<? extends Annotation> _lookupClass;
private final Collection<String> _sectionList = new ArrayList<>();
@@ -191,7 +192,10 @@ final public class SimpleMedDatesFinder
final Pair<Integer> span = createTextSpan( annotation, offset );
spans.add( span );
medMap.put( span, (EventMention)annotation );
- } else if ( annotation instanceof TimeMention ) {
+ } else if ( annotation instanceof TimeMention
+ && annotation.getCoveredText().length() >= 8
+ && annotation.getCoveredText().length() <= 10
+ && !spans.contains( createTextSpan( annotation, offset ) ) ) {
final Calendar calendar = CalendarUtil.getCalendar( (TimeMention)annotation );
if ( !NULL_CALENDAR.equals( calendar ) ) {
final Pair<Integer> span = createTextSpan( annotation, offset );
@@ -199,6 +203,7 @@ final public class SimpleMedDatesFinder
calendarMap.put( span, calendar );
}
} else if ( annotation instanceof DateAnnotation
+ && annotation.getCoveredText().length() >= 8
&& !spans.contains( createTextSpan( annotation, offset ) ) ) {
final Calendar calendar = CalendarUtil.getCalendar( (DateAnnotation)annotation );
if ( !NULL_CALENDAR.equals( calendar ) ) {
@@ -209,9 +214,16 @@ final public class SimpleMedDatesFinder
} else if ( annotation instanceof Chunk
&& annotation.getCoveredText().length() >= 6
&& annotation.getCoveredText().length() <= 10
- && PARTIAL_DATE.matcher( annotation.getCoveredText() ).matches()
- && !spans.contains( createTextSpan( annotation, offset ) ) ) {
- final Calendar calendar = CalendarUtil.getSlashCalendar( annotation.getCoveredText() );
+ && (SLASH_DATE.matcher( annotation.getCoveredText() ).matches()
+ || DASH_DATE.matcher( annotation.getCoveredText() ).matches()) ) {
+ // Chunks are not always reliable. 2005-03-06 is not a chunk ...
+// LOGGER.warn( "Chunk " + createTextSpan( annotation, offset ) + " " + annotation.getCoveredText() );
+ final Calendar calendar;
+ if ( SLASH_DATE.matcher( annotation.getCoveredText() ).matches() ) {
+ calendar = CalendarUtil.getSlashCalendar( annotation.getCoveredText() );
+ } else {
+ calendar = CalendarUtil.getDashCalendar( annotation.getCoveredText() );
+ }
if ( !NULL_CALENDAR.equals( calendar ) ) {
final Pair<Integer> span = createTextSpan( annotation, offset );
spans.add( span );
@@ -219,12 +231,16 @@ final public class SimpleMedDatesFinder
}
}
}
- processSpans( jCas, spans, medMap, calendarMap, startSpans, stopSpans );
+ startSpans.sort( Integer::compareTo );
+ stopSpans.sort( Integer::compareTo );
+ spans.sort( Comparator.comparingInt( Pair::getValue1 ) );
+ processSpans( jCas, offset, spans, medMap, calendarMap, startSpans, stopSpans );
}
static private void processSpans( final JCas jCas,
- final List<Pair<Integer>> medDateSpans,
+ final int offset,
+ final List<Pair<Integer>> medOrDateSpans,
final Map<Pair<Integer>, EventMention> medMap,
final Map<Pair<Integer>, Calendar> calendarMap,
final List<Integer> startSpans,
@@ -237,38 +253,12 @@ final public class SimpleMedDatesFinder
TimeMention startMention = null;
TimeMention stopMention = null;
- for ( int z = medDateSpans.size() - 1; z >= 0; z-- ) {
- final Pair<Integer> span = medDateSpans.get( z );
- int startSpanBegin = startSpans.get( startIndex );
- int stopSpanBegin = stopSpans.get( stopIndex );
- if ( span.getValue1() > startSpanBegin && span.getValue1() < startSpanBegin + 15 ) {
- final Calendar start = calendarMap.get( span );
- if ( start != null ) {
- startDate = CalendarUtil.createTypeDate( jCas, start );
- startMention = new TimeMention( jCas, span.getValue1(), span.getValue2() );
- startMention.setDate( startDate );
- startIndex = Math.max( startIndex - 1, 0 );
- }
- } else if ( span.getValue1() > stopSpanBegin && span.getValue1() < stopSpanBegin + 15 ) {
- final Calendar stop = calendarMap.get( span );
- if ( stop != null ) {
- stopDate = CalendarUtil.createTypeDate( jCas, stop );
- stopMention = new TimeMention( jCas, span.getValue1(), span.getValue2() );
- stopMention.setDate( stopDate );
- stopIndex = Math.max( stopIndex - 1, 0 );
- startDate = null;
- startMention = null;
- }
- } else {
- final EventMention med = medMap.get( span );
- if ( med == null ) {
- // possibly some interrupting date? Reset the dates.
- startDate = null;
- startMention = null;
- stopDate = null;
- stopMention = null;
- continue;
- }
+ for ( int z = medOrDateSpans.size() - 1; z >= 0; z-- ) {
+ final Pair<Integer> medOrDateSpan = medOrDateSpans.get( z );
+ final int startSpanBegin = startIndex >= 0 ? startSpans.get( startIndex ) : Integer.MAX_VALUE;
+ final int stopSpanBegin = stopIndex >= 0 ? stopSpans.get( stopIndex ) : Integer.MAX_VALUE;
+ final EventMention med = medMap.get( medOrDateSpan );
+ if ( med != null ) {
if ( med instanceof MedicationEventMention ) {
if ( startDate != null ) {
((MedicationEventMention)med).setStartDate( startDate );
@@ -284,6 +274,25 @@ final public class SimpleMedDatesFinder
((MedicationMention)med).setEndDate( stopMention );
}
}
+ } else {
+ final Calendar calendar = calendarMap.get( medOrDateSpan );
+ if ( calendar == null ) {
+ } else if ( medOrDateSpan.getValue1() > startSpanBegin &&
+ medOrDateSpan.getValue1() < startSpanBegin + 15 ) {
+ startDate = CalendarUtil.createTypeDate( jCas, calendar );
+ startMention = new TimeMention( jCas,
+ offset + medOrDateSpan.getValue1(), offset + medOrDateSpan.getValue2() );
+ startMention.setDate( startDate );
+ startIndex--;
+ } else if ( medOrDateSpan.getValue1() > stopSpanBegin && medOrDateSpan.getValue1() < stopSpanBegin + 15 ) {
+ stopDate = CalendarUtil.createTypeDate( jCas, calendar );
+ stopMention = new TimeMention( jCas,
+ offset + medOrDateSpan.getValue1(), offset + medOrDateSpan.getValue2() );
+ stopMention.setDate( stopDate );
+ stopIndex--;
+ startDate = null;
+ startMention = null;
+ }
}
}
}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/utils/CalendarUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/utils/CalendarUtil.java?rev=1851594&r1=1851593&r2=1851594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/utils/CalendarUtil.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/utils/CalendarUtil.java Fri Jan 18 01:35:33 2019
@@ -128,7 +128,7 @@ final public class CalendarUtil {
}
/**
- * @param text something with 0-2 month digits, 0-2 day digits and 4 year digits all divided by slash
+ * @param text something with 0-2 month digits, 0-2 day digits and 2-4 year digits all divided by slash
* @return Calendar parsed from text, or {@link #NULL_CALENDAR}.
*/
static public Calendar getSlashCalendar( final String text ) {
@@ -144,7 +144,36 @@ final public class CalendarUtil {
day = dayI;
}
final int year = parseInt( splits[ 2 ] );
- return new GregorianCalendar( year, month - 1, day );
+ return createCalendar( month, day, year );
+ }
+
+ /**
+ * Parses a date made of three numeric fields divided by dashes.
+ * A 4 digit first field marks year-first order (yyyy-M-d, e.g. 2002-4-12);
+ * anything else is read as month-first order (M-d-yy or M-d-yyyy).
+ *
+ * @param text three numeric fields divided by dash
+ * @return Calendar parsed from text, or {@link #NULL_CALENDAR} if text has fewer than three fields.
+ */
+ static public Calendar getDashCalendar( final String text ) {
+ final String[] splits = StringUtil.fastSplit( text, '-' );
+ if ( splits.length < 3 ) {
+ // not enough fields to form a date; avoid indexing past the end of the array
+ return NULL_CALENDAR;
+ }
+ final int first = parseInt( splits[ 0 ] );
+ final int second = parseInt( splits[ 1 ] );
+ final int third = parseInt( splits[ 2 ] );
+ if ( splits[ 0 ].length() == 4 ) {
+ // year-first order: the fields are year, month, day, so hand them over as ( month, day, year )
+ return createCalendar( second, third, first );
+ }
+ // month-first order: the fields are already month, day, year
+ return createCalendar( first, second, third );
+ }
+
+ static private Calendar createCalendar( final int month, final int day, final int year ) {
+ int y2kYear = year;
+ if ( year < 100 ) {
+ // silly kludge for 2 digit years
+ if ( year < 30 ) {
+ y2kYear += 2000;
+ } else {
+ y2kYear += 1900;
+ }
+ }
+ return new GregorianCalendar( y2kYear, month - 1, day );
}
/**