You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2019/01/08 14:49:52 UTC
svn commit: r1850756 - in /ctakes/trunk/ctakes-temporal: ./
src/main/java/org/apache/ctakes/temporal/ae/
src/main/java/org/apache/ctakes/temporal/utils/
Author: seanfinan
Date: Tue Jan 8 14:49:52 2019
New Revision: 1850756
URL: http://svn.apache.org/viewvc?rev=1850756&view=rev
Log:
DocTimeApproximator - attempt to set doc time using latest pre-now date in document
CalendarUtil - some methods used by DocTimeApproximator and SimpleMedDatesFinder
SimpleMedDatesFinder - refactor to use CalendarUtil
Added:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeApproximator.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/utils/CalendarUtil.java
Modified:
ctakes/trunk/ctakes-temporal/pom.xml
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java
Modified: ctakes/trunk/ctakes-temporal/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/pom.xml?rev=1850756&r1=1850755&r2=1850756&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/pom.xml (original)
+++ ctakes/trunk/ctakes-temporal/pom.xml Tue Jan 8 14:49:52 2019
@@ -78,6 +78,11 @@
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
</dependency>
+ <dependency>
+ <groupId>com.rubiconproject.oss</groupId>
+ <artifactId>jchronic</artifactId>
+ <version>${jchronic.version}</version>
+ </dependency>
</dependencies>
<!-- The below is all necessary to unpack the UMLS resources since they
can't be used from the classpath -->
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeApproximator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeApproximator.java?rev=1850756&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeApproximator.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeApproximator.java Tue Jan 8 14:49:52 2019
@@ -0,0 +1,108 @@
+package org.apache.ctakes.temporal.ae;
+
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.ctakes.core.util.SourceMetadataUtil;
+import org.apache.ctakes.temporal.utils.CalendarUtil;
+import org.apache.ctakes.typesystem.type.structured.SourceData;
+import org.apache.ctakes.typesystem.type.textsem.DateAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.log4j.Logger;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+
+import java.util.Calendar;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import static java.util.Calendar.DAY_OF_MONTH;
+import static org.apache.ctakes.core.pipeline.PipeBitInfo.TypeProduct.IDENTIFIED_ANNOTATION;
+import static org.apache.ctakes.core.pipeline.PipeBitInfo.TypeProduct.TIMEX;
+import static org.apache.ctakes.temporal.utils.CalendarUtil.NULL_CALENDAR;
+
+/**
+ * Approximates a missing document time from the dates mentioned in the document.
+ * If the {@link SourceData} has no source original date, the latest normalized date
+ * that is after 1900-01-01 and more than one day in the past is stored as that date.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 10/10/2018
+ */
+@PipeBitInfo(
+      name = "DocTimeApproximator",
+      description = "Sets the document time based upon the latest normalized date earlier than now.",
+      role = PipeBitInfo.Role.ANNOTATOR,
+      usables = { TIMEX, IDENTIFIED_ANNOTATION }
+)
+final public class DocTimeApproximator extends JCasAnnotator_ImplBase {
+
+   static private final Logger LOGGER = Logger.getLogger( "DocTimeApproximator" );
+
+   /**
+    * Leaves an existing document time untouched; otherwise derives one from the
+    * {@link TimeMention} and {@link DateAnnotation} annotations in the cas.
+    * {@inheritDoc}
+    */
+   @Override
+   public void process( final JCas jCas ) throws AnalysisEngineProcessException {
+      final SourceData sourceData = SourceMetadataUtil.getOrCreateSourceData( jCas );
+      final String docTime = sourceData.getSourceOriginalDate();
+      if ( docTime != null && !docTime.isEmpty() ) {
+         LOGGER.info( "Document Time is " + docTime );
+         return;
+      }
+
+      // A set, so that repeated mentions of one date are only considered once.
+      final Collection<Calendar> calendars = new HashSet<>();
+      for ( TimeMention timeMention : JCasUtil.select( jCas, TimeMention.class ) ) {
+         calendars.add( CalendarUtil.getCalendar( timeMention ) );
+      }
+      for ( DateAnnotation dateAnnotation : JCasUtil.select( jCas, DateAnnotation.class ) ) {
+         calendars.add( CalendarUtil.getCalendar( dateAnnotation ) );
+      }
+
+      final Calendar lastCalendar = getLastCalendar( calendars );
+      if ( NULL_CALENDAR.equals( lastCalendar ) ) {
+         LOGGER.info( "Could not parse Document Time." );
+         return;
+      }
+
+      setDocTime( sourceData, lastCalendar );
+   }
+
+   /**
+    * Only dates after 1900-01-01 and more than one day before the current time are candidates.
+    *
+    * @param calendars calendars in the document.
+    * @return the calendar with the latest candidate date, or {@link CalendarUtil#NULL_CALENDAR} if there is none.
+    */
+   static private Calendar getLastCalendar( final Collection<Calendar> calendars ) {
+      final Calendar nineteen = new Calendar.Builder().setDate( 1900, 0, 1 ).build();
+      final Calendar yesterday = Calendar.getInstance();
+      yesterday.add( DAY_OF_MONTH, -1 );
+      // A single max pass replaces sorting the whole list just to read its last element.
+      return calendars.stream()
+                      .filter( c -> !NULL_CALENDAR.equals( c ) )
+                      .filter( c -> c.compareTo( nineteen ) > 0 )
+                      .filter( c -> c.compareTo( yesterday ) < 0 )
+                      .max( Calendar::compareTo )
+                      .orElse( NULL_CALENDAR );
+   }
+
+   /**
+    * Set the document time (source original date) to the calendar value.
+    *
+    * @param sourceData source metadata that receives the document time.
+    * @param calendar   date to store, formatted by {@link CalendarUtil#createDigitDateText(Calendar)}.
+    */
+   static private void setDocTime( final SourceData sourceData, final Calendar calendar ) {
+      final String docTime = CalendarUtil.createDigitDateText( calendar );
+      sourceData.setSourceOriginalDate( docTime );
+      LOGGER.info( "Parsed Document Time is " + docTime );
+   }
+
+}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java?rev=1850756&r1=1850755&r2=1850756&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java Tue Jan 8 14:49:52 2019
@@ -4,6 +4,7 @@ import org.apache.ctakes.core.pipeline.P
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.core.util.OntologyConceptUtil;
import org.apache.ctakes.core.util.Pair;
+import org.apache.ctakes.temporal.utils.CalendarUtil;
import org.apache.ctakes.typesystem.type.refsem.Date;
import org.apache.ctakes.typesystem.type.textsem.DateAnnotation;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
@@ -27,7 +28,7 @@ import java.io.IOException;
import java.io.InputStreamReader;
import java.util.*;
-import static java.util.Calendar.DAY_OF_MONTH;
+import static org.apache.ctakes.temporal.utils.CalendarUtil.NULL_CALENDAR;
/**
* @author SPF , chip-nlp
@@ -82,9 +83,6 @@ final public class SimpleMedDatesFinder
private String _cuiListPath;
- static private final Calendar NULL_CALENDAR = new Calendar.Builder().setDate( 1, 1, 1 ).build();
-// static private final Options PAST_OPTIONS = new Options( Pointer.PointerType.PAST );
-
private Class<? extends Annotation> _lookupClass;
private final Collection<String> _sectionList = new ArrayList<>();
private final Collection<String> _cuiList = new ArrayList<>();
@@ -190,14 +188,14 @@ final public class SimpleMedDatesFinder
spans.add( span );
medMap.put( span, (MedicationEventMention)annotation );
} else if ( annotation instanceof TimeMention ) {
- final Calendar calendar = getCalendar( (TimeMention)annotation );
+ final Calendar calendar = CalendarUtil.getCalendar( (TimeMention)annotation );
if ( !NULL_CALENDAR.equals( calendar ) ) {
final Pair<Integer> span = createTextSpan( annotation, offset );
spans.add( span );
calendarMap.put( span, calendar );
}
} else if ( annotation instanceof DateAnnotation ) {
- final Calendar calendar = getCalendar( (DateAnnotation)annotation );
+ final Calendar calendar = CalendarUtil.getCalendar( (DateAnnotation)annotation );
if ( !NULL_CALENDAR.equals( calendar ) ) {
final Pair<Integer> span = createTextSpan( annotation, offset );
spans.add( span );
@@ -226,18 +224,12 @@ final public class SimpleMedDatesFinder
if ( span.getValue1() > startIndex && span.getValue1() < startIndex + 5 ) {
final Calendar start = calendarMap.get( span );
if ( start != null ) {
- startDate = new Date( jCas );
- startDate.setDay( "" + start.get( DAY_OF_MONTH ) );
- startDate.setMonth( "" + (start.get( Calendar.MONTH ) + 1) );
- startDate.setYear( "" + start.get( Calendar.YEAR ) );
+ startDate = CalendarUtil.createTypeDate( jCas, start );
}
} else if ( span.getValue1() > stopIndex && span.getValue1() < stopIndex + 5 ) {
final Calendar stop = calendarMap.get( span );
if ( stop != null ) {
- stopDate = new Date( jCas );
- stopDate.setDay( "" + stop.get( DAY_OF_MONTH ) );
- stopDate.setMonth( "" + (stop.get( Calendar.MONTH ) + 1) );
- stopDate.setYear( "" + stop.get( Calendar.YEAR ) );
+ stopDate = CalendarUtil.createTypeDate( jCas, stop );
}
} else {
final MedicationEventMention med = medMap.get( span );
@@ -288,66 +280,6 @@ final public class SimpleMedDatesFinder
}
- static private Calendar getCalendar( final TimeMention timeMention ) {
- final org.apache.ctakes.typesystem.type.refsem.Date typeDate = timeMention.getDate();
- if ( typeDate != null ) {
- final int year = parseInt( typeDate.getYear() );
- final int month = parseInt( typeDate.getMonth() );
- final int day = parseInt( typeDate.getDay() );
- if ( year == Integer.MIN_VALUE || month == Integer.MIN_VALUE || day == Integer.MIN_VALUE ) {
- return NULL_CALENDAR;
- }
- LOGGER.debug( "TimeMention Date " + year + "" + month + "" + day );
- return new Calendar.Builder().setDate( year, month - 1, day ).build();
- }
- return getCalendar( timeMention.getCoveredText() );
- }
-
- static private Calendar getCalendar( final DateAnnotation dateAnnotation ) {
- return getCalendar( dateAnnotation.getCoveredText() );
- }
-
- static private Calendar getCalendar( final String text ) {
- if ( isLousyDateText( text ) ) {
- return NULL_CALENDAR;
- }
-// final Span span = Chronic.parse( text, PAST_OPTIONS );
-// if ( span == null ) {
-// return NULL_CALENDAR;
-// }
-// return span.getEndCalendar();
-
- return NULL_CALENDAR;
- }
-
-
- static private boolean isLousyDateText( final String text ) {
- if ( text.length() < 7 ) {
- return true;
- }
- for ( char c : text.toCharArray() ) {
- if ( Character.isDigit( c ) ) {
- return false;
- }
- }
- return true;
- }
-
- static private int parseInt( final String text ) {
- if ( text == null || text.isEmpty() ) {
- return Integer.MIN_VALUE;
- }
- for ( char c : text.toCharArray() ) {
- if ( !Character.isDigit( c ) ) {
- return Integer.MIN_VALUE;
- }
- }
- try {
- return Integer.parseInt( text );
- } catch ( NumberFormatException nfE ) {
- return Integer.MIN_VALUE;
- }
- }
synchronized private void loadSections() throws ResourceInitializationException {
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/utils/CalendarUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/utils/CalendarUtil.java?rev=1850756&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/utils/CalendarUtil.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/utils/CalendarUtil.java Tue Jan 8 14:49:52 2019
@@ -0,0 +1,149 @@
+package org.apache.ctakes.temporal.utils;
+
+
+import com.mdimension.jchronic.Chronic;
+import com.mdimension.jchronic.Options;
+import com.mdimension.jchronic.tags.Pointer;
+import com.mdimension.jchronic.utils.Span;
+import org.apache.ctakes.typesystem.type.refsem.Date;
+import org.apache.ctakes.typesystem.type.textsem.DateAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.uima.jcas.JCas;
+
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.List;
+
+import static java.util.Calendar.*;
+
+
+/**
+ * Static helpers that convert between {@link Calendar}, type system {@link Date} and date text.
+ * The getCalendar methods return {@link #NULL_CALENDAR} when they cannot determine a date.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 1/8/2019
+ */
+final public class CalendarUtil {
+
+   private CalendarUtil() {
+   }
+
+   /** Marker for "no date"; test with {@code NULL_CALENDAR.equals( calendar )}. */
+   static public final Calendar NULL_CALENDAR = new Calendar.Builder().setDate( 1, 1, 1 ).build();
+   // NOTE(review): shared by every parse; confirm jchronic Options holds no per-parse state (e.g. its "now").
+   static private final Options PAST_OPTIONS = new Options( Pointer.PointerType.PAST );
+
+   /**
+    * @param timeMention -
+    * @return Calendar created using preset date information in the TimeMention or its covered text, or {@link #NULL_CALENDAR}.
+    */
+   static public Calendar getCalendar( final TimeMention timeMention ) {
+      if ( timeMention == null ) {
+         return NULL_CALENDAR;
+      }
+      final Calendar typeCalendar = getCalendar( timeMention.getDate() );
+      if ( !NULL_CALENDAR.equals( typeCalendar ) ) {
+         return typeCalendar;
+      }
+      return getCalendar( timeMention.getCoveredText() );
+   }
+
+   /**
+    * A year is required: {@link Calendar.Builder} uses 1970 when no year is given, so a date with
+    * only a month or day would otherwise silently become a date in 1970.
+    * A missing month or day is left to the builder's defaults.
+    *
+    * @param typeDate Type System Date, usually in a {@link TimeMention}.
+    * @return Calendar created using preset date information, or {@link #NULL_CALENDAR}.
+    */
+   static public Calendar getCalendar( final Date typeDate ) {
+      if ( typeDate == null ) {
+         return NULL_CALENDAR;
+      }
+      final int year = parseInt( typeDate.getYear() );
+      if ( year == Integer.MIN_VALUE ) {
+         return NULL_CALENDAR;
+      }
+      final Calendar.Builder builder = new Calendar.Builder().set( YEAR, year );
+      final int month = parseInt( typeDate.getMonth() );
+      if ( month != Integer.MIN_VALUE ) {
+         // Calendar months are zero-based.
+         builder.set( MONTH, month - 1 );
+      }
+      final int day = parseInt( typeDate.getDay() );
+      if ( day != Integer.MIN_VALUE ) {
+         builder.set( DAY_OF_MONTH, day );
+      }
+      return builder.build();
+   }
+
+   /**
+    * @param dateAnnotation date annotation, may be null.
+    * @return Calendar parsed from text, or {@link #NULL_CALENDAR}.
+    */
+   static public Calendar getCalendar( final DateAnnotation dateAnnotation ) {
+      if ( dateAnnotation == null ) {
+         return NULL_CALENDAR;
+      }
+      return getCalendar( dateAnnotation.getCoveredText() );
+   }
+
+   /**
+    * @param jCas ye olde ...
+    * @param calendar some calendar with actual date information
+    * @return Type System Date with filled day, month, year values
+    */
+   static public Date createTypeDate( final JCas jCas, final Calendar calendar ) {
+      final Date date = new Date( jCas );
+      date.setDay( "" + calendar.get( DAY_OF_MONTH ) );
+      date.setMonth( "" + (calendar.get( MONTH ) + 1) );
+      date.setYear( "" + calendar.get( YEAR ) );
+      return date;
+   }
+
+   /**
+    * @param calendar -
+    * @return ugly format date consisting only of digits with twelve o'clock : YYYYMMDD1200
+    */
+   static public String createDigitDateText( final Calendar calendar ) {
+      // Locale.ROOT keeps the digits ASCII; formatting with the default locale may localize them.
+      return String.format( java.util.Locale.ROOT, "%04d%02d%02d1200",
+            calendar.get( YEAR ),
+            calendar.get( MONTH ) + 1,
+            calendar.get( DAY_OF_MONTH ) );
+   }
+
+   /**
+    * @param text date text; null or blank text yields {@link #NULL_CALENDAR} without being parsed.
+    * @return Calendar parsed from text, or {@link #NULL_CALENDAR}.
+    */
+   static public Calendar getCalendar( final String text ) {
+      if ( text == null || text.trim().isEmpty() ) {
+         return NULL_CALENDAR;
+      }
+      final Span span = Chronic.parse( text, PAST_OPTIONS );
+      if ( span == null ) {
+         return NULL_CALENDAR;
+      }
+      return span.getEndCalendar();
+   }
+
+   /**
+    * @param text -
+    * @return non-negative int value of all-digit text or {@link Integer#MIN_VALUE} if not possible.
+    */
+   static private int parseInt( final String text ) {
+      if ( text == null || text.isEmpty() || !text.chars().allMatch( Character::isDigit ) ) {
+         return Integer.MIN_VALUE;
+      }
+      try {
+         return Integer.parseInt( text );
+      } catch ( NumberFormatException nfE ) {
+         return Integer.MIN_VALUE;
+      }
+   }
+
+}
+