You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2019/01/11 01:17:55 UTC
svn commit: r1850997 - in /ctakes/trunk:
ctakes-core/src/main/java/org/apache/ctakes/core/util/
ctakes-temporal-res/src/main/resources/org/apache/ctakes/temporal/ae/section/
ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/
ctakes-temporal/s...
Author: seanfinan
Date: Fri Jan 11 01:17:55 2019
New Revision: 1850997
URL: http://svn.apache.org/viewvc?rev=1850997&view=rev
Log:
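SimpleMedDatesFinder : ability to parse incomplete slash-delimited dates (month and/or day missing, e.g. //2018)
CalendarUtil : ability to parse incomplete slash-delimited dates (month and/or day missing, e.g. //2018)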
SimpleMedDatesPrinter : prints start and stop information to log
DefaultMedSections.txt : list of section names that may have medication history
Added:
ctakes/trunk/ctakes-temporal-res/src/main/resources/org/apache/ctakes/temporal/ae/section/DefaultMedSections.txt
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/cc/
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/cc/SimpleMedDatesPrinter.java
Modified:
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/StringUtil.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/utils/CalendarUtil.java
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/StringUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/StringUtil.java?rev=1850997&r1=1850996&r2=1850997&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/StringUtil.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/StringUtil.java Fri Jan 11 01:17:55 2019
@@ -32,7 +32,7 @@ final public class StringUtil {
final String[] tokens = new String[ line.length() / 2 + 1 ];
int index = 0;
int lastSplit = -1;
- while ( nextSplit > 0 ) {
+ while ( nextSplit >= 0 ) {
tokens[ index ] = line.substring( lastSplit + 1, nextSplit );
lastSplit = nextSplit;
nextSplit = line.indexOf( c, lastSplit + 1 );
Added: ctakes/trunk/ctakes-temporal-res/src/main/resources/org/apache/ctakes/temporal/ae/section/DefaultMedSections.txt
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal-res/src/main/resources/org/apache/ctakes/temporal/ae/section/DefaultMedSections.txt?rev=1850997&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal-res/src/main/resources/org/apache/ctakes/temporal/ae/section/DefaultMedSections.txt (added)
+++ ctakes/trunk/ctakes-temporal-res/src/main/resources/org/apache/ctakes/temporal/ae/section/DefaultMedSections.txt Fri Jan 11 01:17:55 2019
@@ -0,0 +1,16 @@
+Medications
+Medications Outside Hospital
+Discharge Instructions
+Emergency Department Course
+Medication History
+Input Fluids
+Medications By Type
+Substance Abuse Treatment
+Medications at Transfer
+Instructions
+Current Antibiotics
+Plan
+Labs
+Diet
+Vaccinations
+Past Medications
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java?rev=1850997&r1=1850996&r2=1850997&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java Fri Jan 11 01:17:55 2019
@@ -6,10 +6,8 @@ import org.apache.ctakes.core.util.Ontol
import org.apache.ctakes.core.util.Pair;
import org.apache.ctakes.temporal.utils.CalendarUtil;
import org.apache.ctakes.typesystem.type.refsem.Date;
-import org.apache.ctakes.typesystem.type.textsem.DateAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.MedicationEventMention;
-import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.syntax.Chunk;
+import org.apache.ctakes.typesystem.type.textsem.*;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
@@ -27,6 +25,7 @@ import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.*;
+import java.util.regex.Pattern;
import static org.apache.ctakes.temporal.utils.CalendarUtil.NULL_CALENDAR;
@@ -37,7 +36,7 @@ import static org.apache.ctakes.temporal
*/
@PipeBitInfo(
name = "SimpleMedDatesFinder",
- description = "Finds start and stop dates for events.",
+ description = "Finds start and stop dates for medication events.",
role = PipeBitInfo.Role.ANNOTATOR,
dependencies = PipeBitInfo.TypeProduct.IDENTIFIED_ANNOTATION,
products = PipeBitInfo.TypeProduct.TEMPORAL_RELATION
@@ -83,6 +82,8 @@ final public class SimpleMedDatesFinder
private String _cuiListPath;
+ static private final Pattern PARTIAL_DATE = Pattern.compile( "[0-9]{0,2}/[0-9]{0,2}/[0-9]{4}" );
+
private Class<? extends Annotation> _lookupClass;
private final Collection<String> _sectionList = new ArrayList<>();
private final Collection<String> _cuiList = new ArrayList<>();
@@ -119,16 +120,16 @@ final public class SimpleMedDatesFinder
*/
@Override
public void process( final JCas jCas ) throws AnalysisEngineProcessException {
- LOGGER.info( "Finding Simple Event Dates ..." );
+ LOGGER.info( "Finding Medication Dates ..." );
- final Map<Annotation, Collection<IdentifiedAnnotation>> windowAnnotationMap = new HashMap<>();
+ final Map<Annotation, Collection<Annotation>> windowAnnotationMap = new HashMap<>();
if ( _sectionList.isEmpty() ) {
- windowAnnotationMap.putAll( JCasUtil.indexCovered( jCas, _lookupClass, IdentifiedAnnotation.class ) );
+ windowAnnotationMap.putAll( JCasUtil.indexCovered( jCas, _lookupClass, Annotation.class ) );
} else {
if ( _lookupClass.equals( Segment.class ) ) {
- final Map<Segment, Collection<IdentifiedAnnotation>> sectionAnnotationMap
- = JCasUtil.indexCovered( jCas, Segment.class, IdentifiedAnnotation.class );
- for ( Map.Entry<Segment, Collection<IdentifiedAnnotation>> sectionAnnotations : sectionAnnotationMap.entrySet() ) {
+ final Map<Segment, Collection<Annotation>> sectionAnnotationMap
+ = JCasUtil.indexCovered( jCas, Segment.class, Annotation.class );
+ for ( Map.Entry<Segment, Collection<Annotation>> sectionAnnotations : sectionAnnotationMap.entrySet() ) {
final Segment section = sectionAnnotations.getKey();
if ( _sectionList.contains( section.getPreferredText() )
|| _sectionList.contains( section.getId() ) ) {
@@ -142,8 +143,8 @@ final public class SimpleMedDatesFinder
final Segment section = sectionWindows.getKey();
if ( _sectionList.contains( section.getPreferredText() )
|| _sectionList.contains( section.getId() ) ) {
- final Collection<IdentifiedAnnotation> annotations
- = JCasUtil.selectCovered( jCas, IdentifiedAnnotation.class, section );
+ final Collection<Annotation> annotations
+ = JCasUtil.selectCovered( jCas, Annotation.class, section );
windowAnnotationMap.putAll( splitCovered( annotations, sectionWindows.getValue() ) );
}
}
@@ -151,7 +152,7 @@ final public class SimpleMedDatesFinder
}
}
- for ( Map.Entry<Annotation, Collection<IdentifiedAnnotation>> windowAnnotations : windowAnnotationMap.entrySet() ) {
+ for ( Map.Entry<Annotation, Collection<Annotation>> windowAnnotations : windowAnnotationMap.entrySet() ) {
processWindow( jCas, windowAnnotations.getKey(), windowAnnotations.getValue() );
}
LOGGER.info( "Finished." );
@@ -167,7 +168,7 @@ final public class SimpleMedDatesFinder
*/
private void processWindow( final JCas jCas,
final Annotation window,
- final Collection<IdentifiedAnnotation> annotations ) {
+ final Collection<Annotation> annotations ) {
final int offset = window.getBegin();
final String text = window.getCoveredText().toLowerCase();
final List<Integer> startSpans = getTextIndices( text, "started" );
@@ -177,16 +178,19 @@ final public class SimpleMedDatesFinder
}
final List<Pair<Integer>> spans = new ArrayList<>();
- final Map<Pair<Integer>, MedicationEventMention> medMap = new HashMap<>();
+ final Map<Pair<Integer>, EventMention> medMap = new HashMap<>();
final Map<Pair<Integer>, Calendar> calendarMap = new HashMap<>();
- for ( IdentifiedAnnotation annotation : annotations ) {
- if ( annotation instanceof MedicationEventMention
+ for ( Annotation annotation : annotations ) {
+ if ( (annotation instanceof MedicationEventMention || annotation instanceof MedicationMention)
+ && !spans.contains( createTextSpan( annotation, offset ) )
&& (_cuiList.isEmpty()
- || OntologyConceptUtil.getCuis( annotation ).stream().anyMatch( _cuiList::contains )) ) {
+ || OntologyConceptUtil.getCuis( (IdentifiedAnnotation)annotation )
+ .stream()
+ .anyMatch( _cuiList::contains )) ) {
final Pair<Integer> span = createTextSpan( annotation, offset );
spans.add( span );
- medMap.put( span, (MedicationEventMention)annotation );
+ medMap.put( span, (EventMention)annotation );
} else if ( annotation instanceof TimeMention ) {
final Calendar calendar = CalendarUtil.getCalendar( (TimeMention)annotation );
if ( !NULL_CALENDAR.equals( calendar ) ) {
@@ -194,13 +198,25 @@ final public class SimpleMedDatesFinder
spans.add( span );
calendarMap.put( span, calendar );
}
- } else if ( annotation instanceof DateAnnotation ) {
+ } else if ( annotation instanceof DateAnnotation
+ && !spans.contains( createTextSpan( annotation, offset ) ) ) {
final Calendar calendar = CalendarUtil.getCalendar( (DateAnnotation)annotation );
if ( !NULL_CALENDAR.equals( calendar ) ) {
final Pair<Integer> span = createTextSpan( annotation, offset );
spans.add( span );
calendarMap.put( span, calendar );
}
+ } else if ( annotation instanceof Chunk
+ && annotation.getCoveredText().length() >= 6
+ && annotation.getCoveredText().length() <= 10
+ && PARTIAL_DATE.matcher( annotation.getCoveredText() ).matches()
+ && !spans.contains( createTextSpan( annotation, offset ) ) ) {
+ final Calendar calendar = CalendarUtil.getSlashCalendar( annotation.getCoveredText() );
+ if ( !NULL_CALENDAR.equals( calendar ) ) {
+ final Pair<Integer> span = createTextSpan( annotation, offset );
+ spans.add( span );
+ calendarMap.put( span, calendar );
+ }
}
}
processSpans( jCas, spans, medMap, calendarMap, startSpans, stopSpans );
@@ -209,7 +225,7 @@ final public class SimpleMedDatesFinder
static private void processSpans( final JCas jCas,
final List<Pair<Integer>> medDateSpans,
- final Map<Pair<Integer>, MedicationEventMention> medMap,
+ final Map<Pair<Integer>, EventMention> medMap,
final Map<Pair<Integer>, Calendar> calendarMap,
final List<Integer> startSpans,
final List<Integer> stopSpans ) {
@@ -218,42 +234,65 @@ final public class SimpleMedDatesFinder
Date startDate = null;
Date stopDate = null;
+ TimeMention startMention = null;
+ TimeMention stopMention = null;
for ( int z = medDateSpans.size() - 1; z >= 0; z-- ) {
final Pair<Integer> span = medDateSpans.get( z );
- if ( span.getValue1() > startIndex && span.getValue1() < startIndex + 5 ) {
+ int startSpanBegin = startSpans.get( startIndex );
+ int stopSpanBegin = stopSpans.get( stopIndex );
+ if ( span.getValue1() > startSpanBegin && span.getValue1() < startSpanBegin + 15 ) {
final Calendar start = calendarMap.get( span );
if ( start != null ) {
startDate = CalendarUtil.createTypeDate( jCas, start );
+ startMention = new TimeMention( jCas, span.getValue1(), span.getValue2() );
+ startMention.setDate( startDate );
+ startIndex = Math.max( startIndex - 1, 0 );
}
- } else if ( span.getValue1() > stopIndex && span.getValue1() < stopIndex + 5 ) {
+ } else if ( span.getValue1() > stopSpanBegin && span.getValue1() < stopSpanBegin + 15 ) {
final Calendar stop = calendarMap.get( span );
if ( stop != null ) {
stopDate = CalendarUtil.createTypeDate( jCas, stop );
+ stopMention = new TimeMention( jCas, span.getValue1(), span.getValue2() );
+ stopMention.setDate( stopDate );
+ stopIndex = Math.max( stopIndex - 1, 0 );
+ startDate = null;
+ startMention = null;
}
} else {
- final MedicationEventMention med = medMap.get( span );
+ final EventMention med = medMap.get( span );
if ( med == null ) {
// possibly some interrupting date? Reset the dates.
startDate = null;
+ startMention = null;
stopDate = null;
+ stopMention = null;
continue;
}
- if ( startDate != null ) {
- med.setStartDate( startDate );
- }
- if ( stopDate != null ) {
- med.setEndDate( stopDate );
+ if ( med instanceof MedicationEventMention ) {
+ if ( startDate != null ) {
+ ((MedicationEventMention)med).setStartDate( startDate );
+ }
+ if ( stopDate != null ) {
+ ((MedicationEventMention)med).setEndDate( stopDate );
+ }
+ } else if ( med instanceof MedicationMention ) {
+ if ( startMention != null ) {
+ ((MedicationMention)med).setStartDate( startMention );
+ }
+ if ( stopMention != null ) {
+ ((MedicationMention)med).setEndDate( stopMention );
+ }
}
}
}
}
- static private Map<Annotation, Collection<IdentifiedAnnotation>> splitCovered(
- final Collection<IdentifiedAnnotation> annotations,
+ static private Map<Annotation, Collection<Annotation>> splitCovered(
+ final Collection<Annotation> annotations,
final Collection<Annotation> covering ) {
- final Map<Annotation, Collection<IdentifiedAnnotation>> covered = new HashMap<>();
- for ( IdentifiedAnnotation annotation : annotations ) {
+ final Map<Annotation, Collection<Annotation>> covered = new HashMap<>();
+ for ( Annotation annotation : annotations ) {
final int begin = annotation.getBegin();
for ( Annotation cover : covering ) {
if ( begin >= cover.getBegin() && begin < cover.getEnd() ) {
@@ -266,11 +305,15 @@ final public class SimpleMedDatesFinder
static private List<Integer> getTextIndices( final String windowText, final String searchText ) {
final String text = windowText.toLowerCase();
+ final int maxIndex = text.length() - 1; // index of the last character of the lower-cased window text
final List<Integer> indices = new ArrayList<>();
int index = text.indexOf( searchText );
while ( index >= 0 ) {
indices.add( index );
- index = text.indexOf( searchText, index );
+ if ( index == maxIndex ) { // the hit starts on the final character, so there is nothing left to search
+ break;
+ }
+ index = text.indexOf( searchText, index + 1 ); // resume one past this hit; searching from index itself finds the same hit again and never terminates
}
return indices;
}
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/cc/SimpleMedDatesPrinter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/cc/SimpleMedDatesPrinter.java?rev=1850997&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/cc/SimpleMedDatesPrinter.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/cc/SimpleMedDatesPrinter.java Fri Jan 11 01:17:55 2019
@@ -0,0 +1,82 @@
+package org.apache.ctakes.temporal.cc;
+
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.ctakes.typesystem.type.refsem.Date;
+import org.apache.ctakes.typesystem.type.textsem.MedicationEventMention;
+import org.apache.ctakes.typesystem.type.textsem.MedicationMention;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.log4j.Logger;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+
+import java.util.Collection;
+
+/**
+ * Writes the start and stop dates of the medications in a document to the log.
+ * {@link MedicationEventMention}s carry their dates as {@link Date}, while
+ * {@link MedicationMention}s carry them as {@link TimeMention}; both kinds are reported.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 1/10/2019
+ */
+@PipeBitInfo(
+      name = "SimpleMedDatesPrinter",
+      description = "Prints start and stop dates for medication events.",
+      role = PipeBitInfo.Role.WRITER,
+      dependencies = PipeBitInfo.TypeProduct.IDENTIFIED_ANNOTATION
+)
+final public class SimpleMedDatesPrinter extends JCasAnnotator_ImplBase {
+
+   static private final Logger LOGGER = Logger.getLogger( "SimpleMedDatesPrinter" );
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void process( final JCas jCas ) throws AnalysisEngineProcessException {
+      LOGGER.info( "Printing Medication Dates ..." );
+
+      final Collection<MedicationEventMention> medEvents = JCasUtil.select( jCas, MedicationEventMention.class );
+      for ( MedicationEventMention med : medEvents ) {
+         final String medText = med.getCoveredText();
+         printDate( medText, "Start", med.getStartDate() );
+         printDate( medText, " Stop", med.getEndDate() );
+      }
+
+      final Collection<MedicationMention> meds = JCasUtil.select( jCas, MedicationMention.class );
+      for ( MedicationMention med : meds ) {
+         final String medText = med.getCoveredText();
+         printTimeMention( medText, "Start", med.getStartDate() );
+         printTimeMention( medText, " Stop", med.getEndDate() );
+      }
+   }
+
+   /**
+    * Logs the date held by a time mention; a null mention is logged the same way as a null date.
+    *
+    * @param med         covered text of the medication
+    * @param dateType    label placed in front of the date, "Start" or " Stop"
+    * @param timeMention mention holding the date, may be null
+    */
+   static private void printTimeMention( final String med, final String dateType, final TimeMention timeMention ) {
+      printDate( med, dateType, timeMention == null ? null : timeMention.getDate() );
+   }
+
+   /**
+    * Logs whichever of month, day and year are set on the date, or "NO DATE" when there is no date.
+    *
+    * @param med      covered text of the medication
+    * @param dateType label placed in front of the date, "Start" or " Stop"
+    * @param date     date to log, may be null
+    */
+   static private void printDate( final String med, final String dateType, final Date date ) {
+      if ( date == null ) {
+         LOGGER.info( med + " " + dateType + " = NO DATE" );
+         return;
+      }
+      final StringBuilder sb = new StringBuilder();
+      appendField( sb, " Month: ", date.getMonth() );
+      appendField( sb, " Day: ", date.getDay() );
+      appendField( sb, " Year: ", date.getYear() );
+      LOGGER.info( med + " " + dateType + " = " + sb.toString() );
+   }
+
+   /**
+    * Appends the label and value to the builder, unless the value is null or empty.
+    *
+    * @param sb    builder collecting the log line
+    * @param label text written before the value
+    * @param value date field text, may be null or empty
+    */
+   static private void appendField( final StringBuilder sb, final String label, final String value ) {
+      if ( value != null && !value.isEmpty() ) {
+         sb.append( label ).append( value );
+      }
+   }
+
+}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/utils/CalendarUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/utils/CalendarUtil.java?rev=1850997&r1=1850996&r2=1850997&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/utils/CalendarUtil.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/utils/CalendarUtil.java Fri Jan 11 01:17:55 2019
@@ -5,6 +5,7 @@ import com.mdimension.jchronic.Chronic;
import com.mdimension.jchronic.Options;
import com.mdimension.jchronic.tags.Pointer;
import com.mdimension.jchronic.utils.Span;
+import org.apache.ctakes.core.util.StringUtil;
import org.apache.ctakes.typesystem.type.refsem.Date;
import org.apache.ctakes.typesystem.type.textsem.DateAnnotation;
import org.apache.ctakes.typesystem.type.textsem.TimeMention;
@@ -12,6 +13,7 @@ import org.apache.uima.jcas.JCas;
import java.util.ArrayList;
import java.util.Calendar;
+import java.util.GregorianCalendar;
import java.util.List;
import static java.util.Calendar.*;
@@ -126,6 +128,26 @@ final public class CalendarUtil {
}
/**
+ * Parses a slash-delimited date whose month and/or day may be missing, e.g. "//2018".
+ * @param text something with 0-2 month digits, 0-2 day digits and 4 year digits all divided by slash
+ * @return Calendar parsed from text, or {@link #NULL_CALENDAR} if there are fewer than 3 parts or no positive year.
+ */
+   static public Calendar getSlashCalendar( final String text ) {
+      final String[] splits = StringUtil.fastSplit( text, '/' );
+      if ( splits.length < 3 ) {
+         return NULL_CALENDAR;
+      }
+      final int year = parseInt( splits[ 2 ] );
+      if ( year <= 0 ) {
+         return NULL_CALENDAR;
+      }
+      // A missing or unparseable month or day falls back to 1, the first of the period.
+      final int month = Math.max( 1, parseInt( splits[ 0 ] ) );
+      final int day = Math.max( 1, parseInt( splits[ 1 ] ) );
+      return new GregorianCalendar( year, month - 1, day );
+   }
+
+ /**
* @param text -
* @return positive int value of text or {@link Integer#MIN_VALUE} if not possible.
*/