You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2019/01/08 03:38:14 UTC

svn commit: r1850703 - /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java

Author: seanfinan
Date: Tue Jan  8 03:38:14 2019
New Revision: 1850703

URL: http://svn.apache.org/viewvc?rev=1850703&view=rev
Log:
Simple AE that sets start and end dates for medications

Added:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java?rev=1850703&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/SimpleMedDatesFinder.java Tue Jan  8 03:38:14 2019
@@ -0,0 +1,414 @@
+package org.apache.ctakes.temporal.ae;
+
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.core.util.OntologyConceptUtil;
+import org.apache.ctakes.core.util.Pair;
+import org.apache.ctakes.typesystem.type.refsem.Date;
+import org.apache.ctakes.typesystem.type.textsem.DateAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.MedicationEventMention;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.*;
+
+import static java.util.Calendar.DAY_OF_MONTH;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 1/7/2019
+ */
+@PipeBitInfo(
+      name = "SimpleMedDatesFinder",
+      description = "Finds start and stop dates for events.",
+      role = PipeBitInfo.Role.ANNOTATOR,
+      dependencies = PipeBitInfo.TypeProduct.IDENTIFIED_ANNOTATION,
+      products = PipeBitInfo.TypeProduct.TEMPORAL_RELATION
+)
+final public class SimpleMedDatesFinder extends JCasAnnotator_ImplBase {
+
+   static private final Logger LOGGER = Logger.getLogger( "SimpleMedDatesFinder" );
+
+   /**
+    * specifies the type of window to use for lookup
+    */
+   static public final String PARAM_LOOKUP_WINDOW_ANNOTATION = "LookupWindow";
+   static public final String PARAM_SECTION_LIST_PATH = "SectionList";
+   static public final String PARAM_CUI_LIST_PATH = "CuiList";
+
+
+   static private final String DEFAULT_LOOKUP_WINDOW = "org.apache.ctakes.typesystem.type.textspan.Paragraph";
+   @ConfigurationParameter( name = PARAM_LOOKUP_WINDOW_ANNOTATION,
+         description = "Type of Lookup window to use.  Default is Paragraph.",
+         mandatory = false,
+         defaultValue = DEFAULT_LOOKUP_WINDOW
+   )
+   private String _windowClassName;
+
+
+   static public final String SECTION_LIST_DESC
+         = "Path to a file containing a list of sections of interest.  If none is specified then all sections are viable.";
+   @ConfigurationParameter(
+         name = PARAM_SECTION_LIST_PATH,
+         description = SECTION_LIST_DESC,
+         mandatory = false
+   )
+   private String _sectionListPath;
+
+
+   static public final String CUI_LIST_DESC
+         = "path to a file containing a list of cuis of interest.  If none is specified then all cuis are viable.";
+   @ConfigurationParameter(
+         name = PARAM_CUI_LIST_PATH,
+         description = CUI_LIST_DESC,
+         mandatory = false
+   )
+   private String _cuiListPath;
+
+
+   static private final Calendar NULL_CALENDAR = new Calendar.Builder().setDate( 1, 1, 1 ).build();
+//   static private final Options PAST_OPTIONS = new Options( Pointer.PointerType.PAST );
+
+   private Class<? extends Annotation> _lookupClass;
+   private final Collection<String> _sectionList = new ArrayList<>();
+   private final Collection<String> _cuiList = new ArrayList<>();
+
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void initialize( final UimaContext context ) throws ResourceInitializationException {
+      // Always call the super first
+      super.initialize( context );
+
+      loadSections();
+      loadCuis();
+
+      try {
+         final Class<?> windowClass = Class.forName( _windowClassName );
+         if ( !Annotation.class.isAssignableFrom( windowClass ) ) {
+            LOGGER.error( "Lookup Window Class " + _windowClassName + " not found" );
+            throw new ResourceInitializationException( new ClassNotFoundException() );
+         }
+         _lookupClass = (Class<? extends Annotation>)windowClass;
+      } catch ( ClassNotFoundException cnfE ) {
+         LOGGER.error( "Lookup Window Class " + _windowClassName + " not found" );
+         throw new ResourceInitializationException( cnfE );
+      }
+      LOGGER.info( "Using Simple Event Date lookup window type: " + _windowClassName );
+
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void process( final JCas jCas ) throws AnalysisEngineProcessException {
+      LOGGER.info( "Finding Simple Event Dates ..." );
+
+      final Map<Annotation, Collection<IdentifiedAnnotation>> windowAnnotationMap = new HashMap<>();
+      if ( _sectionList.isEmpty() ) {
+         windowAnnotationMap.putAll( JCasUtil.indexCovered( jCas, _lookupClass, IdentifiedAnnotation.class ) );
+      } else {
+         if ( _lookupClass.equals( Segment.class ) ) {
+            final Map<Segment, Collection<IdentifiedAnnotation>> sectionAnnotationMap
+                  = JCasUtil.indexCovered( jCas, Segment.class, IdentifiedAnnotation.class );
+            for ( Map.Entry<Segment, Collection<IdentifiedAnnotation>> sectionAnnotations : sectionAnnotationMap.entrySet() ) {
+               final Segment section = sectionAnnotations.getKey();
+               if ( _sectionList.contains( section.getPreferredText() )
+                    || _sectionList.contains( section.getId() ) ) {
+                  windowAnnotationMap.put( section, sectionAnnotations.getValue() );
+               }
+            }
+         } else {
+            final Map<Segment, Collection<Annotation>> sectionWindowMap
+                  = JCasUtil.indexCovered( jCas, Segment.class, _lookupClass );
+            for ( Map.Entry<Segment, Collection<Annotation>> sectionWindows : sectionWindowMap.entrySet() ) {
+               final Segment section = sectionWindows.getKey();
+               if ( _sectionList.contains( section.getPreferredText() )
+                    || _sectionList.contains( section.getId() ) ) {
+                  final Collection<IdentifiedAnnotation> annotations
+                        = JCasUtil.selectCovered( jCas, IdentifiedAnnotation.class, section );
+                  windowAnnotationMap.putAll( splitCovered( annotations, sectionWindows.getValue() ) );
+               }
+            }
+
+         }
+      }
+
+      for ( Map.Entry<Annotation, Collection<IdentifiedAnnotation>> windowAnnotations : windowAnnotationMap.entrySet() ) {
+         processWindow( jCas, windowAnnotations.getKey(), windowAnnotations.getValue() );
+      }
+      LOGGER.info( "Finished." );
+   }
+
+
+   /**
+    * For instance "Aspirin, 10mg.  Started 1/1/2000.  Stopped 12/31/2000."
+    *
+    * @param jCas        ye olde ...
+    * @param window      contains relatable annotations.
+    * @param annotations -
+    */
+   private void processWindow( final JCas jCas,
+                               final Annotation window,
+                               final Collection<IdentifiedAnnotation> annotations ) {
+      final int offset = window.getBegin();
+      final String text = window.getCoveredText().toLowerCase();
+      final List<Integer> startSpans = getTextIndices( text, "started" );
+      final List<Integer> stopSpans = getTextIndices( text, "stopped" );
+      if ( startSpans.isEmpty() && stopSpans.isEmpty() ) {
+         return;
+      }
+
+      final List<Pair<Integer>> spans = new ArrayList<>();
+      final Map<Pair<Integer>, MedicationEventMention> medMap = new HashMap<>();
+      final Map<Pair<Integer>, Calendar> calendarMap = new HashMap<>();
+
+      for ( IdentifiedAnnotation annotation : annotations ) {
+         if ( annotation instanceof MedicationEventMention
+              && (_cuiList.isEmpty()
+                  || OntologyConceptUtil.getCuis( annotation ).stream().anyMatch( _cuiList::contains )) ) {
+            final Pair<Integer> span = createTextSpan( annotation, offset );
+            spans.add( span );
+            medMap.put( span, (MedicationEventMention)annotation );
+         } else if ( annotation instanceof TimeMention ) {
+            final Calendar calendar = getCalendar( (TimeMention)annotation );
+            if ( !NULL_CALENDAR.equals( calendar ) ) {
+               final Pair<Integer> span = createTextSpan( annotation, offset );
+               spans.add( span );
+               calendarMap.put( span, calendar );
+            }
+         } else if ( annotation instanceof DateAnnotation ) {
+            final Calendar calendar = getCalendar( (DateAnnotation)annotation );
+            if ( !NULL_CALENDAR.equals( calendar ) ) {
+               final Pair<Integer> span = createTextSpan( annotation, offset );
+               spans.add( span );
+               calendarMap.put( span, calendar );
+            }
+         }
+      }
+      processSpans( jCas, spans, medMap, calendarMap, startSpans, stopSpans );
+   }
+
+
+   static private void processSpans( final JCas jCas,
+                                     final List<Pair<Integer>> medDateSpans,
+                                     final Map<Pair<Integer>, MedicationEventMention> medMap,
+                                     final Map<Pair<Integer>, Calendar> calendarMap,
+                                     final List<Integer> startSpans,
+                                     final List<Integer> stopSpans ) {
+      int startIndex = startSpans.size() - 1;
+      int stopIndex = stopSpans.size() - 1;
+
+      Date startDate = null;
+      Date stopDate = null;
+
+      for ( int z = medDateSpans.size() - 1; z >= 0; z-- ) {
+         final Pair<Integer> span = medDateSpans.get( z );
+         if ( span.getValue1() > startIndex && span.getValue1() < startIndex + 5 ) {
+            final Calendar start = calendarMap.get( span );
+            if ( start != null ) {
+               startDate = new Date( jCas );
+               startDate.setDay( "" + start.get( DAY_OF_MONTH ) );
+               startDate.setMonth( "" + (start.get( Calendar.MONTH ) + 1) );
+               startDate.setYear( "" + start.get( Calendar.YEAR ) );
+            }
+         } else if ( span.getValue1() > stopIndex && span.getValue1() < stopIndex + 5 ) {
+            final Calendar stop = calendarMap.get( span );
+            if ( stop != null ) {
+               stopDate = new Date( jCas );
+               stopDate.setDay( "" + stop.get( DAY_OF_MONTH ) );
+               stopDate.setMonth( "" + (stop.get( Calendar.MONTH ) + 1) );
+               stopDate.setYear( "" + stop.get( Calendar.YEAR ) );
+            }
+         } else {
+            final MedicationEventMention med = medMap.get( span );
+            if ( med == null ) {
+               // possibly some interrupting date?  Reset the dates.
+               startDate = null;
+               stopDate = null;
+               continue;
+            }
+            if ( startDate != null ) {
+               med.setStartDate( startDate );
+            }
+            if ( stopDate != null ) {
+               med.setEndDate( stopDate );
+            }
+         }
+      }
+   }
+
+   static private Map<Annotation, Collection<IdentifiedAnnotation>> splitCovered(
+         final Collection<IdentifiedAnnotation> annotations,
+         final Collection<Annotation> covering ) {
+      final Map<Annotation, Collection<IdentifiedAnnotation>> covered = new HashMap<>();
+      for ( IdentifiedAnnotation annotation : annotations ) {
+         final int begin = annotation.getBegin();
+         for ( Annotation cover : covering ) {
+            if ( begin >= cover.getBegin() && begin < cover.getEnd() ) {
+               covered.computeIfAbsent( cover, c -> new ArrayList<>() ).add( annotation );
+            }
+         }
+      }
+      return covered;
+   }
+
+   static private List<Integer> getTextIndices( final String windowText, final String searchText ) {
+      final String text = windowText.toLowerCase();
+      final List<Integer> indices = new ArrayList<>();
+      int index = text.indexOf( searchText );
+      while ( index >= 0 ) {
+         indices.add( index );
+         index = text.indexOf( searchText, index );
+      }
+      return indices;
+   }
+
+   static private Pair<Integer> createTextSpan( final Annotation annotation, final int offset ) {
+      return new Pair<>( annotation.getBegin() - offset, annotation.getEnd() - offset );
+   }
+
+
+   static private Calendar getCalendar( final TimeMention timeMention ) {
+      final org.apache.ctakes.typesystem.type.refsem.Date typeDate = timeMention.getDate();
+      if ( typeDate != null ) {
+         final int year = parseInt( typeDate.getYear() );
+         final int month = parseInt( typeDate.getMonth() );
+         final int day = parseInt( typeDate.getDay() );
+         if ( year == Integer.MIN_VALUE || month == Integer.MIN_VALUE || day == Integer.MIN_VALUE ) {
+            return NULL_CALENDAR;
+         }
+         LOGGER.debug( "TimeMention Date " + year + "" + month + "" + day );
+         return new Calendar.Builder().setDate( year, month - 1, day ).build();
+      }
+      return getCalendar( timeMention.getCoveredText() );
+   }
+
+   static private Calendar getCalendar( final DateAnnotation dateAnnotation ) {
+      return getCalendar( dateAnnotation.getCoveredText() );
+   }
+
+   static private Calendar getCalendar( final String text ) {
+      if ( isLousyDateText( text ) ) {
+         return NULL_CALENDAR;
+      }
+//      final Span span = Chronic.parse( text, PAST_OPTIONS );
+//      if ( span == null ) {
+//         return NULL_CALENDAR;
+//      }
+//      return span.getEndCalendar();
+
+      return NULL_CALENDAR;
+   }
+
+
+   static private boolean isLousyDateText( final String text ) {
+      if ( text.length() < 7 ) {
+         return true;
+      }
+      for ( char c : text.toCharArray() ) {
+         if ( Character.isDigit( c ) ) {
+            return false;
+         }
+      }
+      return true;
+   }
+
+   static private int parseInt( final String text ) {
+      if ( text == null || text.isEmpty() ) {
+         return Integer.MIN_VALUE;
+      }
+      for ( char c : text.toCharArray() ) {
+         if ( !Character.isDigit( c ) ) {
+            return Integer.MIN_VALUE;
+         }
+      }
+      try {
+         return Integer.parseInt( text );
+      } catch ( NumberFormatException nfE ) {
+         return Integer.MIN_VALUE;
+      }
+   }
+
+
+   synchronized protected void loadSections() throws ResourceInitializationException {
+      if ( _sectionListPath == null ) {
+         return;
+      }
+      loadList( _sectionListPath, _sectionList );
+   }
+
+
+   synchronized protected void loadCuis() throws ResourceInitializationException {
+      if ( _cuiListPath == null ) {
+         return;
+      }
+      loadList( _cuiListPath, _cuiList );
+   }
+
+
+   synchronized protected void loadList( final String filePath, final Collection<String> list )
+         throws ResourceInitializationException {
+      if ( filePath == null ) {
+         return;
+      }
+      LOGGER.info( "Parsing " + filePath );
+      try ( BufferedReader reader = new BufferedReader( new InputStreamReader( FileLocator
+            .getAsStream( filePath ) ) ) ) {
+         String line = reader.readLine();
+         while ( line != null ) {
+            final String value = readBsvLine( line );
+            if ( !value.isEmpty() ) {
+               list.add( value );
+            }
+            line = reader.readLine();
+         }
+      } catch ( IOException ioE ) {
+         throw new ResourceInitializationException( ioE );
+      }
+      LOGGER.info( "Finished Parsing" );
+   }
+
+   /**
+    * @param line double-bar separated text
+    */
+   private String readBsvLine( final String line ) {
+      if ( line.isEmpty() || line.startsWith( "#" ) || line.startsWith( "//" ) ) {
+         // comment
+         return "";
+      }
+      final String[] splits = line.split( "\\|" );
+      if ( splits.length >= 1 ) {
+         // We are only interested in the first entry
+         return splits[ 0 ].trim();
+      }
+      return "";
+   }
+
+
+   static public AnalysisEngineDescription createEngineDescription( final String cuiListPath )
+         throws ResourceInitializationException {
+      return AnalysisEngineFactory.createEngineDescription( SimpleMedDatesFinder.class,
+            PARAM_CUI_LIST_PATH, cuiListPath );
+   }
+
+}