You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2017/11/15 00:03:28 UTC

svn commit: r1815277 - in /ctakes/trunk: ctakes-core/src/main/java/org/apache/ctakes/core/ae/ ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/dictionary/lookup/fast/ ctakes-regression-test/ ctakes-regression-test/src/test/java/org/apa...

Author: seanfinan
Date: Wed Nov 15 00:03:28 2017
New Revision: 1815277

URL: http://svn.apache.org/viewvc?rev=1815277&view=rev
Log:
CTAKES-441 : Add LabValueFinder annotator.  Many thanks to the original author Kean Kaufmann.

Added:
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/LabValueFinder.java
    ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/dictionary/lookup/fast/tinyDict.bsv
    ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/dictionary/lookup/fast/tinyDictSpec.xml
    ctakes/trunk/ctakes-regression-test/src/test/java/org/apache/ctakes/core/ae/LabValueFinderTester.java
Modified:
    ctakes/trunk/ctakes-regression-test/pom.xml
    ctakes/trunk/ctakes-type-system/src/main/java/org/apache/ctakes/typesystem/type/constants/CONST.java

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/LabValueFinder.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/LabValueFinder.java?rev=1815277&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/LabValueFinder.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/LabValueFinder.java Wed Nov 15 00:03:28 2017
@@ -0,0 +1,396 @@
+package org.apache.ctakes.core.ae;
+
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.ctakes.core.util.OntologyConceptUtil;
+import org.apache.ctakes.core.util.textspan.DefaultAspanComparator;
+import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.relation.ResultOfTextRelation;
+import org.apache.ctakes.typesystem.type.syntax.NumToken;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.apache.ctakes.typesystem.type.textsem.*;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSArray;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+import static org.apache.ctakes.core.pipeline.PipeBitInfo.Role.ANNOTATOR;
+import static org.apache.ctakes.core.pipeline.PipeBitInfo.TypeProduct.*;
+
+/**
+ * See Jira at https://issues.apache.org/jira/browse/CTAKES-441
+ *
+ * @author Kean Kaufmann
+ * @since 11/13/2017
+ */
+@PipeBitInfo(
+      name = "LabValueFinder",
+      description = "Associates Lab Mentions with values.",
+      role = ANNOTATOR,
+      dependencies = { SECTION, BASE_TOKEN, IDENTIFIED_ANNOTATION },
+      products = { GENERIC_RELATION }
+)
+final public class LabValueFinder extends JCasAnnotator_ImplBase {
+
+   // Names of the configuration parameters accepted by this annotator.
+   public static final String PARAM_ALL_SECTIONS = "allSections";
+   public static final String PARAM_SECTIONS = "sections";
+   public static final String PARAM_VALUE_WORDS = "valueWords";
+   public static final String PARAM_MAX_NEWLINES = "maxLineCount";
+   // Initial value of the maxLineCount field, i.e. the value used when the parameter is not configured.
+   public static final int DEFAULT_MAX_LINE_COUNT = 2;
+   public static final String PARAM_LAB_TUIS = "labTUIs";
+   public static final String PARAM_LAB_X_CUIS = "excludeCUIs";
+
+   // Built-in values.  initialize() always starts each lookup collection with these,
+   // then adds whatever the user configured.
+   static private final String[] REQUIRED_SECTIONS = { "2.16.840.1.113883.10.20.22.2.3.1" };
+   static private final String[] REQUIRED_VALUE_WORDS = { "positive", "negative", "elevated", "normal", "increased", "decreased" };
+   static private final String[] REQUIRED_LAB_TUIS = {
+         "T059",    // Laboratory Procedure
+         "T060",    // Diagnostic Procedure (e.g. GFR)
+         "T201" }; // Clinical Attribute (e.g. A/G Ratio)
+   static private final String[] REQUIRED_EXCLUDE_CUIS = {
+         "C1443182",     // "Calculated (procedure)"
+         "C1715372",    // "Medical problem"
+         "C1441604" }; // "High sensitivity"
+
+   // Package-private logger shared by all instances; named by a plain string rather than by the class.
+   static final Logger LOGGER = Logger.getLogger( "LabValueFinder" );
+
+   // Raw text of the allSections parameter; initialize() parses it into _useAllSections.
+   @ConfigurationParameter( name = PARAM_ALL_SECTIONS,
+         description = "Use all Annotatable sections.  This ignores the value of " + PARAM_SECTIONS,
+         defaultValue = "true",
+         mandatory = false )
+   private String _useAllSectionText;
+   private boolean _useAllSections;
+
+   // User-configured section ids; initialize() merges them with REQUIRED_SECTIONS into annotatableSections.
+   @ConfigurationParameter( name = PARAM_SECTIONS,
+         description = "Annotatable sections",
+         defaultValue = {},
+         mandatory = false )
+   private String[] _annotatableSections;
+   private Collection<String> annotatableSections;
+
+   // User-configured value words; initialize() merges them with REQUIRED_VALUE_WORDS into valueWords,
+   // which fillInValues() compares against upper-cased WordToken text.
+   @ConfigurationParameter( name = PARAM_VALUE_WORDS,
+         description = "Words indicating values",
+         defaultValue = {},
+         mandatory = false )
+   private String[] _valueWords;
+   private Collection<String> valueWords;
+
+   // Read by getValueWindowEnd() to limit how many line ends the value search window may span.
+   @ConfigurationParameter( name = PARAM_MAX_NEWLINES,
+         description = "Maximum newlines between lab and value",
+         mandatory = false )
+   private int maxLineCount = DEFAULT_MAX_LINE_COUNT;
+
+   // User-configured TUIs; initialize() merges them with REQUIRED_LAB_TUIS into labTuis.
+   // NOTE(review): unlike the sibling parameters this one does not declare mandatory = false -- confirm intended.
+   @ConfigurationParameter( name = PARAM_LAB_TUIS,
+         description = "TUIs indicating lab measurements",
+         defaultValue = {} )
+   private String[] _labTuis;
+   private Collection<String> labTuis;
+
+   // User-configured CUIs to ignore; initialize() merges them with REQUIRED_EXCLUDE_CUIS into excludeCuis.
+   @ConfigurationParameter( name = PARAM_LAB_X_CUIS,
+         description = "CUIs not indicating specific lab measurements",
+         defaultValue = {},
+         mandatory = false )
+   private String[] _excludeCuis;
+   private Collection<String> excludeCuis;
+
+   /**
+    * Turns the raw configuration parameter values into the lookup collections used while processing.
+    * Every collection holds the built-in required values plus any user-configured values.
+    * {@inheritDoc}
+    */
+   @Override
+   public void initialize( final UimaContext context ) throws ResourceInitializationException {
+      // The superclass has to be initialized before anything else is done.
+      super.initialize( context );
+
+      // The flag arrives as text and is converted to a boolean here.
+      _useAllSections = Boolean.parseBoolean( _useAllSectionText );
+
+      // Required values first, user values on top of them.
+      annotatableSections = gatherParameters( REQUIRED_SECTIONS, _annotatableSections );
+      valueWords = gatherParameters( REQUIRED_VALUE_WORDS, _valueWords );
+      labTuis = gatherParameters( REQUIRED_LAB_TUIS, _labTuis );
+      excludeCuis = gatherParameters( REQUIRED_EXCLUDE_CUIS, _excludeCuis );
+
+      final String lineCountMessage = PARAM_MAX_NEWLINES + " = " + maxLineCount;
+      LOGGER.debug( lineCountMessage );
+      final String tuiMessage = labTuis.size() + " lab TUIs: " + labTuis;
+      LOGGER.info( tuiMessage );
+   }
+
+   /**
+    * Combines built-in and user-configured parameter values into one set of upper-case values.
+    *
+    * @param requiredValues values that are always included.
+    * @param userValues     additional values from the configuration.  May be null or empty, e.g. when the
+    *                       parameter is absent from a descriptor; null elements are skipped.
+    * @return A mutable collection of all values in upper case, without duplicates.
+    */
+   static private Collection<String> gatherParameters( final String[] requiredValues, final String[] userValues ) {
+      // Collectors.toSet() gives no guarantee that its result is mutable, so ask for a HashSet explicitly.
+      final Collection<String> values = Arrays.stream( requiredValues )
+            .map( String::toUpperCase )
+            .collect( Collectors.toCollection( HashSet::new ) );
+      if ( userValues == null ) {
+         // An optional parameter was left unset: only the required values apply.
+         return values;
+      }
+      for ( String value : userValues ) {
+         if ( value != null ) {
+            values.add( value.toUpperCase() );
+         }
+      }
+      return values;
+   }
+
+   /**
+    * Finds values for the lab mentions of every annotatable section in the document.
+    * A section is annotatable when all sections are enabled, when no section ids are known, or when its id
+    * is one of the configured section ids.  Configured ids are stored in upper case, so the section id is
+    * upper-cased before the lookup; otherwise an id containing lower-case letters could never match.
+    * {@inheritDoc}
+    */
+   @Override
+   public void process( final JCas jCas ) throws AnalysisEngineProcessException {
+      LOGGER.info( "Associating Labs with values ..." );
+
+      final List<Class<? extends Annotation>> valueClasses = Arrays.asList( NumToken.class, FractionAnnotation.class );
+
+      // Numbers inside dates and times are not lab values.
+      // Time may filter wanted clock positions such as in breast cancer
+      final Map<Annotation, List<IdentifiedAnnotation>> filterMap = createCoveringMap( jCas, valueClasses,
+            Arrays.asList( DateAnnotation.class, TimeAnnotation.class ) );
+
+      // Numbers inside fractions, ranges and measurements are replaced by the covering annotation.
+      final Map<Annotation, List<IdentifiedAnnotation>> subsumeMap = createCoveringMap( jCas, valueClasses,
+            Arrays.asList( FractionAnnotation.class, RangeAnnotation.class, MeasurementAnnotation.class ) );
+
+      for ( Segment segment : JCasUtil.select( jCas, Segment.class ) ) {
+         final String sectionId = segment.getId();
+         // A section without an id can only be processed when all sections are wanted.
+         final boolean annotatable = _useAllSections
+               || annotatableSections.isEmpty()
+               || (sectionId != null && annotatableSections.contains( sectionId.toUpperCase() ));
+         if ( annotatable ) {
+            final List<LabMention> mentions = annotateMentions( jCas, segment );
+            fillInValues( jCas, mentions, filterMap, subsumeMap, segment.getBegin(), segment.getEnd() );
+         }
+      }
+
+      LOGGER.info( "Finished." );
+   }
+
+   /**
+    * @param jCas    -
+    * @param segment -
+    * @return Existing and extracted LabMentions in the segment.
+    */
+   private List<LabMention> annotateMentions( final JCas jCas, final Segment segment ) {
+      final List<LabMention> labMentions = new ArrayList<>();
+      for ( IdentifiedAnnotation annotation : JCasUtil.selectCovered( jCas, IdentifiedAnnotation.class, segment ) ) {
+         // first check to see if the annotation is a lab mention.
+         if ( LabMention.class.isInstance( annotation ) ) {
+            // Check for existing value.
+            final ResultOfTextRelation relation = ((LabMention) annotation).getLabValue();
+            if ( relation != null && relation.getArg2() != null ) {
+               // LabMention is already fully established with a value.  Skip it.
+               continue;
+            } else if ( relation == null ) {
+               // LabMention needs a value relation.
+               initValueRelation( jCas, (LabMention) annotation );
+            }
+            // add the LabMention and move on.
+            labMentions.add( (LabMention) annotation );
+            continue;
+         }
+         // Annotation was not a LabMention, but check to see if any part of it can be.
+         final Collection<UmlsConcept> validConcepts
+               = OntologyConceptUtil.getUmlsConceptStream( annotation )
+               .filter( c -> labTuis.contains( c.getTui() ) )
+               .filter( c -> !excludeCuis.contains( c.getCui() ) ).collect( Collectors.toList() );
+         if ( validConcepts.isEmpty() ) {
+            continue;
+         }
+         // We have valid lab concepts in the annotation.  Create an overlapping LabMention with those concepts.
+         final LabMention lab = createLabMention( jCas, validConcepts, annotation.getBegin(), annotation.getEnd() );
+         labMentions.add( lab );
+      }
+      return labMentions;
+   }
+
+   /**
+    * Gives a lab mention a value relation whose first argument is the mention itself.
+    * The second argument, the value, is left unset.
+    *
+    * @param jCas -
+    * @param lab  for which a value relation should be initialized.
+    */
+   static private void initValueRelation( final JCas jCas, final LabMention lab ) {
+      final ResultOfTextRelation valueRelation = new ResultOfTextRelation( jCas );
+      final RelationArgument labArgument = new RelationArgument( jCas );
+      labArgument.setArgument( lab );
+      valueRelation.setArg1( labArgument );
+      // The relation is reachable through the lab mention.
+      lab.setLabValue( valueRelation );
+   }
+
+   /**
+    * Creates a LabMention over the given span, attaches the given concepts and an empty value relation,
+    * and adds the mention to the indexes.
+    *
+    * @param jCas     -
+    * @param concepts Lab concepts.
+    * @param begin    begin index for a new LabMention.
+    * @param end      end index for a new LabMention.
+    * @return a new, indexed LabMention with the given attributes.
+    */
+   static private LabMention createLabMention( final JCas jCas, final Collection<UmlsConcept> concepts,
+                                               final int begin, final int end ) {
+      final LabMention labMention = new LabMention( jCas, begin, end );
+      labMention.setId( CONST.NE_TYPE_ID_LAB );
+      labMention.setDiscoveryTechnique( CONST.NE_DISCOVERY_TECH_EXPLICIT_AE );
+      // Copy the concepts into a feature structure array, keeping their iteration order.
+      final FSArray conceptArray = new FSArray( jCas, concepts.size() );
+      int slot = 0;
+      for ( final UmlsConcept concept : concepts ) {
+         conceptArray.set( slot++, concept );
+      }
+      labMention.setOntologyConceptArr( conceptArray );
+      // The value relation starts out with the mention as its first argument and no value.
+      initValueRelation( jCas, labMention );
+      labMention.addToIndexes();
+      LOGGER.debug( "created " + getDebugText( labMention ) );
+      return labMention;
+   }
+
+   /**
+    * Locates the line ends of a section.
+    *
+    * @param docText      text of the whole document.
+    * @param segmentBegin begin index of the section.
+    * @param segmentEnd   end index of the section.
+    * @return Indices of all newline characters from segmentBegin up to, but not including, segmentEnd,
+    * in ascending order, followed by segmentEnd itself.
+    */
+   static private List<Integer> getNewLines( final String docText, final int segmentBegin, final int segmentEnd ) {
+      final List<Integer> lineEnds = new ArrayList<>();
+      for ( int i = docText.indexOf( '\n', segmentBegin );
+            i >= 0 && i < segmentEnd;
+            i = docText.indexOf( '\n', i + 1 ) ) {
+         lineEnds.add( i );
+      }
+      // The end of the section always closes the last line.
+      lineEnds.add( segmentEnd );
+      return lineEnds;
+   }
+
+   private void fillInValues( final JCas jCas,
+                              final List<LabMention> labs,
+                              final Map<Annotation, List<IdentifiedAnnotation>> filterMap,
+                              final Map<Annotation, List<IdentifiedAnnotation>> subsumeMap,
+                              final int segmentBegin,
+                              final int segmentEnd ) {
+      if ( labs == null || labs.isEmpty() ) {
+         return;
+      }
+      final List<Integer> newLines = getNewLines( jCas.getDocumentText(), segmentBegin, segmentEnd );
+
+      final List<LabMention> sortedLabs = sortOverlapsByLength( labs );
+      final int numMentions = sortedLabs.size();
+      final Collection<Annotation> candidateSet = new HashSet<>();
+      for ( int i = 0; i < numMentions; i++ ) {
+         candidateSet.clear();
+         final LabMention lab = sortedLabs.get( i );
+         Annotation value = null;
+         final LabMention nextLab = (i + 1 < sortedLabs.size()) ? sortedLabs.get( i + 1 ) : null;
+         final int nextLabBegin = nextLab != null ? nextLab.getBegin() : newLines.get( newLines.size() - 1 );
+         final int windowBegin = lab.getEnd();
+         final int windowEnd = getValueWindowEnd( windowBegin, nextLabBegin, newLines );
+
+         LOGGER.debug( "Seeking value for: " + getDebugText( lab ) + " between " + windowBegin + " and " + windowEnd );
+         for ( NumToken numToken : JCasUtil.selectCovered( jCas, NumToken.class, windowBegin, windowEnd ) ) {
+            LOGGER.debug( "   " + getDebugText( numToken ) );
+            final List<IdentifiedAnnotation> filters = filterMap.get( numToken );
+            if ( filters != null && !filters.isEmpty() ) {
+               LOGGER.debug( "      Filtering due to " + getDebugText( filters.get( 0 ) ) );
+            } else {
+               final List<IdentifiedAnnotation> subsumers = subsumeMap.getOrDefault( numToken, Collections.emptyList() );
+               if ( subsumers.isEmpty() ) {
+                  candidateSet.add( numToken );
+               } else {
+                  candidateSet.addAll( subsumers );
+                  LOGGER.debug( "subsuming candidate: " + getDebugText( numToken ) );
+               }
+            }
+         }
+         if ( !candidateSet.isEmpty() ) {
+            // get first, shortest span value
+            final List<Annotation> candidateList = new ArrayList<>( candidateSet );
+            candidateList.sort( DefaultAspanComparator.getInstance() );
+            // prefer non-range values, if any
+            value = candidateList.stream()
+                  .filter( a -> !(a instanceof RangeAnnotation) )
+                  .findFirst()
+                  .orElse( candidateList.get( 0 ) );
+            LOGGER.debug( "Set to value: " + getDebugText( value ) );
+         } else {
+            // attempt to find a text (word) value
+            value = JCasUtil.selectCovered( jCas, WordToken.class, windowBegin, windowEnd ).stream()
+                  .filter( w -> valueWords.contains( w.getCoveredText().toUpperCase() ) )
+                  .findFirst()
+                  .orElse( null );
+         }
+         if ( value != null ) {
+            LOGGER.debug( "setting lab value to " + getDebugText( value ) );
+            final RelationArgument arg2 = new RelationArgument( jCas );
+            arg2.setArgument( value );
+            lab.getLabValue().setArg2( arg2 );
+         }
+      }
+   }
+
+   /**
+    * Computes where the search for a lab value must stop.
+    * Line ends at or after the window begin are counted.  The window may reach the last counted line end
+    * that does not exceed maxLineCount, and counting also stops once a line end lies beyond the next lab.
+    * If no line end is counted, the last entry of newLines is used.  The result never exceeds nextLabBegin.
+    *
+    * @param windowBegin  begin index of the search window.
+    * @param nextLabBegin begin index of the next lab, or the limit to use when there is no next lab.
+    * @param newLines     ascending line end indices, as created by getNewLines.
+    * @return first of: start of next mention, start of max newline, or end of segment
+    */
+   private int getValueWindowEnd( final int windowBegin, final int nextLabBegin, final List<Integer> newLines ) {
+      int windowLimit = newLines.get( newLines.size() - 1 );
+      int lineEndsSeen = 0;
+      for ( final int lineEnd : newLines ) {
+         if ( lineEnd < windowBegin ) {
+            // This line ends before the window begins.
+            continue;
+         }
+         lineEndsSeen++;
+         if ( lineEndsSeen > maxLineCount ) {
+            break;
+         }
+         windowLimit = lineEnd;
+         if ( lineEnd > nextLabBegin ) {
+            break;
+         }
+      }
+      return Math.min( windowLimit, nextLabBegin );
+   }
+
+
+   /**
+    * Maps annotations of the covered classes to all annotations of the covering classes that cover them.
+    * The results for every (covered, covering) class pair are merged, so an annotation covered by more than
+    * one covering class keeps all of its coverings and not only those of the class that was indexed last.
+    *
+    * @param jCas            -
+    * @param coveredClasses  types of the annotations used as keys.
+    * @param coveringClasses types of the annotations collected as values.
+    * @return map of covered annotation to its covering annotations.  Annotations that are not covered are absent.
+    */
+   // The classes are only known at runtime, so JCasUtil.indexCovering is called with raw types.
+   @SuppressWarnings( { "rawtypes", "unchecked" } )
+   static private Map<Annotation, List<IdentifiedAnnotation>> createCoveringMap( final JCas jCas,
+                                                                                 final List<Class<? extends Annotation>> coveredClasses,
+                                                                                 final List<Class<? extends IdentifiedAnnotation>> coveringClasses ) {
+      final Map<Annotation, List<IdentifiedAnnotation>> allCovering = new HashMap<>();
+      for ( Class covered : coveredClasses ) {
+         for ( Class covering : coveringClasses ) {
+            final Map<Annotation, Collection<IdentifiedAnnotation>> pairCovering
+                  = JCasUtil.indexCovering( jCas, covered, covering );
+            for ( Map.Entry<Annotation, Collection<IdentifiedAnnotation>> entry : pairCovering.entrySet() ) {
+               // Append to any coverings found for an earlier covering class instead of replacing them.
+               allCovering.computeIfAbsent( entry.getKey(), a -> new ArrayList<>() )
+                     .addAll( entry.getValue() );
+            }
+         }
+      }
+      return allCovering;
+   }
+
+   /**
+    * The method name does not really describe what it does, but it is close.
+    * Returns a sorted copy; the given list is not changed.  Two annotations are compared as follows:
+    * if the first begins earlier, its end is compared to the begin of the second;
+    * if both begin at the same offset, their ends are compared;
+    * otherwise the begin of the first is compared to the end of the second.
+    * NOTE(review): this ordering is not transitive for chained overlaps.  With spans 0-10, 5-15 and 12-20
+    * the first sorts after the second, the second after the third, yet the first sorts before the third.
+    * Confirm whether such input can occur, as List.sort may reject an inconsistent comparator.
+    *
+    * @param list -
+    * @param <T>  we only deal with annotations.
+    * @return a sorted list.
+    */
+   static private <T extends Annotation> List<T> sortOverlapsByLength( final List<T> list ) {
+      final List<T> sorted = new ArrayList<>( list );
+      sorted.sort( ( left, right ) -> {
+         final int leftBegin = left.getBegin();
+         final int rightBegin = right.getBegin();
+         if ( leftBegin < rightBegin ) {
+            return Integer.compare( left.getEnd(), rightBegin );
+         }
+         if ( leftBegin == rightBegin ) {
+            return Integer.compare( left.getEnd(), right.getEnd() );
+         }
+         return Integer.compare( leftBegin, right.getEnd() );
+      } );
+      return sorted;
+   }
+
+   static private String getDebugText( final Annotation a ) {
+      return a.getType().getShortName() + "(" + a.getBegin() + "-" + a.getEnd() + "): " + a.getCoveredText();
+   }
+
+
+   /**
+    * @return a description of a LabValueFinder engine, created without explicit configuration values.
+    * @throws ResourceInitializationException if thrown by AnalysisEngineFactory.createEngineDescription.
+    */
+   public static AnalysisEngineDescription createAnnotatorDescription() throws ResourceInitializationException {
+      return AnalysisEngineFactory.createEngineDescription( LabValueFinder.class );
+   }
+
+   /**
+    * @param objects configuration data, handed unchanged to AnalysisEngineFactory.createEngineDescription.
+    *                Presumably alternating parameter names and values -- confirm against the uimaFIT documentation.
+    * @return a description of a LabValueFinder engine with the given configuration data.
+    * @throws ResourceInitializationException if thrown by AnalysisEngineFactory.createEngineDescription.
+    */
+   public static AnalysisEngineDescription createAnnotatorDescription( final Object... objects ) throws ResourceInitializationException {
+      return AnalysisEngineFactory.createEngineDescription( LabValueFinder.class, objects );
+   }
+
+
+}

Added: ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/dictionary/lookup/fast/tinyDict.bsv
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/dictionary/lookup/fast/tinyDict.bsv?rev=1815277&view=auto
==============================================================================
--- ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/dictionary/lookup/fast/tinyDict.bsv (added)
+++ ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/dictionary/lookup/fast/tinyDict.bsv Wed Nov 15 00:03:28 2017
@@ -0,0 +1,42 @@
+// CUI|TUI|Text|preferredTerm
+C0201838|T059|Albumin
+C0202202|T059|Protein
+C0201850|T059|alkaline phosphatase|Alkaline phosphatase measurement
+C0201836|T059|ALT|Alanine aminotransferase measurement
+C0201899|T059|AST|Aspartate aminotransferase measurement
+C0201913|T059|bilirubin|Bilirubin, total measurement
+C0036808|T059|Bilirubin, Indirect
+C0858048|T059|Bilirubin, Direct
+C0201973|T059|Total CK
+C0523584|T059|CK-MB|Creatine kinase MB measurement
+C0523584|T059|CKMB|Creatine kinase MB measurement
+C0023508|T060|white count|White Blood Cell Count procedure
+C0201803|T059|osmolality|Osmolality Measurement
+C0017564|T060|GFR|Glomerular Filtration Rate
+C0588466|T059|RBC, UA|Red blood cells urine (lab test)
+C0000010|T059|WBC, UA|White blood cells urine (lab test)
+C0201837|T201|A/G Ratio|Albumin/Globulin ratio
+C0373670|T059|Lipase|Lipase measurement
+C0033707|T059|Protime|Prothrombin time assay
+C0525032|T059|INR|International Normalized Ratio
+C1443182|T059|Calc|Calculated (procedure)
+C00337443|T059|sodium|Sodium measurement
+C00202194|T059|potassium|Potassium measurement
+C00003074|T201|Anion Gap
+C00202230|T059|TSH|Thyroid stimulating hormone measurement
+C01171408|T059|LDL/HDL|High density/low density lipoprotein ratio measurement
+C00518015|T059|hemoglobin|Hemoglobin measurement
+C00032181|T059|platelet count|Platelet Count measurement
+C00018935|T059|hematocrit|Hematocrit procedure
+C00201657|T059|CRP|C-reactive protein measurement
+C01535922|T059|procalcitonin|Procalcitonin measurement
+C00202115|T059|lactate|Lactic acid measurement
+C00202225|T059|free T4|T4 free measurement
+C00201934|T059|cardiac enzymes|Cardiac enzymes measurement
+C00337438|T059|glucose|Glucose measurement
+C00201802|T059|specific gravity|Specific gravity measurement
+C00200635|T059|lymphocytes|Lymphocyte Count measurement
+C00005845|T059|BUN|Blood urea nitrogen measurement
+C00201975|T059|creatinine|Creatinine measurement
+C01305866|T060|weight|Weighing patient
+C01305855|T201|BMI|Body mass index

Added: ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/dictionary/lookup/fast/tinyDictSpec.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/dictionary/lookup/fast/tinyDictSpec.xml?rev=1815277&view=auto
==============================================================================
--- ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/dictionary/lookup/fast/tinyDictSpec.xml (added)
+++ ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/dictionary/lookup/fast/tinyDictSpec.xml Wed Nov 15 00:03:28 2017
@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<lookupSpecification>
+    <dictionaries>
+        <dictionary>
+            <name>LabAnnotatorTestDict</name>
+            <implementationName>org.apache.ctakes.dictionary.lookup2.dictionary.BsvRareWordDictionary
+            </implementationName>
+            <properties>
+                <property key="bsvPath" value="org/apache/ctakes/examples/dictionary/lookup/fast/tinyDict.bsv"/>
+            </properties>
+        </dictionary>
+    </dictionaries>
+
+    <conceptFactories>
+        <conceptFactory>
+            <name>LabAnnotatorTestConcepts</name>
+            <implementationName>org.apache.ctakes.dictionary.lookup2.concept.BsvConceptFactory</implementationName>
+            <properties>
+                <property key="bsvPath" value="org/apache/ctakes/examples/dictionary/lookup/fast/tinyDict.bsv"/>
+            </properties>
+        </conceptFactory>
+    </conceptFactories>
+
+    <!--  Defines what terms and concepts will be used  -->
+    <dictionaryConceptPairs>
+        <dictionaryConceptPair>
+            <name>LabAnnotatorPair</name>
+            <dictionaryName>LabAnnotatorTestDict</dictionaryName>
+            <conceptFactoryName>LabAnnotatorTestConcepts</conceptFactoryName>
+        </dictionaryConceptPair>
+    </dictionaryConceptPairs>
+
+    <rareWordConsumer>
+        <name>Term Consumer</name>
+        <implementationName>org.apache.ctakes.dictionary.lookup2.consumer.DefaultTermConsumer</implementationName>
+        <properties>
+            <property key="codingScheme" value="custom"/>
+        </properties>
+    </rareWordConsumer>
+
+</lookupSpecification>

Modified: ctakes/trunk/ctakes-regression-test/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-regression-test/pom.xml?rev=1815277&r1=1815276&r2=1815277&view=diff
==============================================================================
--- ctakes/trunk/ctakes-regression-test/pom.xml (original)
+++ ctakes/trunk/ctakes-regression-test/pom.xml Wed Nov 15 00:03:28 2017
@@ -10,108 +10,17 @@
   <description>Apache cTAKES Regression-test</description>
   
 	<dependencies>
-        <!--  type system is added by utils.  -->
-        <!--<dependency>-->
-        <!--<groupId>org.apache.ctakes</groupId>-->
-        <!--<artifactId>ctakes-type-system</artifactId>-->
-        <!--</dependency>-->
-        <!--  core is added by everybody.  -->
-        <!--<dependency>-->
-        <!--<groupId>org.apache.ctakes</groupId>-->
-        <!--<artifactId>ctakes-core</artifactId>-->
-        <!--</dependency>-->
-        <!--  utils is added by core.  -->
-        <!--<dependency>-->
-        <!--<groupId>org.apache.ctakes</groupId>-->
-        <!--<artifactId>ctakes-utils</artifactId>-->
-        <!--</dependency>-->
-        <!--  jdom is added by core.  -->
-        <!--<dependency>-->
-        <!--<groupId>jdom</groupId>-->
-        <!--<artifactId>jdom</artifactId>-->
-        <!--</dependency>-->
-        <!--  junit is added by utils.  -->
-        <!--<dependency>-->
-        <!--<groupId>junit</groupId>-->
-        <!--<artifactId>junit</artifactId>-->
-        <!--</dependency>-->
-        <!--  context tokenizer is added by assertion.  -->
-        <!--<dependency>-->
-        <!--<groupId>org.apache.ctakes</groupId>-->
-        <!--<artifactId>ctakes-context-tokenizer</artifactId>-->
-        <!--</dependency>-->
-        <!--  pre-processor is added by clinical pipeline.  -->
-        <!--<dependency>-->
-        <!--<groupId>org.apache.ctakes</groupId>-->
-        <!--<artifactId>ctakes-preprocessor</artifactId>-->
-        <!--</dependency>-->
-        <!--  lvg is added by dependency parser.  -->
-        <!--<dependency>-->
-        <!--<groupId>org.apache.ctakes</groupId>-->
-        <!--<artifactId>ctakes-lvg</artifactId>-->
-        <!--</dependency>-->
-        <!--  chunker is added by assertion.  -->
-        <!--<dependency>-->
-        <!--<groupId>org.apache.ctakes</groupId>-->
-        <!--<artifactId>ctakes-chunker</artifactId>-->
-        <!--</dependency>-->
-        <!--  ne contexts is added by clinical pipeline.  -->
-        <!--<dependency>-->
-        <!--<groupId>org.apache.ctakes</groupId>-->
-        <!--<artifactId>ctakes-ne-contexts</artifactId>-->
-        <!--</dependency>-->
-        <!--  pos tagger is added by chunker.  -->
-        <!--<dependency>-->
-        <!--<groupId>org.apache.ctakes</groupId>-->
-        <!--<artifactId>ctakes-pos-tagger</artifactId>-->
-        <!--</dependency>-->
-        <!--  assertion is added by clinical pipeline.  -->
-        <!--<dependency>-->
-        <!--<groupId>org.apache.ctakes</groupId>-->
-        <!--<artifactId>ctakes-assertion</artifactId>-->
-        <!--</dependency>-->
-        <!--  dependency parser is added by assertion.  -->
-        <!--<dependency>-->
-        <!--<groupId>org.apache.ctakes</groupId>-->
-        <!--<artifactId>ctakes-dependency-parser</artifactId>-->
-        <!--</dependency>-->
-		<!--  If clinical-pipeline is a dependency then everything in clinical-pipeline should be removed.
-				This makes maintenance much easier wrt upgraded versions and removed/unused modules (e.g. old lookup).-->
-        <!--  clinical pipeline is added by relation extractor.  -->
-        <!--<dependency>-->
-        <!--<groupId>org.apache.ctakes</groupId>-->
-        <!--<artifactId>ctakes-clinical-pipeline</artifactId>-->
-        <!--</dependency>-->
-        <!--  drug-ner is added by smoking status, side effect.  -->
-        <!--<dependency>-->
-        <!--<groupId>org.apache.ctakes</groupId>-->
-        <!--<artifactId>ctakes-drug-ner</artifactId>-->
-        <!--</dependency>		-->
 		<dependency>
 			<groupId>org.apache.uima</groupId>
 			<artifactId>uimaj-cpe</artifactId>
 			<version>2.9.0</version>
 		</dependency>
+		<!--  Allow for use of example notes, example dictionaries, example models ...  -->
 		<dependency>
 			<groupId>org.apache.ctakes</groupId>
-			<artifactId>ctakes-clinical-pipeline</artifactId>
+			<artifactId>ctakes-examples</artifactId>
 		</dependency>
 		<dependency>
-			<groupId>org.apache.ctakes</groupId>
-			<artifactId>ctakes-smoking-status</artifactId>
-		</dependency>		
-        <!--  constituency parser is added by assertion.  -->
-        <!--<dependency>-->
-        <!--<groupId>org.apache.ctakes</groupId>-->
-        <!--<artifactId>ctakes-constituency-parser</artifactId>-->
-        <!--</dependency>		-->
-		<!--
-		<dependency>
-			<groupId>org.apache.ctakes</groupId>
-			<artifactId>ctakes-coreference</artifactId>
-		</dependency>
-			-->
-		<dependency>
 			<groupId>xmlunit</groupId>
 			<artifactId>xmlunit</artifactId>
 			<version>1.4</version>

Added: ctakes/trunk/ctakes-regression-test/src/test/java/org/apache/ctakes/core/ae/LabValueFinderTester.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-regression-test/src/test/java/org/apache/ctakes/core/ae/LabValueFinderTester.java?rev=1815277&view=auto
==============================================================================
--- ctakes/trunk/ctakes-regression-test/src/test/java/org/apache/ctakes/core/ae/LabValueFinderTester.java (added)
+++ ctakes/trunk/ctakes-regression-test/src/test/java/org/apache/ctakes/core/ae/LabValueFinderTester.java Wed Nov 15 00:03:28 2017
@@ -0,0 +1,279 @@
+package org.apache.ctakes.core.ae;
+
+import org.apache.ctakes.contexttokenizer.ae.ContextDependentTokenizerAnnotator;
+import org.apache.ctakes.dictionary.lookup2.ae.DefaultJCasTermAnnotator;
+import org.apache.ctakes.postagger.POSTagger;
+import org.apache.ctakes.typesystem.type.textsem.LabMention;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.log4j.Logger;
+import org.apache.uima.UIMAException;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.fit.factory.AggregateBuilder;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.fit.pipeline.SimplePipeline;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import static org.apache.ctakes.core.ae.LabValueFinder.PARAM_ALL_SECTIONS;
+import static org.junit.Assert.assertEquals;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 11/14/2017
+ */
+public class LabValueFinderTester {
+
+   static private final Logger LOGGER = Logger.getLogger( "LabValueFinderTester" );
+
+   static private final String LAB_RESULTS_OID = "2.16.840.1.113883.10.20.22.2.3.1";
+   static private final String DICT_DESC_PATH = "org/apache/ctakes/examples/dictionary/lookup/fast/tinyDictSpec.xml";
+   static private AnalysisEngineDescription simpleSegmentator;
+   static private AnalysisEngineDescription labSegmentator;
+   static private AnalysisEngineDescription midPipeline;
+   static private AnalysisEngineDescription defaultLabAnnotator;
+   static private AnalysisEngineDescription sameLineLabAnnotator;
+
+   @BeforeClass
+   public static void setUpBeforeClass() throws UIMAException { // builds the engine descriptions once; processText() combines them per test
+      simpleSegmentator = SimpleSegmentAnnotator.createAnnotatorDescription(); // no section id supplied
+      labSegmentator = SimpleSegmentAnnotator.createAnnotatorDescription( LAB_RESULTS_OID ); // presumably tags the document as the lab-results section - confirm
+
+      final AggregateBuilder builder = new AggregateBuilder(); // steps that run between segmentation and lab finding
+      builder.add( SentenceDetector.createAnnotatorDescription() );
+      builder.add( TokenizerAnnotatorPTB.createAnnotatorDescription() );
+      builder.add( AnalysisEngineFactory.createEngineDescription( ContextDependentTokenizerAnnotator.class ) );
+      builder.add( AnalysisEngineFactory.createEngineDescription( POSTagger.class ) );
+      builder.add( DefaultJCasTermAnnotator.createAnnotatorDescription( DICT_DESC_PATH ) ); // dictionary lookup configured by tinyDictSpec.xml
+      midPipeline = builder.createAggregateDescription();
+
+      defaultLabAnnotator = LabValueFinder.createAnnotatorDescription( PARAM_ALL_SECTIONS, "false" ); // PARAM_ALL_SECTIONS switched off
+      sameLineLabAnnotator = LabValueFinder.createAnnotatorDescription( LabValueFinder.PARAM_MAX_NEWLINES, 1, PARAM_ALL_SECTIONS, "false" ); // as above, plus PARAM_MAX_NEWLINES = 1
+//      Debugging aid, left disabled:  LabValueFinder.LOGGER.setLevel( Level.DEBUG );
+   }
+
+
+   /**
+    * Tabular lab results, run first as a lab section and then as a non-lab section. @throws UIMAException if the pipeline fails
+    */
+   @Test
+   public void testTable() throws UIMAException {
+      String text =
+            "Recent Results (from the past 24 hour(s))\n" +
+                  "HEPATIC FUNCTION PANEL\n" +
+                  "Collection Time: 12/04/15 5:40 PM\n" +
+                  "    Result Value Ref Range\n" +
+                  " Albumin 2.2 (*) 3.7 - 5.1 g/dL\n" +
+                  " Total Protein 5.5 (*) 5.8 - 8.0 g/dL\n" +
+                  " Alkaline Phosphatase 844 (*) 42 - 121 IU/L\n" +
+                  " ALT 30  10 - 60 Unit/L\n" +
+                  " AST 130 (*) 10 - 42 Unit/L\n" +
+                  " Total Bilirubin 1.3  0.4 - 1.3 mg/dL\n" +
+                  " Bilirubin, Direct 0.4 (*) 0.0 - 0.2 mg/dL\n" +
+                  " Bilirubin, Indirect 0.9  0.0 - 1.0 mg/dL\n" +
+                  "LIPASE\n" +
+                  "Collection Time:  12/04/15 7 PM\n" + // "7 PM" is covered by a TimeAnnotation
+                  "    Result Value Ref Range\n" +
+                  " Lipase 19 (*) 22 - 51 Unit/L\n" +
+                  "PROTIME-INR\n" +
+                  " Collection Time: 12/04/15 7:45 PM\n" + // "7:45 PM" isn't covered by a TimeAnnotation
+                  "    Result Value Ref Range\n" +
+                  " Protime 18.0 (*) 9.0 - 11.5 sec\n" +
+                  " INR 1.9\n" +
+                  "COMPREHENSIVE METABOLIC PANEL\n" +
+                  "Collection Time:  12/04/15 7:45 AM\n" +
+                  "Result Value Ref Range\n" +
+                  "GFR Calc , Female N-Blk 73 >60 mL/min\n" +
+                  "Osmolality Calc 281 266 - 309 mOsm/K\n" +
+                  "A/G Ratio 0.7 (*) 1.1 - 2.2\n" +
+                  "RBC, UA 1 0 - 2 /HPF\n" +
+                  "WBC, UA 5 (*) 0 - 4 /HPF\n" +
+                  "CK TOTAL AND CKMB\n" +
+                  "Collection Time:  12/04/15 10:00 AM\n" + // "10:00 AM" isn't covered by a TimeAnnotation
+                  "Result Value Ref Range\n" +
+                  "Total CK 125 30 - 240 Unit/L\n" +
+                  "CK-MB 1.3 0.0 - 9.0 ng/mL\n";
+      JCas jCas = processLabText( text );
+      assertLabMentions( jCas,
+            "Albumin", "2.2",
+            "Protein", "5.5",
+            "Alkaline Phosphatase", "844",
+            "ALT", "30",
+            "AST", "130",
+            "Bilirubin", "1.3",
+            "Bilirubin, Direct", "0.4",
+            "Bilirubin", "",  // We are not using term subsumption, so bilirubin shows up twice
+            "Bilirubin, Indirect", "0.9",
+            "Bilirubin", "",  // We are not using term subsumption, so bilirubin shows up twice
+            "LIPASE", "",
+            "Lipase", "19",
+            "PROTIME", "",
+            "INR", "7", // wrong, but time not annotated
+            "Protime", "18.0",
+            "INR", "1.9",
+            "GFR", "73",
+            "Osmolality", "281",
+            "A/G Ratio", "0.7",
+            "RBC, UA", "1",
+            "WBC, UA", "5",
+            "CKMB", "10", // wrong, but time not annotated
+            "Total CK", "125",
+            "CK-MB", "1.3"
+      );
+
+      // no lab mentions except in lab sections
+      jCas = processNonLabText( text );
+      assertLabMentions( jCas );
+   }
+
+   @Test
+   public void testRanges() throws UIMAException { // lab lines where a reference range sits between the lab name and its value
+      final String text =
+            "Sodium Latest Range: 135-145 mmol/L 138\n" +
+                  "Anion Gap Latest Range: 13-16 mmol/L\n" +
+                  "Potassium Latest Range: 3.5-5.3 mmol/L 3.8\n" + // range not annotated
+                  "TSH, High Sensitivity Latest Range: 0.450-5.100 uIU/mL 1.939\n" + // range not annotated
+                  "LDL/HDL Ratio No range found 2.6\n";
+      final JCas jCas = processLabText( text );
+      assertLabMentions( jCas,
+            "Sodium", "138",
+            "Anion Gap", "13-16", // nothing but range available, so we use that
+            "Potassium", "3.5", // should be "3.8", but range not annotated
+            "TSH", "0.450", // should be "1.939", but range not annotated
+            "LDL/HDL", "2.6"
+      );
+   }
+
+   @Test
+   public void testFreeText() throws UIMAException { // narrative text, checked with the default annotator and then with PARAM_MAX_NEWLINES = 1
+      String text =
+            "Weight / BMI:  Recent weight (as of 05/05/16) is\n" +
+                  "45.36 kg (100 lb).\n " +
+                  "Hemoglobin is 13.9, hematocrit 47.0, and platelet count\n" +
+                  "366,000. CRP was 36.77.  Procalcitonin was 1.32.  Lactate was\n" +
+                  "3.9. Free T4 was 1.3.  TSH was 2.82.  Point of care cardiac enzymes\n" +
+                  "were normal. CMS was normal except for an elevated potassium of\n" +
+                  "6, elevated anion gap of 27, elevated glucose of 153, elevated BUN\n" +
+                  "of 80, elevated creatinine of 1.9.  Low GFR 25.\n" +
+                  "\n" +
+                  "Urinalysis: Specific gravity 1.015, white count was elevated\n" +
+                  "29,100, with 69 segs, 20 bands, 5 lymphocytes, and\n" +
+                  "6 monos.\n";
+      JCas jCas = processLabText( text );
+      assertLabMentions( jCas,
+            "Weight", "",
+            "BMI", "",
+            "weight", "45.36 kg",
+            "Hemoglobin", "13.9",
+            "hematocrit", "47.0",
+            "platelet count", "366,000",
+            "CRP", "36.77",
+            "Procalcitonin", "1.32",
+            "Lactate", "3.9",
+            "Free T4", "1.3",
+            "TSH", "2.82",
+            "cardiac enzymes", "normal",
+            "potassium", "6",
+            "anion gap", "27",
+            "glucose", "153",
+            "BUN", "80",
+            "creatinine", "1.9",
+            "GFR", "25",
+            "Specific gravity", "1.015",
+            "white count", "29,100",
+            "lymphocytes", "6" // Should be "5", but LabValueFinder doesn't handle values before words
+      );
+      jCas = processWithoutSpanningNewlines( text ); // same text again; values that sit on the following line are now expected to be missed
+      assertLabMentions( jCas,
+            "Weight", "",
+            "BMI", "",
+            "weight", "",
+            "Hemoglobin", "13.9",
+            "hematocrit", "47.0",
+            "platelet count", "",
+            "CRP", "36.77",
+            "Procalcitonin", "1.32",
+            "Lactate", "",
+            "Free T4", "1.3",
+            "TSH", "2.82",
+            "cardiac enzymes", "",
+            "potassium", "",
+            "anion gap", "27",
+            "glucose", "153",
+            "BUN", "",
+            "creatinine", "1.9",
+            "GFR", "25",
+            "Specific gravity", "1.015",
+            "white count", "elevated", // number on next line, so went for the word
+            "lymphocytes", ""
+      );
+   }
+
+   private JCas processLabText( final String text ) throws UIMAException {
+      return processText( text, true, true ); // lab segmenter + default LabValueFinder description
+   }
+
+   private JCas processNonLabText( final String text ) throws UIMAException {
+      return processText( text, false, true ); // plain segmenter + default LabValueFinder description
+   }
+
+   private JCas processWithoutSpanningNewlines( final String text ) throws UIMAException {
+      return processText( text, true, false ); // lab segmenter + LabValueFinder description with PARAM_MAX_NEWLINES = 1
+   }
+
+   private JCas processText( final String text, final boolean isLabText, final boolean spanNewlines ) throws UIMAException {
+      final JCas jCas = JCasFactory.createJCas(); // new CAS on every call
+      jCas.setDocumentText( text );
+      SimplePipeline.runPipeline( jCas,
+            (isLabText) ? labSegmentator : simpleSegmentator, // segmenter built with vs. without LAB_RESULTS_OID
+            midPipeline, // sentence, tokenizer, context tokenizer, POS tagger and dictionary lookup steps
+            (spanNewlines) ? defaultLabAnnotator : sameLineLabAnnotator ); // the sameLine description sets PARAM_MAX_NEWLINES to 1
+      return jCas; // handed back after the pipeline has run, for inspection by the assertions
+   }
+
+   private void assertLabMentions( final JCas jCas, final String... expected ) { // expected: alternating lab text, value text ("" = no value), in JCasUtil.select order
+      final List<LabMention> labs = new ArrayList<>( JCasUtil.select( jCas, LabMention.class ) );
+      printLabMentions( jCas ); // log what was found before any assertion can fail
+      int expectedLength = expected.length;
+      assertEquals( "Expected arguments must be (lab, value) pairs", 0, expectedLength % 2 ); // guards against a malformed call, not a wrong annotation count
+      assertEquals( "Number of labs is incorrect", expectedLength / 2, labs.size() );
+      for ( int i = 0; i < expectedLength; i += 2 ) {
+         final LabMention lab = labs.get( i / 2 ); // pair i, i+1 describes lab number i/2
+         assertEquals( "Lab is not the same", expected[ i ], lab.getCoveredText() );
+         if ( lab.getLabValue() != null && lab.getLabValue().getArg2() != null && lab.getLabValue().getArg2().getArgument() != null ) {
+            assertEquals( "Value is not the same for " + expected[ i ],
+                  expected[ i + 1 ], lab.getLabValue().getArg2().getArgument().getCoveredText() );
+         } else {
+            assertEquals( "No value was found for " + expected[ i ], expected[ i + 1 ], "" ); // a lab without a value must be expected as ""
+         }
+      }
+   }
+
+   private void printLabMentions( final JCas jCas ) { // logs each Segment and the LabMentions it covers at INFO level
+      for ( Segment segment : JCasUtil.select( jCas, Segment.class ) ) {
+         final Collection<LabMention> labs = JCasUtil.selectCovered( jCas, LabMention.class, segment ); // only labs inside this segment's span
+         LOGGER.info( "Section " + segment.getPreferredText() + " (" + segment.getId() + "): " + labs.size() + " lab(s)" );
+         for ( LabMention lab : labs ) {
+            if ( lab.getLabValue() != null && lab.getLabValue().getArg2() != null && lab.getLabValue().getArg2().getArgument() != null ) { // any link in the chain may be null
+               LOGGER.info( "   " + getDebugText( lab )
+                     + " value: " + getDebugText( lab.getLabValue().getArg2().getArgument() ) );
+            } else {
+               LOGGER.info( "   " + getDebugText( lab ) + " no value" );
+            }
+         }
+      }
+   }
+
+   static private String getDebugText( final Annotation a ) {
+      return a.getType().getShortName() + "(" + a.getBegin() + "-" + a.getEnd() + "): " + a.getCoveredText(); // TypeShortName(begin-end): covered text
+   }
+
+}

Modified: ctakes/trunk/ctakes-type-system/src/main/java/org/apache/ctakes/typesystem/type/constants/CONST.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-type-system/src/main/java/org/apache/ctakes/typesystem/type/constants/CONST.java?rev=1815277&r1=1815276&r2=1815277&view=diff
==============================================================================
--- ctakes/trunk/ctakes-type-system/src/main/java/org/apache/ctakes/typesystem/type/constants/CONST.java (original)
+++ ctakes/trunk/ctakes-type-system/src/main/java/org/apache/ctakes/typesystem/type/constants/CONST.java Wed Nov 15 00:03:28 2017
@@ -42,8 +42,9 @@ public class CONST {
 	
 	public static final int NE_DISCOVERY_TECH_DICT_LOOKUP = 1;
 	public static final int NE_DISCOVERY_TECH_GOLD_ANNOTATION = 2;
-	
-	public static final int NE_POLARITY_NEGATION_ABSENT = 1;
+   public static final int NE_DISCOVERY_TECH_EXPLICIT_AE = 3;
+
+   public static final int NE_POLARITY_NEGATION_ABSENT = 1;
 	public static final int NE_POLARITY_NEGATION_PRESENT = -1;
 	
 	public static final int NE_UNCERTAINTY_PRESENT = 1;