You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2017/11/15 00:03:28 UTC
svn commit: r1815277 - in /ctakes/trunk:
ctakes-core/src/main/java/org/apache/ctakes/core/ae/
ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/dictionary/lookup/fast/
ctakes-regression-test/ ctakes-regression-test/src/test/java/org/apa...
Author: seanfinan
Date: Wed Nov 15 00:03:28 2017
New Revision: 1815277
URL: http://svn.apache.org/viewvc?rev=1815277&view=rev
Log:
CTAKES-441 : Add LabValueFinder annotator. Many thanks to the original author Kean Kaufmann.
Added:
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/LabValueFinder.java
ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/dictionary/lookup/fast/tinyDict.bsv
ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/dictionary/lookup/fast/tinyDictSpec.xml
ctakes/trunk/ctakes-regression-test/src/test/java/org/apache/ctakes/core/ae/LabValueFinderTester.java
Modified:
ctakes/trunk/ctakes-regression-test/pom.xml
ctakes/trunk/ctakes-type-system/src/main/java/org/apache/ctakes/typesystem/type/constants/CONST.java
Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/LabValueFinder.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/LabValueFinder.java?rev=1815277&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/LabValueFinder.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/LabValueFinder.java Wed Nov 15 00:03:28 2017
@@ -0,0 +1,396 @@
+package org.apache.ctakes.core.ae;
+
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.ctakes.core.util.OntologyConceptUtil;
+import org.apache.ctakes.core.util.textspan.DefaultAspanComparator;
+import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.relation.ResultOfTextRelation;
+import org.apache.ctakes.typesystem.type.syntax.NumToken;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.apache.ctakes.typesystem.type.textsem.*;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSArray;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+import static org.apache.ctakes.core.pipeline.PipeBitInfo.Role.ANNOTATOR;
+import static org.apache.ctakes.core.pipeline.PipeBitInfo.TypeProduct.*;
+
+/**
+ * See Jira at https://issues.apache.org/jira/browse/CTAKES-441
+ *
+ * @author Kean Kaufmann
+ * @since 11/13/2017
+ */
+@PipeBitInfo(
+ name = "LabValueFinder",
+ description = "Associates Lab Mentions with values.",
+ role = ANNOTATOR,
+ dependencies = { SECTION, BASE_TOKEN, IDENTIFIED_ANNOTATION },
+ products = { GENERIC_RELATION }
+)
+final public class LabValueFinder extends JCasAnnotator_ImplBase {
+
+ // Names of the configuration parameters accepted by this annotator.
+ public static final String PARAM_ALL_SECTIONS = "allSections";
+ public static final String PARAM_SECTIONS = "sections";
+ public static final String PARAM_VALUE_WORDS = "valueWords";
+ // The constant is named for newlines, but the parameter name itself is "maxLineCount".
+ public static final String PARAM_MAX_NEWLINES = "maxLineCount";
+ // Value used for maxLineCount when the parameter is not configured.
+ public static final int DEFAULT_MAX_LINE_COUNT = 2;
+ public static final String PARAM_LAB_TUIS = "labTUIs";
+ public static final String PARAM_LAB_X_CUIS = "excludeCUIs";
+
+ // Built-in values. initialize(..) always starts each working collection with these
+ // and then adds any user-configured values.
+ static private final String[] REQUIRED_SECTIONS = { "2.16.840.1.113883.10.20.22.2.3.1" };
+ static private final String[] REQUIRED_VALUE_WORDS = { "positive", "negative", "elevated", "normal", "increased", "decreased" };
+ static private final String[] REQUIRED_LAB_TUIS = {
+ "T059", // Laboratory Procedure
+ "T060", // Diagnostic Procedure (e.g. GFR)
+ "T201" }; // Clinical Attribute (e.g. A/G Ratio)
+ static private final String[] REQUIRED_EXCLUDE_CUIS = {
+ "C1443182", // "Calculated (procedure)"
+ "C1715372", // "Medical problem"
+ "C1441604" }; // "High sensitivity"
+
+ // Logger shared by the whole class; looked up by the simple name "LabValueFinder".
+ static final Logger LOGGER = Logger.getLogger( "LabValueFinder" );
+
+ // Text form of the "use all sections" flag; initialize(..) parses it into _useAllSections.
+ @ConfigurationParameter( name = PARAM_ALL_SECTIONS,
+       description = "Use all Annotatable sections. This ignores the value of " + PARAM_SECTIONS,
+       defaultValue = "true",
+       mandatory = false )
+ private String _useAllSectionText;
+ private boolean _useAllSections;
+
+ // User-configured section ids; merged with REQUIRED_SECTIONS into annotatableSections.
+ @ConfigurationParameter( name = PARAM_SECTIONS,
+       description = "Annotatable sections",
+       defaultValue = {},
+       mandatory = false )
+ private String[] _annotatableSections;
+ private Collection<String> annotatableSections;
+
+ // User-configured value words; merged with REQUIRED_VALUE_WORDS into valueWords.
+ @ConfigurationParameter( name = PARAM_VALUE_WORDS,
+       description = "Words indicating values",
+       defaultValue = {},
+       mandatory = false )
+ private String[] _valueWords;
+ private Collection<String> valueWords;
+
+ // Limits how far past a lab mention the value search may extend; see getValueWindowEnd(..).
+ @ConfigurationParameter( name = PARAM_MAX_NEWLINES,
+       description = "Maximum newlines between lab and value",
+       mandatory = false )
+ private int maxLineCount = DEFAULT_MAX_LINE_COUNT;
+
+ // User-configured TUIs; merged with REQUIRED_LAB_TUIS into labTuis.
+ // Declared optional like every other parameter: the built-in TUIs are always present,
+ // so a pipeline should not be forced to supply this value.
+ @ConfigurationParameter( name = PARAM_LAB_TUIS,
+       description = "TUIs indicating lab measurements",
+       defaultValue = {},
+       mandatory = false )
+ private String[] _labTuis;
+ private Collection<String> labTuis;
+
+ // User-configured CUIs to ignore; merged with REQUIRED_EXCLUDE_CUIS into excludeCuis.
+ @ConfigurationParameter( name = PARAM_LAB_X_CUIS,
+       description = "CUIs not indicating specific lab measurements",
+       defaultValue = {},
+       mandatory = false )
+ private String[] _excludeCuis;
+ private Collection<String> excludeCuis;
+
+ /**
+  * Turns the configured parameter values into the working collections used by process(..).
+  * {@inheritDoc}
+  */
+ @Override
+ public void initialize( final UimaContext context ) throws ResourceInitializationException {
+    // Always call the super first
+    // (presumably this is what populates the @ConfigurationParameter fields -- confirm against uimaFIT).
+    super.initialize( context );
+
+    // The flag arrives as text and is parsed here.
+    _useAllSections = Boolean.parseBoolean( _useAllSectionText );
+
+    // Every working collection holds the built-in values plus whatever the user configured.
+    annotatableSections = gatherParameters( REQUIRED_SECTIONS, _annotatableSections );
+    valueWords = gatherParameters( REQUIRED_VALUE_WORDS, _valueWords );
+    labTuis = gatherParameters( REQUIRED_LAB_TUIS, _labTuis );
+    excludeCuis = gatherParameters( REQUIRED_EXCLUDE_CUIS, _excludeCuis );
+
+    final String lineCountMessage = PARAM_MAX_NEWLINES + " = " + maxLineCount;
+    LOGGER.debug( lineCountMessage );
+    final String tuiMessage = labTuis.size() + " lab TUIs: " + labTuis;
+    LOGGER.info( tuiMessage );
+ }
+
+ /**
+  * Combines built-in and user-configured parameter values into a single set.
+  *
+  * @param requiredValues values that are always included; must not be null.
+  * @param userValues     additional user-configured values; may be null or empty. Null entries are skipped.
+  * @return A mutable collection of all values in upper case, without duplicates.
+  */
+ static private Collection<String> gatherParameters( final String[] requiredValues, final String[] userValues ) {
+    // Collectors.toSet() makes no promise that its result is mutable, so collect into an explicit HashSet.
+    final Collection<String> values = Arrays.stream( requiredValues )
+          .map( String::toUpperCase )
+          .collect( Collectors.toCollection( HashSet::new ) );
+    if ( userValues == null ) {
+       // An optional parameter that was never set may leave its array null; only the required values apply.
+       return values;
+    }
+    Arrays.stream( userValues )
+          .filter( Objects::nonNull )
+          .map( String::toUpperCase )
+          .forEach( values::add );
+    return values;
+ }
+
+ /**
+  * Finds or creates LabMentions in every eligible section and tries to attach a value to each of them.
+  * A section is eligible when all sections are enabled or its id is one of the annotatable section ids.
+  * {@inheritDoc}
+  */
+ @Override
+ public void process( final JCas jCas ) throws AnalysisEngineProcessException {
+    LOGGER.info( "Associating Labs with values ..." );
+
+    final List<Class<? extends Annotation>> valueClasses = Arrays.asList( NumToken.class, FractionAnnotation.class );
+
+    // Time may filter wanted clock positions such as in breast cancer
+    final Map<Annotation, List<IdentifiedAnnotation>> filterMap = createCoveringMap( jCas, valueClasses,
+          Arrays.asList( DateAnnotation.class, TimeAnnotation.class ) );
+
+    final Map<Annotation, List<IdentifiedAnnotation>> subsumeMap = createCoveringMap( jCas, valueClasses,
+          Arrays.asList( FractionAnnotation.class, RangeAnnotation.class, MeasurementAnnotation.class ) );
+
+    for ( Segment segment : JCasUtil.select( jCas, Segment.class ) ) {
+       final String sectionId = segment.getId();
+       // The annotatable ids were stored in upper case by gatherParameters(..), so the segment id
+       // must be upper-cased the same way or an id with lower-case letters could never match.
+       final boolean wantedSection = sectionId != null
+                                     && annotatableSections.contains( sectionId.toUpperCase() );
+       if ( _useAllSections || annotatableSections.isEmpty() || wantedSection ) {
+          final List<LabMention> mentions = annotateMentions( jCas, segment );
+          fillInValues( jCas, mentions, filterMap, subsumeMap, segment.getBegin(), segment.getEnd() );
+       }
+    }
+
+    LOGGER.info( "Finished." );
+ }
+
+ /**
+  * Collects the LabMentions of a section that still need a value.
+  * Existing LabMentions are reused unless their value relation already has a second argument;
+  * any other annotation that carries a wanted lab concept gets a new LabMention over the same span.
+  *
+  * @param jCas    -
+  * @param segment -
+  * @return Existing and extracted LabMentions in the segment.
+  */
+ private List<LabMention> annotateMentions( final JCas jCas, final Segment segment ) {
+    final List<LabMention> labMentions = new ArrayList<>();
+    for ( IdentifiedAnnotation candidate : JCasUtil.selectCovered( jCas, IdentifiedAnnotation.class, segment ) ) {
+       if ( candidate instanceof LabMention ) {
+          final LabMention existing = (LabMention) candidate;
+          final ResultOfTextRelation valueRelation = existing.getLabValue();
+          if ( valueRelation == null ) {
+             // No relation at all yet: create one, then keep the mention.
+             initValueRelation( jCas, existing );
+             labMentions.add( existing );
+          } else if ( valueRelation.getArg2() == null ) {
+             // A relation exists but has no value yet: keep the mention.
+             labMentions.add( existing );
+          }
+          // Otherwise the mention already has a value and is left alone.
+       } else {
+          // Not a LabMention: keep only concepts with a lab TUI whose CUI is not excluded.
+          final Collection<UmlsConcept> labConcepts
+                = OntologyConceptUtil.getUmlsConceptStream( candidate )
+                                     .filter( c -> labTuis.contains( c.getTui() )
+                                                   && !excludeCuis.contains( c.getCui() ) )
+                                     .collect( Collectors.toList() );
+          if ( !labConcepts.isEmpty() ) {
+             // Create a LabMention over the same span holding just those concepts.
+             labMentions.add( createLabMention( jCas, labConcepts, candidate.getBegin(), candidate.getEnd() ) );
+          }
+       }
+    }
+    return labMentions;
+ }
+
+ /**
+  * Gives a lab mention a value relation whose first argument is the lab itself.
+  * The second argument is left unset, and nothing is added to the indexes here.
+  *
+  * @param jCas -
+  * @param lab  for which a value relation should be initialized.
+  */
+ static private void initValueRelation( final JCas jCas, final LabMention lab ) {
+    final ResultOfTextRelation valueRelation = new ResultOfTextRelation( jCas );
+    final RelationArgument labArgument = new RelationArgument( jCas );
+    // hang the relation on the lab mention, then point its first argument back at the lab.
+    lab.setLabValue( valueRelation );
+    valueRelation.setArg1( labArgument );
+    labArgument.setArgument( lab );
+ }
+
+ /**
+  * Builds a LabMention over the given span, gives it the given concepts and an empty value relation,
+  * and adds it to the indexes.
+  *
+  * @param jCas     -
+  * @param concepts Lab concepts.
+  * @param begin    begin index for a new LabMention.
+  * @param end      end index for a new LabMention.
+  * @return a new LabMention with the given attributes.
+  */
+ static private LabMention createLabMention( final JCas jCas, final Collection<UmlsConcept> concepts,
+       final int begin, final int end ) {
+    final LabMention mention = new LabMention( jCas, begin, end );
+    mention.setId( CONST.NE_TYPE_ID_LAB );
+    mention.setDiscoveryTechnique( CONST.NE_DISCOVERY_TECH_EXPLICIT_AE );
+    // move the concepts into an FSArray, one slot per concept in iteration order.
+    final FSArray ontologyConcepts = new FSArray( jCas, concepts.size() );
+    final Iterator<UmlsConcept> conceptIterator = concepts.iterator();
+    for ( int slot = 0; conceptIterator.hasNext(); slot++ ) {
+       ontologyConcepts.set( slot, conceptIterator.next() );
+    }
+    mention.setOntologyConceptArr( ontologyConcepts );
+    // the value relation starts with the lab mention as argument 1 and no value.
+    initValueRelation( jCas, mention );
+    mention.addToIndexes();
+    LOGGER.debug( "created " + getDebugText( mention ) );
+    return mention;
+ }
+
+ /**
+  * Locates the line breaks of a section.
+  * The search starts at segmentBegin and stops at the first newline at or beyond segmentEnd.
+  *
+  * @param docText      -
+  * @param segmentBegin begin index of the section.
+  * @param segmentEnd   end index of the section.
+  * @return List of all newline indices in the section, plus the end of the section.
+  */
+ static private List<Integer> getNewLines( final String docText, final int segmentBegin, final int segmentEnd ) {
+    final List<Integer> breaks = new ArrayList<>();
+    for ( int eol = docText.indexOf( '\n', segmentBegin );
+          eol >= 0 && eol < segmentEnd;
+          eol = docText.indexOf( '\n', eol + 1 ) ) {
+       breaks.add( eol );
+    }
+    // the section end always closes the list, even when no newline was found.
+    breaks.add( segmentEnd );
+    return breaks;
+ }
+
+ /**
+  * Attempts to attach a value to each of the given lab mentions.
+  * For each lab a search window is opened at the end of the lab. Numeric candidates in that window
+  * are preferred; if there are none, the first word token matching a configured value word is used.
+  * A found value becomes argument 2 of the lab's value relation. Labs without a value are left untouched.
+  *
+  * @param jCas         -
+  * @param labs         lab mentions of one section; each is expected to already have a value relation.
+  * @param filterMap    number annotations mapped to annotations that disqualify them as values.
+  * @param subsumeMap   number annotations mapped to larger annotations that should be used in their place.
+  * @param segmentBegin begin index of the section.
+  * @param segmentEnd   end index of the section.
+  */
+ private void fillInValues( final JCas jCas,
+ final List<LabMention> labs,
+ final Map<Annotation, List<IdentifiedAnnotation>> filterMap,
+ final Map<Annotation, List<IdentifiedAnnotation>> subsumeMap,
+ final int segmentBegin,
+ final int segmentEnd ) {
+ if ( labs == null || labs.isEmpty() ) {
+ return;
+ }
+ final List<Integer> newLines = getNewLines( jCas.getDocumentText(), segmentBegin, segmentEnd );
+
+ final List<LabMention> sortedLabs = sortOverlapsByLength( labs );
+ final int numMentions = sortedLabs.size();
+ // Reused for every lab; cleared at the top of each iteration.
+ final Collection<Annotation> candidateSet = new HashSet<>();
+ for ( int i = 0; i < numMentions; i++ ) {
+ candidateSet.clear();
+ final LabMention lab = sortedLabs.get( i );
+ Annotation value = null;
+ // The window may not run past the next lab; for the last lab the limit is the
+ // final entry of newLines, which getNewLines(..) sets to the section end.
+ final LabMention nextLab = (i + 1 < sortedLabs.size()) ? sortedLabs.get( i + 1 ) : null;
+ final int nextLabBegin = nextLab != null ? nextLab.getBegin() : newLines.get( newLines.size() - 1 );
+ final int windowBegin = lab.getEnd();
+ final int windowEnd = getValueWindowEnd( windowBegin, nextLabBegin, newLines );
+
+ LOGGER.debug( "Seeking value for: " + getDebugText( lab ) + " between " + windowBegin + " and " + windowEnd );
+ for ( NumToken numToken : JCasUtil.selectCovered( jCas, NumToken.class, windowBegin, windowEnd ) ) {
+ LOGGER.debug( " " + getDebugText( numToken ) );
+ // A number with any entry in the filter map is not a candidate at all.
+ final List<IdentifiedAnnotation> filters = filterMap.get( numToken );
+ if ( filters != null && !filters.isEmpty() ) {
+ LOGGER.debug( " Filtering due to " + getDebugText( filters.get( 0 ) ) );
+ } else {
+ // A number with entries in the subsume map is replaced by those entries.
+ final List<IdentifiedAnnotation> subsumers = subsumeMap.getOrDefault( numToken, Collections.emptyList() );
+ if ( subsumers.isEmpty() ) {
+ candidateSet.add( numToken );
+ } else {
+ candidateSet.addAll( subsumers );
+ LOGGER.debug( "subsuming candidate: " + getDebugText( numToken ) );
+ }
+ }
+ }
+ if ( !candidateSet.isEmpty() ) {
+ // get first, shortest span value
+ final List<Annotation> candidateList = new ArrayList<>( candidateSet );
+ candidateList.sort( DefaultAspanComparator.getInstance() );
+ // prefer non-range values, if any
+ // (when every candidate is a range, the first sorted candidate is used)
+ value = candidateList.stream()
+ .filter( a -> !(a instanceof RangeAnnotation) )
+ .findFirst()
+ .orElse( candidateList.get( 0 ) );
+ LOGGER.debug( "Set to value: " + getDebugText( value ) );
+ } else {
+ // attempt to find a text (word) value
+ // valueWords holds upper-case text, so the covered text is upper-cased before the lookup.
+ value = JCasUtil.selectCovered( jCas, WordToken.class, windowBegin, windowEnd ).stream()
+ .filter( w -> valueWords.contains( w.getCoveredText().toUpperCase() ) )
+ .findFirst()
+ .orElse( null );
+ }
+ if ( value != null ) {
+ LOGGER.debug( "setting lab value to " + getDebugText( value ) );
+ // The value becomes argument 2 of the lab's existing value relation.
+ final RelationArgument arg2 = new RelationArgument( jCas );
+ arg2.setArgument( value );
+ lab.getLabValue().setArg2( arg2 );
+ }
+ }
+ }
+
+ /**
+  * Finds where the value search window for a lab must stop.
+  * first of: start of next mention, start of max newline, or end of segment
+  *
+  * @param windowBegin  begin index of the window.
+  * @param nextLabBegin begin index of the next lab, or the limit to use when there is none.
+  * @param newLines     ascending line-break indices; the last entry is used as the fallback limit.
+  * @return end index of the window.
+  */
+ private int getValueWindowEnd( final int windowBegin, final int nextLabBegin, final List<Integer> newLines ) {
+    // Until a usable line break is seen the limit is the last entry of the list.
+    int limit = newLines.get( newLines.size() - 1 );
+    int breaksSeen = 0;
+    for ( int eol : newLines ) {
+       if ( eol < windowBegin ) {
+          // line breaks in front of the window do not count.
+          continue;
+       }
+       breaksSeen++;
+       if ( breaksSeen > maxLineCount ) {
+          break;
+       }
+       limit = eol;
+       if ( eol > nextLabBegin ) {
+          // already beyond the next lab; later breaks cannot matter.
+          break;
+       }
+    }
+    return Math.min( limit, nextLabBegin );
+ }
+
+
+ /**
+  * Builds one map from the results of JCasUtil.indexCovering(..) for every pairing
+  * of a covered class with a covering class.
+  * NOTE(review): the results are merged with putAll, so if the same annotation is a key in more than
+  * one of those results only the list stored last is kept -- confirm that this is intended.
+  *
+  * @param jCas            -
+  * @param coveredClasses  classes of the annotations used as keys.
+  * @param coveringClasses classes of the annotations used as values.
+  * @return map of annotation to the annotations reported as covering it.
+  */
+ @SuppressWarnings( { "rawtypes", "unchecked" } ) // hold my beer and watch this...
+ static private Map<Annotation, List<IdentifiedAnnotation>> createCoveringMap( final JCas jCas,
+ final List<Class<? extends Annotation>> coveredClasses,
+ final List<Class<? extends IdentifiedAnnotation>> coveringClasses ) {
+ final Map<Annotation, List<IdentifiedAnnotation>> allCovering = new HashMap<>();
+ // Raw Class types are used so that the indexCovering result can be put into the map unchecked.
+ for ( Class covered : coveredClasses ) {
+ for ( Class covering : coveringClasses ) {
+ allCovering.putAll( JCasUtil.indexCovering( jCas, covered, covering ) );
+ }
+ }
+ return allCovering;
+ }
+
+ /**
+ * The method name does not really describe what it does, but it is close.
+ *
+ * @param list -
+ * @param <T> we only deal with annotations.
+ * @return a sorted list.
+ */
+ static private <T extends Annotation> List<T> sortOverlapsByLength( final List<T> list ) {
+ final List<T> sortedList = new ArrayList<>( list );
+ sortedList.sort( ( a1, a2 ) -> {
+ int begin1 = a1.getBegin();
+ int end1 = a1.getEnd();
+ int begin2 = a2.getBegin();
+ int end2 = a2.getEnd();
+ int beginCompare = Integer.compare( begin1, begin2 );
+ return ((beginCompare < 0) ? Integer.compare( end1, begin2 )
+ : (beginCompare == 0) ? Integer.compare( end1, end2 ) : Integer.compare( begin1, end2 ));
+ } );
+ return sortedList;
+ }
+
+ static private String getDebugText( final Annotation a ) {
+ return a.getType().getShortName() + "(" + a.getBegin() + "-" + a.getEnd() + "): " + a.getCoveredText();
+ }
+
+
+ /**
+  * @return a description of this annotator created without any explicit parameter values.
+  * @throws ResourceInitializationException if the description cannot be created.
+  */
+ public static AnalysisEngineDescription createAnnotatorDescription() throws ResourceInitializationException {
+ return AnalysisEngineFactory.createEngineDescription( LabValueFinder.class );
+ }
+
+ /**
+  * @param objects configuration data handed unchanged to AnalysisEngineFactory.createEngineDescription(..).
+  * @return a description of this annotator created with the given configuration data.
+  * @throws ResourceInitializationException if the description cannot be created.
+  */
+ public static AnalysisEngineDescription createAnnotatorDescription( final Object... objects ) throws ResourceInitializationException {
+ return AnalysisEngineFactory.createEngineDescription( LabValueFinder.class, objects );
+ }
+
+
+}
Added: ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/dictionary/lookup/fast/tinyDict.bsv
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/dictionary/lookup/fast/tinyDict.bsv?rev=1815277&view=auto
==============================================================================
--- ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/dictionary/lookup/fast/tinyDict.bsv (added)
+++ ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/dictionary/lookup/fast/tinyDict.bsv Wed Nov 15 00:03:28 2017
@@ -0,0 +1,42 @@
+// CUI|TUI|Text|preferredTerm
+C0201838|T059|Albumin
+C0202202|T059|Protein
+C0201850|T059|alkaline phosphatase|Alkaline phosphatase measurement
+C0201836|T059|ALT|Alanine aminotransferase measurement
+C0201899|T059|AST|Aspartate aminotransferase measurement
+C0201913|T059|bilirubin|Bilirubin, total measurement
+C0036808|T059|Bilirubin, Indirect
+C0858048|T059|Bilirubin, Direct
+C0201973|T059|Total CK
+C0523584|T059|CK-MB|Creatine kinase MB measurement
+C0523584|T059|CKMB|Creatine kinase MB measurement
+C0023508|T060|white count|White Blood Cell Count procedure
+C0201803|T059|osmolality|Osmolality Measurement
+C0017564|T060|GFR|Glomerular Filtration Rate
+C0588466|T059|RBC, UA|Red blood cells urine (lab test)
+C0000010|T059|WBC, UA|White blood cells urine (lab test)
+C0201837|T201|A/G Ratio|Albumin/Globulin ratio
+C0373670|T059|Lipase|Lipase measurement
+C0033707|T059|Protime|Prothrombin time assay
+C0525032|T059|INR|International Normalized Ratio
+C1443182|T059|Calc|Calculated (procedure)
+C00337443|T059|sodium|Sodium measurement
+C00202194|T059|potassium|Potassium measurement
+C00003074|T201|Anion Gap
+C00202230|T059|TSH|Thyroid stimulating hormone measurement
+C01171408|T059|LDL/HDL|High density/low density lipoprotein ratio measurement
+C00518015|T059|hemoglobin|Hemoglobin measurement
+C00032181|T059|platelet count|Platelet Count measurement
+C00018935|T059|hematocrit|Hematocrit procedure
+C00201657|T059|CRP|C-reactive protein measurement
+C01535922|T059|procalcitonin|Procalcitonin measurement
+C00202115|T059|lactate|Lactic acid measurement
+C00202225|T059|free T4|T4 free measurement
+C00201934|T059|cardiac enzymes|Cardiac enzymes measurement
+C00337438|T059|glucose|Glucose measurement
+C00201802|T059|specific gravity|Specific gravity measurement
+C00200635|T059|lymphocytes|Lymphocyte Count measurement
+C00005845|T059|BUN|Blood urea nitrogen measurement
+C00201975|T059|creatinine|Creatinine measurement
+C01305866|T060|weight|Weighing patient
+C01305855|T201|BMI|Body mass index
Added: ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/dictionary/lookup/fast/tinyDictSpec.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/dictionary/lookup/fast/tinyDictSpec.xml?rev=1815277&view=auto
==============================================================================
--- ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/dictionary/lookup/fast/tinyDictSpec.xml (added)
+++ ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/dictionary/lookup/fast/tinyDictSpec.xml Wed Nov 15 00:03:28 2017
@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<lookupSpecification>
+ <dictionaries>
+ <dictionary>
+ <name>LabAnnotatorTestDict</name>
+ <implementationName>org.apache.ctakes.dictionary.lookup2.dictionary.BsvRareWordDictionary
+ </implementationName>
+ <properties>
+ <property key="bsvPath" value="org/apache/ctakes/examples/dictionary/lookup/fast/tinyDict.bsv"/>
+ </properties>
+ </dictionary>
+ </dictionaries>
+
+ <conceptFactories>
+ <conceptFactory>
+ <name>LabAnnotatorTestConcepts</name>
+ <implementationName>org.apache.ctakes.dictionary.lookup2.concept.BsvConceptFactory</implementationName>
+ <properties>
+ <property key="bsvPath" value="org/apache/ctakes/examples/dictionary/lookup/fast/tinyDict.bsv"/>
+ </properties>
+ </conceptFactory>
+ </conceptFactories>
+
+ <!-- Defines what terms and concepts will be used -->
+ <dictionaryConceptPairs>
+ <dictionaryConceptPair>
+ <name>LabAnnotatorPair</name>
+ <dictionaryName>LabAnnotatorTestDict</dictionaryName>
+ <conceptFactoryName>LabAnnotatorTestConcepts</conceptFactoryName>
+ </dictionaryConceptPair>
+ </dictionaryConceptPairs>
+
+ <rareWordConsumer>
+ <name>Term Consumer</name>
+ <implementationName>org.apache.ctakes.dictionary.lookup2.consumer.DefaultTermConsumer</implementationName>
+ <properties>
+ <property key="codingScheme" value="custom"/>
+ </properties>
+ </rareWordConsumer>
+
+</lookupSpecification>
Modified: ctakes/trunk/ctakes-regression-test/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-regression-test/pom.xml?rev=1815277&r1=1815276&r2=1815277&view=diff
==============================================================================
--- ctakes/trunk/ctakes-regression-test/pom.xml (original)
+++ ctakes/trunk/ctakes-regression-test/pom.xml Wed Nov 15 00:03:28 2017
@@ -10,108 +10,17 @@
<description>Apache cTAKES Regression-test</description>
<dependencies>
- <!-- type system is added by utils. -->
- <!--<dependency>-->
- <!--<groupId>org.apache.ctakes</groupId>-->
- <!--<artifactId>ctakes-type-system</artifactId>-->
- <!--</dependency>-->
- <!-- core is added by everybody. -->
- <!--<dependency>-->
- <!--<groupId>org.apache.ctakes</groupId>-->
- <!--<artifactId>ctakes-core</artifactId>-->
- <!--</dependency>-->
- <!-- utils is added by core. -->
- <!--<dependency>-->
- <!--<groupId>org.apache.ctakes</groupId>-->
- <!--<artifactId>ctakes-utils</artifactId>-->
- <!--</dependency>-->
- <!-- jdom is added by core. -->
- <!--<dependency>-->
- <!--<groupId>jdom</groupId>-->
- <!--<artifactId>jdom</artifactId>-->
- <!--</dependency>-->
- <!-- junit is added by utils. -->
- <!--<dependency>-->
- <!--<groupId>junit</groupId>-->
- <!--<artifactId>junit</artifactId>-->
- <!--</dependency>-->
- <!-- context tokenizer is added by assertion. -->
- <!--<dependency>-->
- <!--<groupId>org.apache.ctakes</groupId>-->
- <!--<artifactId>ctakes-context-tokenizer</artifactId>-->
- <!--</dependency>-->
- <!-- pre-processor is added by clinical pipeline. -->
- <!--<dependency>-->
- <!--<groupId>org.apache.ctakes</groupId>-->
- <!--<artifactId>ctakes-preprocessor</artifactId>-->
- <!--</dependency>-->
- <!-- lvg is added by dependency parser. -->
- <!--<dependency>-->
- <!--<groupId>org.apache.ctakes</groupId>-->
- <!--<artifactId>ctakes-lvg</artifactId>-->
- <!--</dependency>-->
- <!-- chunker is added by assertion. -->
- <!--<dependency>-->
- <!--<groupId>org.apache.ctakes</groupId>-->
- <!--<artifactId>ctakes-chunker</artifactId>-->
- <!--</dependency>-->
- <!-- ne contexts is added by clinical pipeline. -->
- <!--<dependency>-->
- <!--<groupId>org.apache.ctakes</groupId>-->
- <!--<artifactId>ctakes-ne-contexts</artifactId>-->
- <!--</dependency>-->
- <!-- pos tagger is added by chunker. -->
- <!--<dependency>-->
- <!--<groupId>org.apache.ctakes</groupId>-->
- <!--<artifactId>ctakes-pos-tagger</artifactId>-->
- <!--</dependency>-->
- <!-- assertion is added by clinical pipeline. -->
- <!--<dependency>-->
- <!--<groupId>org.apache.ctakes</groupId>-->
- <!--<artifactId>ctakes-assertion</artifactId>-->
- <!--</dependency>-->
- <!-- dependency parser is added by assertion. -->
- <!--<dependency>-->
- <!--<groupId>org.apache.ctakes</groupId>-->
- <!--<artifactId>ctakes-dependency-parser</artifactId>-->
- <!--</dependency>-->
- <!-- If clinical-pipeline is a dependency then everything in clinical-pipeline should be removed.
- This makes maintenance much easier wrt upgraded versions and removed/unused modules (e.g. old lookup).-->
- <!-- clinical pipeline is added by relation extractor. -->
- <!--<dependency>-->
- <!--<groupId>org.apache.ctakes</groupId>-->
- <!--<artifactId>ctakes-clinical-pipeline</artifactId>-->
- <!--</dependency>-->
- <!-- drug-ner is added by smoking status, side effect. -->
- <!--<dependency>-->
- <!--<groupId>org.apache.ctakes</groupId>-->
- <!--<artifactId>ctakes-drug-ner</artifactId>-->
- <!--</dependency> -->
<dependency>
<groupId>org.apache.uima</groupId>
<artifactId>uimaj-cpe</artifactId>
<version>2.9.0</version>
</dependency>
+ <!-- Allow for use of example notes, example dictionaries, example models ... -->
<dependency>
<groupId>org.apache.ctakes</groupId>
- <artifactId>ctakes-clinical-pipeline</artifactId>
+ <artifactId>ctakes-examples</artifactId>
</dependency>
<dependency>
- <groupId>org.apache.ctakes</groupId>
- <artifactId>ctakes-smoking-status</artifactId>
- </dependency>
- <!-- constituency parser is added by assertion. -->
- <!--<dependency>-->
- <!--<groupId>org.apache.ctakes</groupId>-->
- <!--<artifactId>ctakes-constituency-parser</artifactId>-->
- <!--</dependency> -->
- <!--
- <dependency>
- <groupId>org.apache.ctakes</groupId>
- <artifactId>ctakes-coreference</artifactId>
- </dependency>
- -->
- <dependency>
<groupId>xmlunit</groupId>
<artifactId>xmlunit</artifactId>
<version>1.4</version>
Added: ctakes/trunk/ctakes-regression-test/src/test/java/org/apache/ctakes/core/ae/LabValueFinderTester.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-regression-test/src/test/java/org/apache/ctakes/core/ae/LabValueFinderTester.java?rev=1815277&view=auto
==============================================================================
--- ctakes/trunk/ctakes-regression-test/src/test/java/org/apache/ctakes/core/ae/LabValueFinderTester.java (added)
+++ ctakes/trunk/ctakes-regression-test/src/test/java/org/apache/ctakes/core/ae/LabValueFinderTester.java Wed Nov 15 00:03:28 2017
@@ -0,0 +1,279 @@
+package org.apache.ctakes.core.ae;
+
+import org.apache.ctakes.contexttokenizer.ae.ContextDependentTokenizerAnnotator;
+import org.apache.ctakes.dictionary.lookup2.ae.DefaultJCasTermAnnotator;
+import org.apache.ctakes.postagger.POSTagger;
+import org.apache.ctakes.typesystem.type.textsem.LabMention;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.log4j.Logger;
+import org.apache.uima.UIMAException;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.fit.factory.AggregateBuilder;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.fit.pipeline.SimplePipeline;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import static org.apache.ctakes.core.ae.LabValueFinder.PARAM_ALL_SECTIONS;
+import static org.junit.Assert.assertEquals;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 11/14/2017
+ */
+public class LabValueFinderTester {
+
+ static private final Logger LOGGER = Logger.getLogger( "LabValueFinderTester" );
+
+ static private final String LAB_RESULTS_OID = "2.16.840.1.113883.10.20.22.2.3.1";
+ static private final String DICT_DESC_PATH = "org/apache/ctakes/examples/dictionary/lookup/fast/tinyDictSpec.xml";
+ static private AnalysisEngineDescription simpleSegmentator;
+ static private AnalysisEngineDescription labSegmentator;
+ static private AnalysisEngineDescription midPipeline;
+ static private AnalysisEngineDescription defaultLabAnnotator;
+ static private AnalysisEngineDescription sameLineLabAnnotator;
+
+ /**
+  * Creates, once for the whole class, the engine descriptions that the tests combine into pipelines.
+  *
+  * @throws UIMAException if any of the descriptions cannot be created
+  */
+ @BeforeClass
+ public static void setUpBeforeClass() throws UIMAException {
+    // Section annotators: the default one, and one created with the lab results OID.
+    simpleSegmentator = SimpleSegmentAnnotator.createAnnotatorDescription();
+    labSegmentator = SimpleSegmentAnnotator.createAnnotatorDescription( LAB_RESULTS_OID );
+
+    // Engines that run between the section annotator and the lab value finder, in pipeline order.
+    final AnalysisEngineDescription[] midEngines = {
+          SentenceDetector.createAnnotatorDescription(),
+          TokenizerAnnotatorPTB.createAnnotatorDescription(),
+          AnalysisEngineFactory.createEngineDescription( ContextDependentTokenizerAnnotator.class ),
+          AnalysisEngineFactory.createEngineDescription( POSTagger.class ),
+          DefaultJCasTermAnnotator.createAnnotatorDescription( DICT_DESC_PATH )
+    };
+    final AggregateBuilder midBuilder = new AggregateBuilder();
+    for ( AnalysisEngineDescription midEngine : midEngines ) {
+       midBuilder.add( midEngine );
+    }
+    midPipeline = midBuilder.createAggregateDescription();
+
+    // Two lab value finders; the second one additionally sets PARAM_MAX_NEWLINES to 1.
+    defaultLabAnnotator = LabValueFinder.createAnnotatorDescription( PARAM_ALL_SECTIONS, "false" );
+    sameLineLabAnnotator = LabValueFinder.createAnnotatorDescription( LabValueFinder.PARAM_MAX_NEWLINES, 1, PARAM_ALL_SECTIONS, "false" );
+// LabValueFinder.LOGGER.setLevel( Level.DEBUG );
+ }
+
+
+ /**
+ * @throws UIMAException -
+ */
+ @Test
+ public void testTable() throws UIMAException {
+ String text =
+ "Recent Results (from the past 24 hour(s))\n" +
+ "HEPATIC FUNCTION PANEL\n" +
+ "Collection Time: 12/04/15 5:40 PM\n" +
+ " Result Value Ref Range\n" +
+ " Albumin 2.2 (*) 3.7 - 5.1 g/dL\n" +
+ " Total Protein 5.5 (*) 5.8 - 8.0 g/dL\n" +
+ " Alkaline Phosphatase 844 (*) 42 - 121 IU/L\n" +
+ " ALT 30 10 - 60 Unit/L\n" +
+ " AST 130 (*) 10 - 42 Unit/L\n" +
+ " Total Bilirubin 1.3 0.4 - 1.3 mg/dL\n" +
+ " Bilirubin, Direct 0.4 (*) 0.0 - 0.2 mg/dL\n" +
+ " Bilirubin, Indirect 0.9 0.0 - 1.0 mg/dL\n" +
+ "LIPASE\n" +
+ "Collection Time: 12/04/15 7 PM\n" + // "7 PM" is covered by a TimeAnnotation
+ " Result Value Ref Range\n" +
+ " Lipase 19 (*) 22 - 51 Unit/L\n" +
+ "PROTIME-INR\n" +
+ " Collection Time: 12/04/15 7:45 PM\n" + // "7:45 PM" isn't covered by a TimeAnnotation
+ " Result Value Ref Range\n" +
+ " Protime 18.0 (*) 9.0 - 11.5 sec\n" +
+ " INR 1.9\n" +
+ "COMPREHENSIVE METABOLIC PANEL\n" +
+ "Collection Time: 12/04/15 7:45 AM\n" +
+ "Result Value Ref Range\n" +
+ "GFR Calc , Female N-Blk 73 >60 mL/min\n" +
+ "Osmolality Calc 281 266 - 309 mOsm/K\n" +
+ "A/G Ratio 0.7 (*) 1.1 - 2.2\n" +
+ "RBC, UA 1 0 - 2 /HPF\n" +
+ "WBC, UA 5 (*) 0 - 4 /HPF\n" +
+ "CK TOTAL AND CKMB\n" +
+ "Collection Time: 12/04/15 10:00 AM\n" + // "10:00 AM" isn't covered by a TimeAnnotation
+ "Result Value Ref Range\n" +
+ "Total CK 125 30 - 240 Unit/L\n" +
+ "CK-MB 1.3 0.0 - 9.0 ng/mL\n";
+ JCas jCas = processLabText( text );
+ assertLabMentions( jCas,
+ "Albumin", "2.2",
+ "Protein", "5.5",
+ "Alkaline Phosphatase", "844",
+ "ALT", "30",
+ "AST", "130",
+ "Bilirubin", "1.3",
+ "Bilirubin, Direct", "0.4",
+ "Bilirubin", "", // We are not using term subsumption, so bilirubin shows up twice
+ "Bilirubin, Indirect", "0.9",
+ "Bilirubin", "", // We are not using term subsumption, so bilirubin shows up twice
+ "LIPASE", "",
+ "Lipase", "19",
+ "PROTIME", "",
+ "INR", "7", // wrong, but time not annotated
+ "Protime", "18.0",
+ "INR", "1.9",
+ "GFR", "73",
+ "Osmolality", "281",
+ "A/G Ratio", "0.7",
+ "RBC, UA", "1",
+ "WBC, UA", "5",
+ "CKMB", "10", // wrong, but time not annotated
+ "Total CK", "125",
+ "CK-MB", "1.3"
+ );
+
+ // no lab mentions except in lab sections
+ jCas = processNonLabText( text );
+ assertLabMentions( jCas );
+ }
+
+ /**
+  * Runs lab text in which most labs carry a "Latest Range" reference range before the result,
+  * and checks which span ends up as each lab's value.
+  * Some expectations record known-wrong output; see the inline comments.
+  *
+  * @throws UIMAException -
+  */
+ @Test
+ public void testRanges() throws UIMAException {
+    final String note =
+          "Sodium Latest Range: 135-145 mmol/L 138\n" +
+          "Anion Gap Latest Range: 13-16 mmol/L\n" +
+          "Potassium Latest Range: 3.5-5.3 mmol/L 3.8\n" + // range not annotated
+          "TSH, High Sensitivity Latest Range: 0.450-5.100 uIU/mL 1.939\n" + // range not annotated
+          "LDL/HDL Ratio No range found 2.6\n";
+    // Alternating lab text and value text, in document order.
+    final String[] expectedPairs = {
+          "Sodium", "138",
+          "Anion Gap", "13-16", // nothing but range available, so we use that
+          "Potassium", "3.5", // should be "3.8", but range not annotated
+          "TSH", "0.450", // should be "1.939", but range not annotated
+          "LDL/HDL", "2.6"
+    };
+    final JCas processed = processLabText( note );
+    assertLabMentions( processed, expectedPairs );
+ }
+
+ @Test
+ public void testFreeText() throws UIMAException {
+ String text =
+ "Weight / BMI: Recent weight (as of 05/05/16) is\n" +
+ "45.36 kg (100 lb).\n " +
+ "Hemoglobin is 13.9, hematocrit 47.0, and platelet count\n" +
+ "366,000. CRP was 36.77. Procalcitonin was 1.32. Lactate was\n" +
+ "3.9. Free T4 was 1.3. TSH was 2.82. Point of care cardiac enzymes\n" +
+ "were normal. CMS was normal except for an elevated potassium of\n" +
+ "6, elevated anion gap of 27, elevated glucose of 153, elevated BUN\n" +
+ "of 80, elevated creatinine of 1.9. Low GFR 25.\n" +
+ "\n" +
+ "Urinalysis: Specific gravity 1.015, white count was elevated\n" +
+ "29,100, with 69 segs, 20 bands, 5 lymphocytes, and\n" +
+ "6 monos.\n";
+ JCas jCas = processLabText( text );
+ assertLabMentions( jCas,
+ "Weight", "",
+ "BMI", "",
+ "weight", "45.36 kg",
+ "Hemoglobin", "13.9",
+ "hematocrit", "47.0",
+ "platelet count", "366,000",
+ "CRP", "36.77",
+ "Procalcitonin", "1.32",
+ "Lactate", "3.9",
+ "Free T4", "1.3",
+ "TSH", "2.82",
+ "cardiac enzymes", "normal",
+ "potassium", "6",
+ "anion gap", "27",
+ "glucose", "153",
+ "BUN", "80",
+ "creatinine", "1.9",
+ "GFR", "25",
+ "Specific gravity", "1.015",
+ "white count", "29,100",
+ "lymphocytes", "6" // Should be "5", but LabValueFinder doesn't handle values before words
+ );
+ jCas = processWithoutSpanningNewlines( text );
+ assertLabMentions( jCas,
+ "Weight", "",
+ "BMI", "",
+ "weight", "",
+ "Hemoglobin", "13.9",
+ "hematocrit", "47.0",
+ "platelet count", "",
+ "CRP", "36.77",
+ "Procalcitonin", "1.32",
+ "Lactate", "",
+ "Free T4", "1.3",
+ "TSH", "2.82",
+ "cardiac enzymes", "",
+ "potassium", "",
+ "anion gap", "27",
+ "glucose", "153",
+ "BUN", "",
+ "creatinine", "1.9",
+ "GFR", "25",
+ "Specific gravity", "1.015",
+ "white count", "elevated", // number on next line, so went for the word
+ "lymphocytes", ""
+ );
+ }
+
+ private JCas processLabText( final String text ) throws UIMAException {
+ return processText( text, true, true );
+ }
+
+ private JCas processNonLabText( final String text ) throws UIMAException {
+ return processText( text, false, true );
+ }
+
+ private JCas processWithoutSpanningNewlines( final String text ) throws UIMAException {
+ return processText( text, true, false );
+ }
+
+ /**
+  * Creates a new CAS for the text and runs segmentator, mid pipeline and lab annotator on it.
+  *
+  * @param text         document text
+  * @param isLabText    true to use {@code labSegmentator}, false to use {@code simpleSegmentator}
+  * @param spanNewlines true to use {@code defaultLabAnnotator}, false to use {@code sameLineLabAnnotator}
+  * @return the processed CAS
+  * @throws UIMAException if the CAS cannot be created or the pipeline fails
+  */
+ private JCas processText( final String text, final boolean isLabText, final boolean spanNewlines ) throws UIMAException {
+    final AnalysisEngineDescription segmentator;
+    if ( isLabText ) {
+       segmentator = labSegmentator;
+    } else {
+       segmentator = simpleSegmentator;
+    }
+    final AnalysisEngineDescription labAnnotator;
+    if ( spanNewlines ) {
+       labAnnotator = defaultLabAnnotator;
+    } else {
+       labAnnotator = sameLineLabAnnotator;
+    }
+    final JCas cas = JCasFactory.createJCas();
+    cas.setDocumentText( text );
+    SimplePipeline.runPipeline( cas, segmentator, midPipeline, labAnnotator );
+    return cas;
+ }
+
+ /**
+  * Asserts that the lab mentions selected from the CAS match the expected (lab, value) pairs, in order.
+  *
+  * @param jCas     processed CAS from which {@code LabMention} annotations are selected
+  * @param expected alternating lab covered text and value covered text;
+  *                 use "" as the value for a lab that is expected to have no value
+  */
+ private void assertLabMentions( final JCas jCas, final String... expected ) {
+    final List<LabMention> labs = new ArrayList<>( JCasUtil.select( jCas, LabMention.class ) );
+    printLabMentions( jCas );
+    final int expectedLength = expected.length;
+    // An odd count is a mistake in the test's own expectation list, not in the labs that were found.
+    assertEquals( "Expected labs must be given as (lab, value) pairs", 0, expectedLength % 2 );
+    assertEquals( "Number of labs is incorrect", expectedLength / 2, labs.size() );
+    for ( int i = 0; i < expectedLength; i += 2 ) {
+       final LabMention lab = labs.get( i / 2 );
+       assertEquals( "Lab is not the same", expected[ i ], lab.getCoveredText() );
+       // A lab without a value is compared as "", so a mismatch is always reported with the lab's name.
+       assertEquals( "Value is not the same for " + expected[ i ], expected[ i + 1 ], getValueText( lab ) );
+    }
+ }
+
+ /**
+  * @param lab lab mention
+  * @return covered text of the argument of the lab value's second relation argument,
+  *         or "" when the lab value, its second argument or that argument's annotation is null
+  */
+ static private String getValueText( final LabMention lab ) {
+    if ( lab.getLabValue() == null
+         || lab.getLabValue().getArg2() == null
+         || lab.getLabValue().getArg2().getArgument() == null ) {
+       return "";
+    }
+    return lab.getLabValue().getArg2().getArgument().getCoveredText();
+ }
+
+ /**
+  * Logs, for every segment in the CAS, the number of covered lab mentions and then each lab
+  * together with its value, or "no value" when none is attached.
+  *
+  * @param jCas processed CAS
+  */
+ private void printLabMentions( final JCas jCas ) {
+    for ( Segment section : JCasUtil.select( jCas, Segment.class ) ) {
+       final Collection<LabMention> sectionLabs = JCasUtil.selectCovered( jCas, LabMention.class, section );
+       LOGGER.info( "Section " + section.getPreferredText() + " (" + section.getId() + "): " + sectionLabs.size() + " lab(s)" );
+       for ( LabMention lab : sectionLabs ) {
+          final boolean valueMissing = lab.getLabValue() == null
+                                       || lab.getLabValue().getArg2() == null
+                                       || lab.getLabValue().getArg2().getArgument() == null;
+          if ( valueMissing ) {
+             LOGGER.info( " " + getDebugText( lab ) + " no value" );
+             continue;
+          }
+          LOGGER.info( " " + getDebugText( lab )
+                       + " value: " + getDebugText( lab.getLabValue().getArg2().getArgument() ) );
+       }
+    }
+ }
+
+ static private String getDebugText( final Annotation a ) {
+ return a.getType().getShortName() + "(" + a.getBegin() + "-" + a.getEnd() + "): " + a.getCoveredText();
+ }
+
+}
Modified: ctakes/trunk/ctakes-type-system/src/main/java/org/apache/ctakes/typesystem/type/constants/CONST.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-type-system/src/main/java/org/apache/ctakes/typesystem/type/constants/CONST.java?rev=1815277&r1=1815276&r2=1815277&view=diff
==============================================================================
--- ctakes/trunk/ctakes-type-system/src/main/java/org/apache/ctakes/typesystem/type/constants/CONST.java (original)
+++ ctakes/trunk/ctakes-type-system/src/main/java/org/apache/ctakes/typesystem/type/constants/CONST.java Wed Nov 15 00:03:28 2017
@@ -42,8 +42,9 @@ public class CONST {
public static final int NE_DISCOVERY_TECH_DICT_LOOKUP = 1;
public static final int NE_DISCOVERY_TECH_GOLD_ANNOTATION = 2;
-
- public static final int NE_POLARITY_NEGATION_ABSENT = 1;
+ public static final int NE_DISCOVERY_TECH_EXPLICIT_AE = 3;
+
+ public static final int NE_POLARITY_NEGATION_ABSENT = 1;
public static final int NE_POLARITY_NEGATION_PRESENT = -1;
public static final int NE_UNCERTAINTY_PRESENT = 1;