You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2020/02/17 18:41:20 UTC
svn commit: r1874146 -
/ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/ae/BodySideFinder.java
Author: seanfinan
Date: Mon Feb 17 18:41:20 2020
New Revision: 1874146
URL: http://svn.apache.org/viewvc?rev=1874146&view=rev
Log:
example ae that assigns body sides to anatomic sites.
Added:
ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/ae/BodySideFinder.java
Added: ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/ae/BodySideFinder.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/ae/BodySideFinder.java?rev=1874146&view=auto
==============================================================================
--- ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/ae/BodySideFinder.java (added)
+++ ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/ae/BodySideFinder.java Mon Feb 17 18:41:20 2020
@@ -0,0 +1,174 @@
+package org.apache.ctakes.examples.ae;
+
+import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
+import org.apache.ctakes.core.cr.TextBySentenceBuilder;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.ctakes.core.pipeline.PipelineBuilder;
+import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.apache.ctakes.typesystem.type.textsem.AnatomicalSiteMention;
+import org.apache.ctakes.typesystem.type.textsem.BodySideModifier;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.log4j.Logger;
+import org.apache.uima.UIMAException;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.pipeline.SimplePipeline;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSArray;
+import org.apache.uima.jcas.tcas.Annotation;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+
+/**
+ * Example analysis engine ( ae ) that assigns body sides to anatomic sites.
+ * Each site is assigned the closest body side that precedes it in the same sentence.
+ * It is also an example ( including main() ) of how one can:
+ * Create a pipeline with PipelineBuilder.
+ * Add Sentences with TextBySentenceBuilder.
+ * Create and add Annotations.
+ * Fetch annotations with JCasUtil.
+ * <p>
+ * If you are unfamiliar with the builder pattern
+ * or java streams or functional references then the flow may look a little strange,
+ * but the essential ctakes-related building blocks are more straightforward.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 2/17/2020
+ */
+@PipeBitInfo(
+      name = "BodySideFinder",
+      description = "Assigns Body Side to Anatomic Sites.",
+      role = PipeBitInfo.Role.ANNOTATOR
+)
+final public class BodySideFinder extends JCasAnnotator_ImplBase {
+
+   static private final Logger LOGGER = Logger.getLogger( "BodySideFinder" );
+
+   /**
+    * Holds a umls CUI and some synonyms for body sides.
+    * Has methods to detect a matching word token and create a BodySideModifier.
+    */
+   private enum Side {
+      RIGHT( "C0205090", "right", "dextro" ),
+      LEFT( "C0205091", "left", "levo" );
+      final private String _cui;
+      final private Collection<String> _patterns;
+
+      Side( final String cui, final String... patterns ) {
+         _cui = cui;
+         _patterns = Arrays.asList( patterns );
+      }
+      /** @return true if the covered text of the word equals one of this side's synonyms, ignoring case. */
+      boolean isMatch( final WordToken word ) {
+         return _patterns.stream()
+                         .anyMatch( word.getCoveredText()::equalsIgnoreCase );
+      }
+      /** Creates a BodySideModifier over the word's span, gives it this side's CUI and adds it to the cas indexes. */
+      BodySideModifier createModifier( final JCas jCas, final WordToken word ) {
+         final BodySideModifier side = new BodySideModifier( jCas, word.getBegin(), word.getEnd() );
+         final UmlsConcept umlsConcept = new UmlsConcept( jCas );
+         umlsConcept.setCui( _cui );
+         final FSArray conceptArr = new FSArray( jCas, 1 );
+         conceptArr.set( 0, umlsConcept );
+         side.setOntologyConceptArr( conceptArr );
+         side.addToIndexes( jCas );
+         return side;
+      }
+   }
+
+   /**
+    * Process Sentence -by- Sentence.  Sentences without anatomic sites are skipped.
+    * If a sentence has anatomic site(s) and wordtokens that match a body side synonym,
+    * BodySideModifier(s) are created and attached to anatomic sites that follow in the sentence.
+    * {@inheritDoc}
+    */
+   @Override
+   public void process( final JCas jCas ) throws AnalysisEngineProcessException {
+      LOGGER.info( "Finding Body Side and Laterality ..." );
+
+      final Map<Sentence, Collection<AnatomicalSiteMention>> sentenceSiteMap
+            = JCasUtil.indexCovered( jCas, Sentence.class, AnatomicalSiteMention.class );
+
+      final Map<Sentence, Collection<WordToken>> sentenceWordMap
+            = JCasUtil.indexCovered( jCas, Sentence.class, WordToken.class );
+
+      sentenceSiteMap.entrySet().stream()
+                     .filter( e -> !e.getValue().isEmpty() )
+                     .forEach( e -> assignSides( jCas, e.getValue(), sentenceWordMap.get( e.getKey() ) ) );
+
+      LOGGER.info( "Finished." );
+   }
+
+   /**
+    * Iterate through types of {@link Side}, calling {@link #findSide} and {@link #setSide}.
+    * Sides are applied in text order, so a later side overwrites an earlier one on the sites that follow it.
+    */
+   static private void assignSides( final JCas jCas,
+                                    final Collection<AnatomicalSiteMention> sites,
+                                    final Collection<WordToken> words ) {
+      Arrays.stream( Side.values() )
+            .map( s -> findSide( jCas, words, s ) )
+            .flatMap( Collection::stream )
+            .sorted( Comparator.comparingInt( Annotation::getBegin ) )
+            .forEach( s -> setSide( s, sites ) );
+   }
+
+   /**
+    * Iterate through WordTokens to find body sides and create BodySideModifiers representing them.
+    */
+   static private Collection<BodySideModifier> findSide( final JCas jCas,
+                                                         final Collection<WordToken> words,
+                                                         final Side side ) {
+      return words.stream()
+                  .filter( side::isMatch )
+                  .map( w -> side.createModifier( jCas, w ) )
+                  .collect( Collectors.toList() );
+   }
+
+   /**
+    * Assign a side to all following sites, meaning every site whose end offset is after the end of the side.
+    */
+   static private void setSide( final BodySideModifier side,
+                                final Collection<AnatomicalSiteMention> sites ) {
+      sites.stream()
+           .filter( s -> s.getEnd() > side.getEnd() )
+           .forEach( s -> s.setBodySide( side ) );
+   }
+
+   /**
+    * Demo: marks "fibula" as an anatomic site in one sentence, runs the pipeline and logs the assigned side.
+    */
+   public static void main( final String... args ) {
+      final String sentence = "He had a slight fracture in the proximal right fibula";
+      final String siteText = "fibula";
+      final int index = sentence.indexOf( siteText );
+      try {
+         final AnalysisEngineDescription analysisEngine = new PipelineBuilder()
+               .add( TokenizerAnnotatorPTB.class )
+               .add( BodySideFinder.class )
+               .getAnalysisEngineDesc();
+         final JCas jCas = new TextBySentenceBuilder()
+               .addSentence( sentence )
+               .build();
+         final AnatomicalSiteMention site = new AnatomicalSiteMention( jCas, index, index + siteText.length() );
+         site.addToIndexes( jCas );
+
+         SimplePipeline.runPipeline( jCas, analysisEngine );
+         final BodySideModifier side = site.getBodySide();
+         LOGGER.info( site.getCoveredText() + " has body side " + ( side == null ? "<none>" : side.getCoveredText() ) );
+      } catch ( IOException | UIMAException uE ) {
+         LOGGER.error( uE.getMessage(), uE );
+      }
+   }
+
+}