You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2015/03/16 17:59:14 UTC

svn commit: r1667069 - /ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/ClinicalPipelineFactory.java

Author: seanfinan
Date: Mon Mar 16 16:59:14 2015
New Revision: 1667069

URL: http://svn.apache.org/r1667069
Log:
getFastPipeline doesn't need NP or LookupWindow information.  Refactored accordingly.
Also cascaded the Note text in main(...) for easier reading.

Modified:
    ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/ClinicalPipelineFactory.java

Modified: ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/ClinicalPipelineFactory.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/ClinicalPipelineFactory.java?rev=1667069&r1=1667068&r2=1667069&view=diff
==============================================================================
--- ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/ClinicalPipelineFactory.java (original)
+++ ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/ClinicalPipelineFactory.java Mon Mar 16 16:59:14 2015
@@ -18,18 +18,7 @@
  */
 package org.apache.ctakes.clinicalpipeline;
 
-import java.io.FileNotFoundException;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.ctakes.assertion.medfacts.cleartk.ConditionalCleartkAnalysisEngine;
-import org.apache.ctakes.assertion.medfacts.cleartk.GenericCleartkAnalysisEngine;
-import org.apache.ctakes.assertion.medfacts.cleartk.HistoryCleartkAnalysisEngine;
-import org.apache.ctakes.assertion.medfacts.cleartk.PolarityCleartkAnalysisEngine;
-import org.apache.ctakes.assertion.medfacts.cleartk.SubjectCleartkAnalysisEngine;
-import org.apache.ctakes.assertion.medfacts.cleartk.UncertaintyCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.*;
 import org.apache.ctakes.chunker.ae.Chunker;
 import org.apache.ctakes.chunker.ae.adjuster.ChunkAdjuster;
 import org.apache.ctakes.constituency.parser.ae.ConstituencyParser;
@@ -47,12 +36,14 @@ import org.apache.ctakes.dictionary.look
 import org.apache.ctakes.lvg.ae.LvgAnnotator;
 import org.apache.ctakes.postagger.POSTagger;
 import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
 import org.apache.ctakes.typesystem.type.syntax.Chunk;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
 import org.apache.ctakes.typesystem.type.textspan.LookupWindowAnnotation;
 import org.apache.uima.UIMAException;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.FeatureStructure;
 import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
 import org.apache.uima.fit.factory.AggregateBuilder;
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
@@ -61,138 +52,211 @@ import org.apache.uima.fit.factory.JCasF
 import org.apache.uima.fit.pipeline.SimplePipeline;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSArray;
 import org.apache.uima.resource.ResourceInitializationException;
 import org.xml.sax.SAXException;
 
-public class ClinicalPipelineFactory {
+import java.io.FileNotFoundException;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.*;
+
+final public class ClinicalPipelineFactory {
+
+   private ClinicalPipelineFactory() {
+   }
+
+   public static AnalysisEngineDescription getDefaultPipeline() throws ResourceInitializationException {
+      AggregateBuilder builder = new AggregateBuilder();
+      builder.add( getTokenProcessingPipeline() );
+      builder.add( getNpChunkerPipeline() );
+      builder.add( AnalysisEngineFactory.createEngineDescription( ConstituencyParser.class ) );
+      builder.add( UmlsDictionaryLookupAnnotator.createAnnotatorDescription() );
+      builder.add( ClearNLPDependencyParserAE.createAnnotatorDescription() );
+      builder.add( PolarityCleartkAnalysisEngine.createAnnotatorDescription() );
+      builder.add( UncertaintyCleartkAnalysisEngine.createAnnotatorDescription() );
+      builder.add( HistoryCleartkAnalysisEngine.createAnnotatorDescription() );
+      builder.add( ConditionalCleartkAnalysisEngine.createAnnotatorDescription() );
+      builder.add( GenericCleartkAnalysisEngine.createAnnotatorDescription() );
+      builder.add( SubjectCleartkAnalysisEngine.createAnnotatorDescription() );
+
+      return builder.createAggregateDescription();
+   }
+
+   public static AnalysisEngineDescription getFastPipeline() throws ResourceInitializationException {
+      AggregateBuilder builder = new AggregateBuilder();
+      builder.add( getTokenProcessingPipeline() );
+      try {
+         builder.add( AnalysisEngineFactory.createEngineDescription( DefaultJCasTermAnnotator.class,
+               AbstractJCasTermAnnotator.PARAM_WINDOW_ANNOT_PRP,
+               "org.apache.ctakes.typesystem.type.textspan.Sentence",
+               JCasTermAnnotator.DICTIONARY_DESCRIPTOR_KEY,
+               ExternalResourceFactory.createExternalResourceDescription(
+                     FileResourceImpl.class,
+                     FileLocator.locateFile( "org/apache/ctakes/dictionary/lookup/fast/cTakesHsql.xml" ) )
+         ) );
+      } catch ( FileNotFoundException e ) {
+         e.printStackTrace();
+         throw new ResourceInitializationException( e );
+      }
+      builder.add( ClearNLPDependencyParserAE.createAnnotatorDescription() );
+      builder.add( PolarityCleartkAnalysisEngine.createAnnotatorDescription() );
+      builder.add( UncertaintyCleartkAnalysisEngine.createAnnotatorDescription() );
+      builder.add( HistoryCleartkAnalysisEngine.createAnnotatorDescription() );
+      builder.add( ConditionalCleartkAnalysisEngine.createAnnotatorDescription() );
+      builder.add( GenericCleartkAnalysisEngine.createAnnotatorDescription() );
+      builder.add( SubjectCleartkAnalysisEngine.createAnnotatorDescription() );
+      return builder.createAggregateDescription();
+   }
+
+   public static AnalysisEngineDescription getParsingPipeline() throws ResourceInitializationException {
+      AggregateBuilder builder = new AggregateBuilder();
+      builder.add( getTokenProcessingPipeline() );
+      builder.add( ClearNLPDependencyParserAE.createAnnotatorDescription() );
+      builder.add( AnalysisEngineFactory.createEngineDescription( ConstituencyParser.class ) );
+      return builder.createAggregateDescription();
+   }
+
+   public static AnalysisEngineDescription getTokenProcessingPipeline() throws ResourceInitializationException {
+      AggregateBuilder builder = new AggregateBuilder();
+      builder.add( SimpleSegmentAnnotator.createAnnotatorDescription() );
+      builder.add( SentenceDetector.createAnnotatorDescription() );
+      builder.add( TokenizerAnnotatorPTB.createAnnotatorDescription() );
+      builder.add( LvgAnnotator.createAnnotatorDescription() );
+      builder.add( ContextDependentTokenizerAnnotator.createAnnotatorDescription() );
+      builder.add( POSTagger.createAnnotatorDescription() );
+      return builder.createAggregateDescription();
+   }
+
+   public static AnalysisEngineDescription getNpChunkerPipeline() throws ResourceInitializationException {
+      AggregateBuilder builder = new AggregateBuilder();
+      builder.add( Chunker.createAnnotatorDescription() );
+      builder.add( getStandardChunkAdjusterAnnotator() );
+      builder.add( AnalysisEngineFactory.createEngineDescription( CopyNPChunksToLookupWindowAnnotations.class ) );
+      builder.add( AnalysisEngineFactory.createEngineDescription( RemoveEnclosedLookupWindows.class ) );
+      return builder.createAggregateDescription();
+   }
+
+   public static AnalysisEngineDescription getStandardChunkAdjusterAnnotator() throws ResourceInitializationException {
+      AggregateBuilder builder = new AggregateBuilder();
+      // adjust NP in NP NP to span both
+      builder.add( ChunkAdjuster.createAnnotatorDescription( new String[] { "NP", "NP" }, 1 ) );
+      // adjust NP in NP PP NP to span all three
+      builder.add( ChunkAdjuster.createAnnotatorDescription( new String[] { "NP", "PP", "NP" }, 2 ) );
+      return builder.createAggregateDescription();
+   }
+
+   public static void main( final String... args ) throws IOException, UIMAException, SAXException {
+      // The note is easier to read when sentences are stacked - changed 3/16/2015 spf
+      // Two sentences had no space after the period
+      // Introduction of a space before "Discussed" actually changed the uncertainty of "surgery" from true to false
+      final String note = "History of diabetes and hypertension."
+                          + " Mother had breast cancer."
+                          + " Sister with multiple sclerosis."
+                          + " The patient is suffering from extreme pain due to shark bite."
+                          + " Recommend continuing use of aspirin, oxycodone, and coumadin."
+                          + " Continue exercise for obesity and hypertension."
+                          + " Patient denies smoking and chest pain."  // Space between sentences introduced " Patient"
+                          + " Patient has no cancer."
+                          + " There is no sign of multiple sclerosis."
+                          + " Mass is suspicious for breast cancer."
+                          + " Possible breast cancer."
+                          + " Cannot exclude stenosis."
+                          + " Some degree of focal pancreatitis is also possible."
+                          + " Discussed surgery and chemotherapy."  // Space between sentences introduced " Discussed"
+                          + " Will return if pain continues.";
+      final JCas jcas = JCasFactory.createJCas();
+      jcas.setDocumentText( note );
+      final AnalysisEngineDescription aed = getDefaultPipeline();
+//      final AnalysisEngineDescription aed = getFastPipeline();  // Outputs from default and fast pipelines are identical
+      SimplePipeline.runPipeline( jcas, aed );
+
+      final boolean printCuis = Arrays.asList( args ).contains( "cuis" );
+      final Collection<String> codes = new ArrayList<>();
+      for ( IdentifiedAnnotation entity : JCasUtil.select( jcas, IdentifiedAnnotation.class ) ) {
+
+         System.out.println( "Entity: " + entity.getCoveredText()
+                             + " === Polarity: " + entity.getPolarity()
+                             + " === Uncertain? " + (entity.getUncertainty() == CONST.NE_UNCERTAINTY_PRESENT)
+                             + " === Subject: " + entity.getSubject()
+                             + " === Generic? " + (entity.getGeneric() == CONST.NE_GENERIC_TRUE)
+                             + " === Conditional? " + (entity.getConditional() == CONST.NE_CONDITIONAL_TRUE)
+                             + " === History? " + (entity.getHistoryOf() == CONST.NE_HISTORY_OF_PRESENT)
+         );
+
+         if ( printCuis ) {
+            codes.clear();
+            codes.addAll( getCUIs( entity ) );
+            for ( String cui : codes ) {
+               System.out.print( cui + " " );
+            }
+            System.out.println();
+         }
+
+      }
+      if ( args.length > 0 ) {
+         aed.toXML( new FileWriter( args[ 0 ] ) );
+      }
+   }
+
+
+   /**
+    * @param identifiedAnnotation -
+    * @return list of all cuis
+    */
+   static private Collection<String> getCUIs( final IdentifiedAnnotation identifiedAnnotation ) {
+      final FSArray fsArray = identifiedAnnotation.getOntologyConceptArr();
+      if ( fsArray == null ) {
+         return Collections.emptySet();
+      }
+      final FeatureStructure[] featureStructures = fsArray.toArray();
+      final Collection<String> cuis = new ArrayList<>( featureStructures.length );
+      for ( FeatureStructure featureStructure : featureStructures ) {
+         if ( featureStructure instanceof UmlsConcept ) {
+            final UmlsConcept umlsConcept = (UmlsConcept)featureStructure;
+            final String cui = umlsConcept.getCui();
+            final String tui = umlsConcept.getTui();
+            if ( tui != null && !tui.isEmpty() ) {
+               cuis.add( cui + "_" + tui );
+            } else {
+               cuis.add( cui );
+            }
+         }
+      }
+      return cuis;
+   }
+
+   public static class CopyNPChunksToLookupWindowAnnotations extends JCasAnnotator_ImplBase {
+
+      @Override
+      public void process( JCas jCas ) throws AnalysisEngineProcessException {
+         for ( Chunk chunk : JCasUtil.select( jCas, Chunk.class ) ) {
+            if ( chunk.getChunkType().equals( "NP" ) ) {
+               new LookupWindowAnnotation( jCas, chunk.getBegin(), chunk.getEnd() ).addToIndexes();
+            }
+         }
+      }
+   }
+
+   public static class RemoveEnclosedLookupWindows extends JCasAnnotator_ImplBase {
+
+      @Override
+      public void process( JCas jCas ) throws AnalysisEngineProcessException {
+         List<LookupWindowAnnotation> lws = new ArrayList<>( JCasUtil.select( jCas, LookupWindowAnnotation.class ) );
+         // we'll navigate backwards so that as we delete things we shorten the list from the back
+         for ( int i = lws.size() - 2; i >= 0; i-- ) {
+            LookupWindowAnnotation lw1 = lws.get( i );
+            LookupWindowAnnotation lw2 = lws.get( i + 1 );
+            if ( lw1.getBegin() <= lw2.getBegin() && lw1.getEnd() >= lw2.getEnd() ) {
+               /// lw1 envelops or encloses lw2
+               lws.remove( i + 1 );
+               lw2.removeFromIndexes();
+            }
+         }
+
+      }
+
+   }
+
 
-  public static AnalysisEngineDescription getDefaultPipeline() throws ResourceInitializationException{
-    AggregateBuilder builder = new AggregateBuilder();
-    builder.add(getTokenProcessingPipeline());
-    builder.add(AnalysisEngineFactory.createEngineDescription(CopyNPChunksToLookupWindowAnnotations.class));
-    builder.add(AnalysisEngineFactory.createEngineDescription(RemoveEnclosedLookupWindows.class));
-    builder.add(AnalysisEngineFactory.createEngineDescription(ConstituencyParser.class));
-    builder.add(UmlsDictionaryLookupAnnotator.createAnnotatorDescription());
-    builder.add(ClearNLPDependencyParserAE.createAnnotatorDescription());
-    builder.add(PolarityCleartkAnalysisEngine.createAnnotatorDescription());
-    builder.add(UncertaintyCleartkAnalysisEngine.createAnnotatorDescription());
-    builder.add(HistoryCleartkAnalysisEngine.createAnnotatorDescription());
-    builder.add(ConditionalCleartkAnalysisEngine.createAnnotatorDescription());
-    builder.add(GenericCleartkAnalysisEngine.createAnnotatorDescription());
-    builder.add(SubjectCleartkAnalysisEngine.createAnnotatorDescription());
-    
-    return builder.createAggregateDescription();
-  }
-  
-  public static AnalysisEngineDescription getFastPipeline() throws ResourceInitializationException{
-    AggregateBuilder builder = new AggregateBuilder();
-    builder.add(getTokenProcessingPipeline());
-    builder.add(AnalysisEngineFactory.createEngineDescription(CopyNPChunksToLookupWindowAnnotations.class));
-    builder.add(AnalysisEngineFactory.createEngineDescription(RemoveEnclosedLookupWindows.class));
-    try {
-      builder.add(AnalysisEngineFactory.createEngineDescription(DefaultJCasTermAnnotator.class,
-          AbstractJCasTermAnnotator.PARAM_WINDOW_ANNOT_PRP,
-          "org.apache.ctakes.typesystem.type.textspan.Sentence",
-          JCasTermAnnotator.DICTIONARY_DESCRIPTOR_KEY,
-          ExternalResourceFactory.createExternalResourceDescription(
-              FileResourceImpl.class,
-              FileLocator.locateFile("org/apache/ctakes/dictionary/lookup/fast/cTakesHsql.xml"))
-          ));
-    } catch (FileNotFoundException e) {
-      e.printStackTrace();
-      throw new ResourceInitializationException(e);
-    }
-    builder.add(ClearNLPDependencyParserAE.createAnnotatorDescription());
-    builder.add(PolarityCleartkAnalysisEngine.createAnnotatorDescription());
-    builder.add(UncertaintyCleartkAnalysisEngine.createAnnotatorDescription());
-    return builder.createAggregateDescription();
-  }
-  
-  public static AnalysisEngineDescription getParsingPipeline() throws ResourceInitializationException{
-    AggregateBuilder builder = new AggregateBuilder();
-    builder.add(getTokenProcessingPipeline());
-    builder.add(ClearNLPDependencyParserAE.createAnnotatorDescription());
-    builder.add(AnalysisEngineFactory.createEngineDescription(ConstituencyParser.class));
-    return builder.createAggregateDescription();
-  }
-  
-  public static AnalysisEngineDescription getTokenProcessingPipeline() throws ResourceInitializationException {
-    AggregateBuilder builder = new AggregateBuilder();
-    builder.add(SimpleSegmentAnnotator.createAnnotatorDescription());
-    builder.add(SentenceDetector.createAnnotatorDescription());
-    builder.add(TokenizerAnnotatorPTB.createAnnotatorDescription());
-    builder.add(LvgAnnotator.createAnnotatorDescription());
-    builder.add(ContextDependentTokenizerAnnotator.createAnnotatorDescription());
-    builder.add(POSTagger.createAnnotatorDescription());
-    builder.add(Chunker.createAnnotatorDescription());
-    builder.add(getStandardChunkAdjusterAnnotator());
-    
-    return builder.createAggregateDescription();
-  }
-  
-  public static AnalysisEngineDescription getStandardChunkAdjusterAnnotator() throws ResourceInitializationException{
-    AggregateBuilder builder = new AggregateBuilder();
-    // adjust NP in NP NP to span both
-    builder.add(ChunkAdjuster.createAnnotatorDescription(new String[] { "NP", "NP" },  1));
-    // adjust NP in NP PP NP to span all three
-    builder.add(ChunkAdjuster.createAnnotatorDescription(new String[] { "NP", "PP", "NP" }, 2));
-    return builder.createAggregateDescription();
-  }
-  
-  public static class CopyNPChunksToLookupWindowAnnotations extends JCasAnnotator_ImplBase {
-
-    @Override
-    public void process(JCas jCas) throws AnalysisEngineProcessException {
-      for (Chunk chunk : JCasUtil.select(jCas, Chunk.class)) {
-        if (chunk.getChunkType().equals("NP")) {
-          new LookupWindowAnnotation(jCas, chunk.getBegin(), chunk.getEnd()).addToIndexes();
-        }
-      }
-    }
-  }
-  
-  public static class RemoveEnclosedLookupWindows extends JCasAnnotator_ImplBase {
-
-    @Override
-    public void process(JCas jCas) throws AnalysisEngineProcessException {
-      List<LookupWindowAnnotation> lws = new ArrayList<>(JCasUtil.select(jCas, LookupWindowAnnotation.class));
-      // we'll navigate backwards so that as we delete things we shorten the list from the back
-      for(int i = lws.size()-2; i >= 0; i--){
-        LookupWindowAnnotation lw1 = lws.get(i);
-        LookupWindowAnnotation lw2 = lws.get(i+1);
-        if(lw1.getBegin() <= lw2.getBegin() && lw1.getEnd() >= lw2.getEnd()){
-          /// lw1 envelops or encloses lw2
-          lws.remove(i+1);
-          lw2.removeFromIndexes();
-        }
-      }
-      
-    }
-    
-  }
-  
-  public static void main(String[] args) throws FileNotFoundException, IOException, UIMAException, SAXException{
-    AnalysisEngineDescription aed = getDefaultPipeline();
-    String note = "History of diabetes and hypertension. Mother had breast cancer. Sister with multiple sclerosis. " +
-    			  "The patient is suffering from extreme pain due to shark bite. Recommend continuing use of aspirin, oxycodone, and coumadin. Continue exercise for obesity and hypertension." +
-                  "Patient denies smoking and chest pain. Patient has no cancer. There is no sign of multiple sclerosis. " +
-    			  "Mass is suspicious for breast cancer. Possible breast cancer. Cannot exclude stenosis. Some degree of focal pancreatitis is also possible." +
-    			  "Discussed surgery and chemotherapy. Will return if pain continues. ";
-    JCas jcas = JCasFactory.createJCas();
-    jcas.setDocumentText(note);
-    SimplePipeline.runPipeline(jcas, aed);
-
-    for(IdentifiedAnnotation entity : JCasUtil.select(jcas, IdentifiedAnnotation.class)){
-      System.out.println("Entity: " + entity.getCoveredText() + " === Polarity: " + entity.getPolarity() +
-    		  													" === Uncertain? " + (entity.getUncertainty()==CONST.NE_UNCERTAINTY_PRESENT) +
-    		  													" === Subject: " + entity.getSubject() + 
-    		  													" === Generic? "  + (entity.getGeneric() == CONST.NE_GENERIC_TRUE) +
-    		  													" === Conditional? " + (entity.getConditional() == CONST.NE_CONDITIONAL_TRUE) +
-    		  													" === History? " + (entity.getHistoryOf() == CONST.NE_HISTORY_OF_PRESENT)
-    		  );
-    }
-    
-    if(args.length > 0)
-      aed.toXML(new FileWriter(args[0]));
-  }
-}
+}
\ No newline at end of file