You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2016/10/11 02:06:23 UTC

svn commit: r1764190 - in /ctakes/trunk: ctakes-core/src/main/java/org/apache/ctakes/core/ae/ ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/ ctakes-core/src/main/java/org/apache/ctakes/core/util/ ctakes-examples/src/main/java/org/apache/cta...

Author: seanfinan
Date: Tue Oct 11 02:06:22 2016
New Revision: 1764190

URL: http://svn.apache.org/viewvc?rev=1764190&view=rev
Log:
PropertyAeFactory javadocs, refactoring
SentenceDetector set default model in Parameter
DocumentIdAnnotationUtil protection against missing common Views in the Cas
OntologyConceptUtil added getCuiCounts()
ExampleAggregatePipeline javadocs
Adding pipeline simplification utilities to core:
   CuiCollector stores cuis
   EntityCollector stores entity info
   PipelineBuilder adds really simplified pipeline construction
   PipelineReader adds simple pipeline construction using instructions in a flat file
Adding examples using new pipeline utilities

Added:
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/CuiCollector.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/EntityCollector.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineBuilder.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineReader.java
    ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleBuilderPipeline.java
    ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleReaderPipeline.java
    ctakes/trunk/ctakes-examples/src/main/resources/org/
    ctakes/trunk/ctakes-examples/src/main/resources/org/apache/
    ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/
    ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/
    ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/
    ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline1.txt
    ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline2.txt
Modified:
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/PropertyAeFactory.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetector.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/DocumentIDAnnotationUtil.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/OntologyConceptUtil.java
    ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleAggregatePipeline.java

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/PropertyAeFactory.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/PropertyAeFactory.java?rev=1764190&r1=1764189&r2=1764190&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/PropertyAeFactory.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/PropertyAeFactory.java Tue Oct 11 02:06:22 2016
@@ -15,8 +15,8 @@ import java.util.Map;
 import java.util.Properties;
 
 /**
- * There may be some way to get values directly into the root UimaContext.
- * This factory can load plain old java properties files and pass the specified properties as parameters for AE creation
+ * This factory can load plain old java .properties files and pass the specified properties as parameters for AE creation.
+ * There may be some way to get values directly into the root UimaContext, but for now this works with UimaFit parameters.
  *
  * @author SPF , chip-nlp
  * @version %I%
@@ -33,29 +33,55 @@ public enum PropertyAeFactory {
 
 
    // Use a single hashmap so that multiple properties files can be used
-   final private Map<String, String> _properties = new HashMap<>();
+   final private Map<String, Object> _properties = new HashMap<>();
 
+   /**
+    * Load a .properties file with key value pairs
+    *
+    * @param filePath path to the .properties file to load
+    */
    synchronized public void loadPropertyFile( final String filePath ) {
       try ( InputStream stream = FileLocator.getAsStream( filePath ) ) {
          final Properties properties = new Properties();
          properties.load( stream );
-         for ( String name : properties.stringPropertyNames() ) {
-            final String value = properties.getProperty( name );
-            if ( value == null ) {
-               LOGGER.warn( "Property has no value: " + name );
-            } else {
-               _properties.put( name, value );
-            }
-         }
+         properties.entrySet().forEach( e -> _properties.put( e.getKey().toString(), e.getValue() ) );
       } catch ( IOException ioE ) {
          LOGGER.error( "Property File not found: " + filePath );
       }
    }
 
-   static private Object[] createParameters( final Map<String, String> parameterMap ) {
+   /**
+    * Add key value pairs to the stored properties
+    *
+    * @param parameters key value pairs
+    */
+   synchronized public void addParameters( final Object... parameters ) {
+      if ( parameters.length == 0 ) {
+         LOGGER.warn( "No parameters specified." );
+         return;
+      }
+      if ( parameters.length % 2 != 0 ) {
+         LOGGER.error( "Odd number of parameters provided.  Should be key value pairs." );
+         return;
+      }
+      for ( int i = 0; i < parameters.length; i += 2 ) {
+         if ( parameters[ i ] instanceof String ) {
+            _properties.put( (String)parameters[ i ], parameters[ i + 1 ] );
+         } else {
+            LOGGER.warn( "Parameter " + i + " not a String, using " + parameters[ i ].toString() );
+            _properties.put( parameters[ i ].toString(), parameters[ i + 1 ] );
+         }
+      }
+   }
+
+   /**
+    * @param parameterMap map of parameter names and values
+    * @return array of Objects representing name value pairs
+    */
+   static private Object[] createParameters( final Map<String, Object> parameterMap ) {
       final Object[] parameters = new Object[ parameterMap.size() * 2 ];
       int i = 0;
-      for ( Map.Entry<String, String> entry : parameterMap.entrySet() ) {
+      for ( Map.Entry<String, Object> entry : parameterMap.entrySet() ) {
          parameters[ i ] = entry.getKey();
          parameters[ i + 1 ] = entry.getValue();
          i += 2;
@@ -68,19 +94,29 @@ public enum PropertyAeFactory {
     * @return new parameter arrays containing parameters loaded by this factory and followed by specified parameters
     */
    synchronized private Object[] getAllParameters( final Object... parameters ) {
-      if ( _properties.isEmpty() ) {
-         return parameters;
+      if ( parameters.length == 0 ) {
+         return createParameters( _properties );
       }
-      if ( parameters == null || parameters.length == 0 ) {
+      if ( parameters.length % 2 != 0 ) {
+         LOGGER.error( "Odd number of parameters provided.  Should be key value pairs." );
          return createParameters( _properties );
       }
-      final Map<String, String> parameterMap = new HashMap<>( _properties );
+      if ( _properties.isEmpty() ) {
+         return parameters;
+      }
+      final Map<String, Object> parameterMap = new HashMap<>( _properties );
       for ( int i = 0; i < parameters.length; i += 2 ) {
-         parameterMap.put( parameters[ i ].toString(), parameters[ i + 1 ].toString() );
+         if ( parameters[ i ] instanceof String ) {
+            parameterMap.put( (String)parameters[ i ], parameters[ i + 1 ] );
+         } else {
+            LOGGER.warn( "Parameter " + i + " not a String, using " + parameters[ i ].toString() );
+            parameterMap.put( parameters[ i ].toString(), parameters[ i + 1 ] );
+         }
       }
       return createParameters( parameterMap );
    }
 
+
    /**
     * This method should be avoided.  See the bottom of https://uima.apache.org/d/uimafit-current/api/index.html
     *
@@ -143,7 +179,7 @@ public enum PropertyAeFactory {
     * @return Description with specified parameters plus those loaded from properties that is wrapped with a simple Logger AE that logs the Start and Finish of the process
     * @throws ResourceInitializationException if UimaFit has a problem
     */
-   public AnalysisEngineDescription createLoggedDescription( final AnalysisEngineDescription mainDescription )
+   static public AnalysisEngineDescription createLoggedDescription( final AnalysisEngineDescription mainDescription )
          throws ResourceInitializationException {
       return StartFinishLogger.createLoggedDescription( mainDescription );
    }

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetector.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetector.java?rev=1764190&r1=1764189&r2=1764190&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetector.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetector.java Tue Oct 11 02:06:22 2016
@@ -18,28 +18,11 @@
  */
 package org.apache.ctakes.core.ae;
 
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.charset.Charset;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Set;
-
 import opennlp.tools.dictionary.Dictionary;
-import opennlp.tools.sentdetect.DefaultSDContextGenerator;
-import opennlp.tools.sentdetect.SentenceDetectorME;
-import opennlp.tools.sentdetect.SentenceModel;
-import opennlp.tools.sentdetect.SentenceSample;
-import opennlp.tools.sentdetect.SentenceSampleStream;
+import opennlp.tools.sentdetect.*;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 import opennlp.tools.util.TrainingParameters;
-
 import org.apache.ctakes.core.resource.FileLocator;
 import org.apache.ctakes.core.sentence.EndOfSentenceScannerImpl;
 import org.apache.ctakes.core.sentence.SentenceDetectorCtakes;
@@ -51,12 +34,16 @@ import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.analysis_engine.annotator.AnnotatorProcessException;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
 import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
 import org.apache.uima.fit.descriptor.ConfigurationParameter;
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
 import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+
+import java.io.*;
+import java.nio.charset.Charset;
+import java.util.*;
 
 /**
  * Wraps the OpenNLP sentence detector in a UIMA annotator
@@ -82,9 +69,9 @@ public class SentenceDetector extends JC
 	public static final String SD_MODEL_FILE_PARAM = PARAM_SD_MODEL_FILE; // backwards compatibility
 	@ConfigurationParameter(
 	    name = PARAM_SD_MODEL_FILE,
-	    mandatory = true,
-	    description = "Path to sentence detector model file"
-	    )
+         description = "Path to sentence detector model file",
+         defaultValue = "org/apache/ctakes/core/sentdetect/sd-med-model.zip"
+   )
 	private String sdModelPath;
 	
 	private opennlp.tools.sentdetect.SentenceModel sdmodel;
@@ -250,10 +237,8 @@ public class SentenceDetector extends JC
 	}
 
 	public static AnalysisEngineDescription createAnnotatorDescription() throws ResourceInitializationException{
-	  return AnalysisEngineFactory.createEngineDescription(SentenceDetector.class,
-	      SentenceDetector.PARAM_SD_MODEL_FILE,
-	      "org/apache/ctakes/core/sentdetect/sd-med-model.zip");
-	}
+      return AnalysisEngineFactory.createEngineDescription( SentenceDetector.class );
+   }
 	
 	/**
 	 * Train a new sentence detector from the training data in the first file

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/CuiCollector.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/CuiCollector.java?rev=1764190&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/CuiCollector.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/CuiCollector.java Tue Oct 11 02:06:22 2016
@@ -0,0 +1,135 @@
+package org.apache.ctakes.core.pipeline;
+
+
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.ctakes.core.util.OntologyConceptUtil;
+import org.apache.log4j.Logger;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.jcas.JCas;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * Stores a collection of Cuis from a run, along with their associated Document Ids
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 10/9/2016
+ */
+public enum CuiCollector {
+   INSTANCE;
+
+   static public CuiCollector getInstance() {
+      return INSTANCE;
+   }
+
+   static private final Logger LOGGER = Logger.getLogger( "CuiCollector" );
+
+   private final Map<String, Map<String, Long>> _cuiCountMap = new HashMap<>();
+
+   /**
+    * @return Ids for documents that have Cuis stored in the CuiCollector
+    */
+   public Collection<String> getDocumentIds() {
+      return Collections.unmodifiableCollection( _cuiCountMap.keySet() );
+   }
+
+   /**
+    * @param documentId id for some document
+    * @return cuis discovered in the document
+    */
+   public Collection<String> getCuis( final String documentId ) {
+      return Collections.unmodifiableCollection( get( documentId ).keySet() );
+   }
+
+   /**
+    * @param documentId id for some document
+    * @return map of cuis discovered in the document and how many times they were discovered
+    */
+   public Map<String, Long> getCuiCounts( final String documentId ) {
+      return get( documentId );
+   }
+
+   /**
+    * @return all cuis found in all documents in the run
+    */
+   public Collection<String> getCuis() {
+      return Collections.unmodifiableCollection( _cuiCountMap.values().stream()
+            .map( Map::keySet )
+            .flatMap( Collection::stream )
+            .collect( Collectors.toSet() ) );
+   }
+
+   /**
+    * @return map of cuis discovered in all documents in the run and how many times they were discovered
+    */
+   public Map<String, Long> getCuiCounts() {
+      return Collections.unmodifiableMap(
+            _cuiCountMap.values().stream()
+                  .map( Map::entrySet )
+                  .flatMap( Collection::stream )
+                  .collect( Collectors.toMap( Map.Entry::getKey, Map.Entry::getValue, ( n1, n2 ) -> n1 + n2 ) ) );
+   }
+
+   /**
+    * @param documentId id for some document
+    * @return map of cuis discovered in the document and how many times they were discovered, with a warning if none exist
+    */
+   private Map<String, Long> get( final String documentId ) {
+      final Map<String, Long> cuiCounts = _cuiCountMap.get( documentId );
+      if ( cuiCounts == null ) {
+         LOGGER.warn( "No Cuis for " + documentId );
+         return Collections.emptyMap();
+      }
+      return Collections.unmodifiableMap( cuiCounts );
+   }
+
+   /**
+    * @return table of document ids and cui counts
+    */
+   @Override
+   public String toString() {
+      final List<String> allCuis = getCuis().stream().sorted().collect( Collectors.toList() );
+      final String header = "DOCUMENT_ID|" + String.join( "|", allCuis );
+      final String rows = _cuiCountMap.entrySet().stream()
+            .sorted()
+            .map( e -> createRowText( e.getKey(), e.getValue(), allCuis ) )
+            .collect( Collectors.joining() );
+      return header + "\n" + rows;
+   }
+
+   /**
+    * @param documentId id for some document
+    * @param cuiCounts  map of cuis discovered in the document and how many times they were discovered
+    * @param allCuis    all cuis found in all documents in the run
+    * @return row of document id and cui counts
+    */
+   static private String createRowText( final String documentId, final Map<String, Long> cuiCounts,
+                                        final List<String> allCuis ) {
+      final StringBuilder sb = new StringBuilder();
+      sb.append( documentId );
+      for ( String cui : allCuis ) {
+         sb.append( "|" );
+         final Long count = cuiCounts.get( cui );
+         sb.append( count == null ? "0" : count );
+      }
+      sb.append( "\n" );
+      return sb.toString();
+   }
+
+   /**
+    * Analysis Engine that stores collections of cuis by document id in the CuiCollector
+    */
+   static public final class CuiCollectorEngine extends JCasAnnotator_ImplBase {
+      @Override
+      public void process( final JCas jCas ) {
+         LOGGER.info( "Starting processing" );
+         final String id = DocumentIDAnnotationUtil.getDeepDocumentId( jCas );
+         final Map<String, Long> cuiCounts = OntologyConceptUtil.getCuiCounts( jCas );
+         CuiCollector.getInstance()._cuiCountMap.put( id, cuiCounts );
+         LOGGER.info( "Finished processing" );
+      }
+   }
+
+}

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/EntityCollector.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/EntityCollector.java?rev=1764190&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/EntityCollector.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/EntityCollector.java Tue Oct 11 02:06:22 2016
@@ -0,0 +1,134 @@
+package org.apache.ctakes.core.pipeline;
+
+
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.log4j.Logger;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * Stores a collection of simple entity information from a run, along with their associated Document Ids
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 10/10/2016
+ */
+public enum EntityCollector {
+   INSTANCE;
+
+   static public EntityCollector getInstance() {
+      return INSTANCE;
+   }
+
+   static private final Logger LOGGER = Logger.getLogger( "EntityCollector" );
+
+
+   private final Map<String, Collection<Entity>> _entityMap = new HashMap<>();
+
+   /**
+    * @return Ids for documents that have entity information stored in the EntityCollector
+    */
+   public Collection<String> getDocumentIds() {
+      return Collections.unmodifiableCollection( _entityMap.keySet() );
+   }
+
+   /**
+    * @param documentId id for some document
+    * @return simple entity objects for the document
+    */
+   public Collection<Entity> getEntities( final String documentId ) {
+      final Collection<Entity> entities = _entityMap.get( documentId );
+      if ( entities == null ) {
+         LOGGER.warn( "No Entities for " + documentId );
+         return Collections.emptyList();
+      }
+      return Collections.unmodifiableCollection( entities );
+   }
+
+   /**
+    * @return staggered list of document ids, entities and entity properties
+    */
+   @Override
+   public String toString() {
+      final StringBuilder sb = new StringBuilder();
+      for ( Map.Entry<String, Collection<Entity>> entry : _entityMap.entrySet() ) {
+         sb.append( entry.getKey() ).append( "\n" );
+         entry.getValue().stream().map( Entity::toString ).forEach( sb::append );
+      }
+      return sb.toString();
+   }
+
+   /**
+    * Holds basic information from an IdentifiedAnnotation.
+    * Necessary so the IdentifiedAnnotation can be cleaned from the Cas
+    */
+   static public final class Entity {
+      private final int _begin;
+      private final int _end;
+      private final String _coveredText;
+      private final int _polarity;
+      private final int _uncertainty;
+      private final boolean _conditional;
+      private final boolean _generic;
+      private final String _subject;
+      private final int _historyOf;
+
+      private Entity( final IdentifiedAnnotation annotation ) {
+         _begin = annotation.getBegin();
+         _end = annotation.getEnd();
+         _coveredText = annotation.getCoveredText();
+         _polarity = annotation.getPolarity();
+         _uncertainty = annotation.getUncertainty();
+         _conditional = annotation.getConditional();
+         _generic = annotation.getGeneric();
+         _subject = annotation.getSubject();
+         _historyOf = annotation.getHistoryOf();
+      }
+
+      /**
+       * @return row of entity properties
+       */
+      @Override
+      public String toString() {
+         final StringBuilder sb = new StringBuilder();
+         sb.append( '\t' ).append( _begin ).append( ',' ).append( _end ).append( '\t' )
+               .append( _coveredText ).append( "\n" );
+         sb.append( "\t\t| " ).append( _polarity < 0 ? "negated" : "affirmed" );
+         sb.append( " | " ).append( _uncertainty < 0 ? "uncertain" : "certain" );
+         sb.append( " | " ).append( _conditional ? "conditional" : "not conditional" );
+         sb.append( " | " ).append( _generic ? "generic" : "not generic" );
+         sb.append( " | " ).append( _subject == null ? "Patient" : _subject );
+         sb.append( " | history of: " ).append( _historyOf );
+         sb.append( '\n' );
+         return sb.toString();
+      }
+   }
+
+   /**
+    * Analysis Engine that stores collections of simple entities by document id in the EntityCollector
+    */
+   static public final class EntityCollectorEngine extends JCasAnnotator_ImplBase {
+      @Override
+      public void process( final JCas jCas ) {
+         LOGGER.info( "Starting processing" );
+         final String id = DocumentIDAnnotationUtil.getDeepDocumentId( jCas );
+         final Collection<IdentifiedAnnotation> annotations = JCasUtil.select( jCas, IdentifiedAnnotation.class );
+         putEntities( id, annotations );
+         LOGGER.info( "Finished processing" );
+      }
+
+      static private void putEntities( final String documentId, final Collection<IdentifiedAnnotation> annotations ) {
+         final Collection<Entity> entities = annotations.stream().map( Entity::new ).collect( Collectors.toList() );
+         EntityCollector.getInstance()._entityMap.put( documentId, entities );
+      }
+   }
+
+}

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineBuilder.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineBuilder.java?rev=1764190&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineBuilder.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineBuilder.java Tue Oct 11 02:06:22 2016
@@ -0,0 +1,237 @@
+package org.apache.ctakes.core.pipeline;
+
+
+import org.apache.ctakes.core.ae.PropertyAeFactory;
+import org.apache.ctakes.core.cc.XmiWriterCasConsumerCtakes;
+import org.apache.ctakes.core.cr.FilesInDirectoryCollectionReader;
+import org.apache.log4j.Logger;
+import org.apache.uima.UIMAException;
+import org.apache.uima.analysis_component.AnalysisComponent;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.fit.factory.AggregateBuilder;
+import org.apache.uima.fit.factory.CollectionReaderFactory;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.fit.pipeline.SimplePipeline;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Creates a pipeline using a small set of simple methods.
+ * <p>
+ * Some methods are order-specific and calls will directly impact ordering within the pipeline.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 10/9/2016
+ */
+final public class PipelineBuilder {
+
+   static private final Logger LOGGER = Logger.getLogger( "PipelineBuilder" );
+
+
+   private final List<AnalysisEngineDescription> _aeList;
+   private CollectionReader _reader;
+
+   public PipelineBuilder() {
+      _aeList = new ArrayList<>();
+   }
+
+   /**
+    * Use of this method is order-specific
+    *
+    * @param filePath path to .properties file with ae parameter name value pairs
+    * @return this PipelineBuilder
+    */
+   public PipelineBuilder loadParameters( final String filePath ) {
+      PropertyAeFactory.getInstance().loadPropertyFile( filePath );
+      return this;
+   }
+
+   /**
+    * Use of this method is order-specific
+    *
+    * @param parameters add ae parameter name value pairs
+    * @return this PipelineBuilder
+    */
+   public PipelineBuilder addParameters( final Object... parameters ) {
+      PropertyAeFactory.getInstance().addParameters( parameters );
+      return this;
+   }
+
+   /**
+    * Use of this method is not order-specific
+    *
+    * @param reader Collection Reader to place at the beginning of the pipeline
+    * @return this PipelineBuilder
+    */
+   public PipelineBuilder reader( final CollectionReader reader ) {
+      _reader = reader;
+      return this;
+   }
+
+   /**
+    * Adds a Collection reader to the beginning of the pipeline that will read files in a directory.
+    * Relies upon {@link FilesInDirectoryCollectionReader#PARAM_INPUTDIR} having been specified
+    * Use of this method is not order-specific.
+    *
+    * @return this PipelineBuilder
+    * @throws UIMAException if the collection reader cannot be created
+    */
+   public PipelineBuilder readFiles() throws UIMAException {
+      _reader = CollectionReaderFactory.createReader( FilesInDirectoryCollectionReader.class );
+      return this;
+   }
+
+   /**
+    * Adds a Collection reader to the beginning of the pipeline that will read files in a directory.
+    * Use of this method is not order-specific
+    *
+    * @param inputDirectory directory with input files
+    * @return this PipelineBuilder
+    * @throws UIMAException if the collection reader cannot be created
+    */
+   public PipelineBuilder readFiles( final String inputDirectory ) throws UIMAException {
+      _reader = CollectionReaderFactory.createReader( FilesInDirectoryCollectionReader.class,
+            FilesInDirectoryCollectionReader.PARAM_INPUTDIR,
+            inputDirectory );
+      return this;
+   }
+
+   /**
+    * Use of this method is order-specific.
+    *
+    * @param component  ae or cc component class to add to the pipeline
+    * @param parameters ae or cc parameter name value pairs.  May be empty.
+    * @return this PipelineBuilder
+    * @throws ResourceInitializationException if the component cannot be created
+    */
+   public PipelineBuilder add( final Class<? extends AnalysisComponent> component,
+                               final Object... parameters ) throws ResourceInitializationException {
+      _aeList.add( PropertyAeFactory.getInstance().createDescription( component, parameters ) );
+      return this;
+   }
+
+   /**
+    * Adds an ae or cc wrapped with "Starting processing" and "Finished processing" log messages
+    * Use of this method is order-specific.
+    *
+    * @param component  ae or cc component class to add to the pipeline
+    * @param parameters ae or cc parameter name value pairs.  May be empty.
+    * @return this PipelineBuilder
+    * @throws ResourceInitializationException if the component cannot be created
+    */
+   public PipelineBuilder addLogged( final Class<? extends AnalysisComponent> component,
+                                     final Object... parameters ) throws ResourceInitializationException {
+      _aeList.add( PropertyAeFactory.getInstance().createLoggedDescription( component, parameters ) );
+      return this;
+   }
+
+   /**
+    * Use of this method is order-specific.
+    *
+    * @param description ae or cc component class description to add to the pipeline
+    * @return this PipelineBuilder
+    */
+   public PipelineBuilder addDescription( final AnalysisEngineDescription description ) {
+      _aeList.add( description );
+      return this;
+   }
+
+   /**
+    * Adds ae that maintains CUI information throughout the run.
+    * CUI information can later be accessed using the {@link CuiCollector} singleton
+    * Use of this method is order-specific.
+    *
+    * @return this PipelineBuilder
+    * @throws ResourceInitializationException if the CuiCollector engine cannot be created
+    */
+   public PipelineBuilder collectCuis() throws ResourceInitializationException {
+      return add( CuiCollector.CuiCollectorEngine.class );
+   }
+
+   /**
+    * Adds ae that maintains simple Entity information throughout the run.
+    * Entity information can later be accessed using the {@link EntityCollector} singleton
+    * Use of this method is order-specific.
+    *
+    * @return this PipelineBuilder
+    * @throws ResourceInitializationException if the EntityCollector engine cannot be created
+    */
+   public PipelineBuilder collectEntities() throws ResourceInitializationException {
+      return add( EntityCollector.EntityCollectorEngine.class );
+   }
+
+   /**
+    * Adds ae that writes an xmi file.
+    * Relies upon {@link XmiWriterCasConsumerCtakes#PARAM_OUTPUTDIR} having been specified
+    * Use of this method is order-specific.
+    *
+    * @return this PipelineBuilder
+    * @throws ResourceInitializationException if the Xmi writer engine cannot be created
+    */
+   public PipelineBuilder writeXMIs() throws ResourceInitializationException {
+      return add( XmiWriterCasConsumerCtakes.class );
+   }
+
+   /**
+    * Adds ae that writes an xmi file.
+    * Use of this method is order-specific.
+    *
+    * @param outputDirectory directory in which xmi files should be written
+    * @return this PipelineBuilder
+    * @throws ResourceInitializationException if the Xmi writer engine cannot be created
+    */
+   public PipelineBuilder writeXMIs( final String outputDirectory ) throws ResourceInitializationException {
+      return add( XmiWriterCasConsumerCtakes.class, XmiWriterCasConsumerCtakes.PARAM_OUTPUTDIR, outputDirectory );
+   }
+
+   /**
+    * Run the pipeline using some specified collection reader.
+    * Use of this method is order-specific.
+    *
+    * @return this PipelineBuilder
+    * @throws IOException   if the pipeline could not be run
+    * @throws UIMAException if the pipeline could not be run
+    */
+   public PipelineBuilder run() throws IOException, UIMAException {
+      if ( _reader == null ) {
+         LOGGER.error( "No Collection Reader specified." );
+         return this;
+      }
+      final AggregateBuilder builder = new AggregateBuilder();
+      _aeList.forEach( builder::add );
+      final AnalysisEngineDescription desc = builder.createAggregateDescription();
+      SimplePipeline.runPipeline( _reader, desc );
+      return this;
+   }
+
+   /**
+    * Run the pipeline on the given text.
+    * Use of this method is order-specific.
+    *
+    * @param text text upon which to run this pipeline
+    * @return this PipelineBuilder
+    * @throws IOException   if the pipeline could not be run
+    * @throws UIMAException if the pipeline could not be run
+    */
+   public PipelineBuilder run( final String text ) throws IOException, UIMAException {
+      if ( _reader != null ) {
+         LOGGER.error( "Collection Reader specified, ignoring." );
+         return this;
+      }
+      final JCas jcas = JCasFactory.createJCas();
+      jcas.setDocumentText( text );
+      final AggregateBuilder builder = new AggregateBuilder();
+      _aeList.forEach( builder::add );
+      final AnalysisEngineDescription desc = builder.createAggregateDescription();
+      SimplePipeline.runPipeline( jcas, desc );
+      return this;
+   }
+
+
+}

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineReader.java?rev=1764190&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineReader.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineReader.java Tue Oct 11 02:06:22 2016
@@ -0,0 +1,339 @@
+package org.apache.ctakes.core.pipeline;
+
+
+import org.apache.ctakes.core.cc.XmiWriterCasConsumerCtakes;
+import org.apache.ctakes.core.cr.FilesInDirectoryCollectionReader;
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.log4j.Logger;
+import org.apache.uima.UIMAException;
+import org.apache.uima.analysis_component.AnalysisComponent;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.resource.ResourceInitializationException;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.regex.Pattern;
+
+/**
+ * Creates a pipeline (PipelineBuilder) from specifications in a flat plaintext file.
+ * <p>
+ * <p>There are several basic commands:
+ * addPackage <i>user_package_name</i>
+ * loadParameters <i>path_to_properties_file_with_ae_parameters</i>
+ * addParameters <i>ae_parameter_name</i>|<i>ae_parameter_value</i>| ...
+ * reader <i>collection_reader_class_name</i>
+ * readFiles <i>input_directory</i>
+ * <i>input_directory</i> can be empty if {@link FilesInDirectoryCollectionReader#PARAM_INPUTDIR} was specified
+ * add <i>ae_or_cc_class_name</i>
+ * addLogged <i>ae_or_cc_class_name</i>
+ * collectCuis
+ * collectEntities
+ * writeXmis <i>output_directory</i>
+ * <i>output_directory</i> can be empty if {@link XmiWriterCasConsumerCtakes#PARAM_OUTPUTDIR} was specified
+ * <p>
+ * # and // may be used to mark line comments
+ * </p>
+ * <p>
+ * class names must be fully-specified with package unless they are in standard ctakes cr ae or cc packages,
+ * or in a package specified by an earlier addPackage command.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 10/10/2016
+ */
+final public class PipelineReader {
+
+   static private final Logger LOGGER = Logger.getLogger( "PipelineReader" );
+
+   // Module names expanded to org.apache.ctakes.<module>.ae / .cc / .cr when a simple class name is resolved.
+   // Every entry must be a legal java package fragment, so no hyphens are allowed.
+   static private final String[] CTAKES_PACKAGES
+         = { "core",
+             "assertion",
+             "chunker",
+             "clinicalpipeline",
+             "constituency.parser",
+             "contexttokenizer",
+             "coreference",
+             "dependency.parser",
+             "dictionary.lookup2",
+             "dictionary.lookup",
+             "temporal",
+             "drugner",
+             "lvg",
+             "necontexts",
+             "postagger",
+             "preprocessor",
+             "relationextractor",
+             "sideeffect",
+             "smokingstatus",
+             "template.filler" };
+
+   // Shared empty argument array handed to ResourceInitializationException constructors.
+   static private final Object[] EMPTY_OBJECT_ARRAY = new Object[ 0 ];
+
+   // Separator for the values of an addParameters command.
+   static private final Pattern SPLIT_PATTERN = Pattern.compile( "\\|" );
+
+   // Builder that accumulates the pipeline described by the loaded command files.
+   private final PipelineBuilder _builder;
+
+   // Packages registered with addPackage; searched before the standard ctakes packages.
+   private final Collection<String> _userPackages;
+
+
+   /**
+    * Create an empty PipelineReader: a fresh builder and no user packages.
+    */
+   public PipelineReader() {
+      this._userPackages = new ArrayList<>();
+      this._builder = new PipelineBuilder();
+   }
+
+   /**
+    * Create a PipelineReader and immediately apply the commands of a pipeline command file.
+    *
+    * @param filePath path to the pipeline command file
+    * @throws UIMAException if the pipeline cannot be loaded
+    */
+   public PipelineReader( final String filePath ) throws UIMAException {
+      // Start from the same empty state as the no-argument constructor.
+      this();
+      loadPipelineFile( filePath );
+   }
+
+   /**
+    * Load a file with command parameter pairs for building a pipeline.
+    * Blank lines and lines starting with "//" or "#" are skipped.  Every other line is split at its
+    * first space into a command and a parameter, which are applied to the current builder.
+    * A file that cannot be opened or read is logged as an error and is not rethrown;
+    * commands applied before the failure stay applied.
+    *
+    * @param filePath path to the pipeline command file
+    * @throws UIMAException if a command in the file could not be executed
+    */
+   public void loadPipelineFile( final String filePath ) throws UIMAException {
+      try ( final BufferedReader reader
+                  = new BufferedReader( new InputStreamReader( FileLocator.getAsStream( filePath ) ) ) ) {
+         String line;
+         while ( (line = reader.readLine()) != null ) {
+            line = line.trim();
+            if ( line.isEmpty() || line.startsWith( "//" ) || line.startsWith( "#" ) ) {
+               continue;
+            }
+            final int spaceIndex = line.indexOf( ' ' );
+            if ( spaceIndex < 0 ) {
+               // A command without a parameter, e.g. collectCuis
+               addToPipeline( line, "" );
+            } else {
+               addToPipeline( line.substring( 0, spaceIndex ), line.substring( spaceIndex + 1 ).trim() );
+            }
+         }
+      } catch ( IOException ioE ) {
+         // Not only "not found": a failure in the middle of reading lands here too, so keep the cause.
+         LOGGER.error( "Could not read Pipeline File: " + filePath, ioE );
+      }
+   }
+
+   /**
+    * Gives access to the builder that this reader fills, so that a caller can add to it or run it.
+    *
+    * @return the PipelineBuilder with its current state set by this PipelineReader
+    */
+   public PipelineBuilder getBuilder() {
+      return this._builder;
+   }
+
+   /**
+    * Apply a single command from a pipeline file to the builder or to the list of user packages.
+    * An unrecognized command is logged as an error and otherwise ignored.
+    *
+    * @param command   specified by first word in the file line
+    * @param parameter remainder of the file line after the command, empty if there is none
+    * @throws UIMAException if the command could not be executed
+    */
+   private void addToPipeline( final String command, final String parameter ) throws UIMAException {
+      switch ( command ) {
+         case "addPackage":
+            _userPackages.add( parameter );
+            break;
+         case "loadParameters":
+            _builder.loadParameters( parameter );
+            break;
+         case "addParameters":
+            _builder.addParameters( getStrings( parameter ) );
+            break;
+         case "reader":
+            _builder.reader( createReader( parameter ) );
+            break;
+         case "readFiles":
+            if ( parameter.isEmpty() ) {
+               _builder.readFiles();
+            } else {
+               _builder.readFiles( parameter );
+            }
+            break;
+         case "add":
+            _builder.add( getComponentClass( parameter ) );
+            break;
+         case "addLogged":
+            _builder.addLogged( getComponentClass( parameter ) );
+            break;
+         case "collectCuis":
+            _builder.collectCuis();
+            break;
+         case "collectEntities":
+            // The misspelled label below was the only one accepted before; kept so existing files still work.
+         case "collectEntites":
+            _builder.collectEntities();
+            break;
+         case "writeXmis":
+            if ( parameter.isEmpty() ) {
+               _builder.writeXMIs();
+            } else {
+               _builder.writeXMIs( parameter );
+            }
+            break;
+         default:
+            LOGGER.error( "Unknown Command: " + command );
+      }
+   }
+
+   /**
+    * Resolve an ae or cc class, first by the name as given and then by searching the known packages.
+    *
+    * @param className fully-specified or simple name of an ae or cc component class
+    * @return discovered class for ae or cc
+    * @throws ResourceInitializationException if the class could not be found or is not an AnalysisComponent
+    */
+   private Class<? extends AnalysisComponent> getComponentClass( final String className ) throws
+                                                                                          ResourceInitializationException {
+      Class<?> componentClass;
+      try {
+         componentClass = Class.forName( className );
+      } catch ( ClassNotFoundException cnfE ) {
+         // Not loadable under the given name; fall back to the user and standard ctakes packages.
+         componentClass = getPackagedComponent( className );
+      }
+      if ( componentClass == null ) {
+         throw new ResourceInitializationException(
+               "No Analysis Component found for " + className, EMPTY_OBJECT_ARRAY );
+      }
+      assertClassType( componentClass, AnalysisComponent.class );
+      // Checked narrowing instead of an unchecked raw-type return; cannot fail after the assertion above.
+      return componentClass.asSubclass( AnalysisComponent.class );
+   }
+
+   /**
+    * Search the user packages and then the ae and cc packages of the standard ctakes modules.
+    *
+    * @param className simple name of an ae or cc component class
+    * @return discovered class for ae or cc, or null if none of the packages holds a matching class
+    */
+   private Class<? extends AnalysisComponent> getPackagedComponent( final String className ) {
+      for ( String packageName : _userPackages ) {
+         final Class<?> userClass = getPackagedClass( packageName, className, AnalysisComponent.class );
+         if ( userClass != null ) {
+            return userClass.asSubclass( AnalysisComponent.class );
+         }
+      }
+      for ( String packageName : CTAKES_PACKAGES ) {
+         final String modulePackage = "org.apache.ctakes." + packageName;
+         // Annotators (.ae) take precedence over consumers (.cc) within the same module.
+         Class<?> ctakesClass = getPackagedClass( modulePackage + ".ae", className, AnalysisComponent.class );
+         if ( ctakesClass == null ) {
+            ctakesClass = getPackagedClass( modulePackage + ".cc", className, AnalysisComponent.class );
+         }
+         if ( ctakesClass != null ) {
+            // getPackagedClass only returns classes assignable to the wanted type, so this cannot fail.
+            return ctakesClass.asSubclass( AnalysisComponent.class );
+         }
+      }
+      return null;
+   }
+
+   /**
+    * Find a Collection Reader class and instantiate it through its public no-argument constructor.
+    *
+    * @param className fully-specified or simple name of a cr Collection Reader class
+    * @return instantiated collection reader
+    * @throws ResourceInitializationException if the class could not be found or instantiated
+    */
+   private CollectionReader createReader( final String className ) throws ResourceInitializationException {
+      Class<?> readerClass;
+      try {
+         readerClass = Class.forName( className );
+      } catch ( ClassNotFoundException cnfE ) {
+         readerClass = getPackagedReader( className );
+      }
+      if ( readerClass == null ) {
+         throw new ResourceInitializationException( "No Collection Reader found for " + className, EMPTY_OBJECT_ARRAY );
+      }
+      assertClassType( readerClass, CollectionReader.class );
+      final Constructor<?> noArgConstructor;
+      try {
+         // Only a public constructor without parameters is usable here.
+         noArgConstructor = readerClass.getConstructor();
+      } catch ( NoSuchMethodException nsmE ) {
+         throw new ResourceInitializationException( "No Constructor for " + className, EMPTY_OBJECT_ARRAY );
+      }
+      try {
+         return (CollectionReader)noArgConstructor.newInstance();
+      } catch ( InstantiationException | IllegalAccessException | InvocationTargetException iniaitE ) {
+         throw new ResourceInitializationException(
+               "Could not construct " + className, EMPTY_OBJECT_ARRAY, iniaitE );
+      }
+   }
+
+   /**
+    * Search the user packages and then the cr packages of the standard ctakes modules.
+    *
+    * @param className simple name of a cr Collection Reader class
+    * @return discovered class for a cr, or null if none of the packages holds a matching class
+    */
+   private Class<? extends CollectionReader> getPackagedReader( final String className ) {
+      for ( String packageName : _userPackages ) {
+         final Class<?> userClass = getPackagedClass( packageName, className, CollectionReader.class );
+         if ( userClass != null ) {
+            return userClass.asSubclass( CollectionReader.class );
+         }
+      }
+      for ( String packageName : CTAKES_PACKAGES ) {
+         final Class<?> ctakesClass = getPackagedClass(
+               "org.apache.ctakes." + packageName + ".cr", className, CollectionReader.class );
+         if ( ctakesClass != null ) {
+            // getPackagedClass only returns classes assignable to the wanted type, so this cannot fail.
+            return ctakesClass.asSubclass( CollectionReader.class );
+         }
+      }
+      return null;
+   }
+
+   /**
+    * Try to load a class from one specific package and check that it has the wanted type.
+    *
+    * @param packageName     possible package for class
+    * @param className       simple name for class
+    * @param wantedClassType desired superclass type
+    * @return discovered class or null if no proper class was discovered
+    */
+   static private Class<?> getPackagedClass( final String packageName, final String className,
+                                             final Class<?> wantedClassType ) {
+      final String qualifiedName = packageName + "." + className;
+      final Class<?> candidate;
+      try {
+         candidate = Class.forName( qualifiedName );
+      } catch ( ClassNotFoundException cnfE ) {
+         // Nothing by that name in this package; the caller moves on to the next package.
+         return null;
+      }
+      return isClassType( candidate, wantedClassType ) ? candidate : null;
+   }
+
+   /**
+    * Guard that rejects a class which is not of the wanted type.
+    *
+    * @param classType       class type to test
+    * @param wantedClassType wanted class type
+    * @throws ResourceInitializationException if the class type does not extend the wanted class type
+    */
+   static private void assertClassType( final Class<?> classType, final Class<?> wantedClassType )
+         throws ResourceInitializationException {
+      if ( isClassType( classType, wantedClassType ) ) {
+         return;
+      }
+      final String message = "Not " + wantedClassType.getSimpleName() + " " + classType.getName();
+      throw new ResourceInitializationException( message, EMPTY_OBJECT_ARRAY );
+   }
+
+   /**
+    * Type test based on {@link Class#isAssignableFrom(Class)}.
+    *
+    * @param classType       class type to test
+    * @param wantedClassType wanted class type
+    * @return true if the class type extends the wanted class type
+    */
+   static private boolean isClassType( final Class<?> classType, final Class<?> wantedClassType ) {
+      final boolean assignable = wantedClassType.isAssignableFrom( classType );
+      return assignable;
+   }
+
+   /**
+    * Break the value of an addParameters command into its individual entries.
+    *
+    * @param parameter text
+    * @return array created by splitting text at '|' characters
+    */
+   static private String[] getStrings( final String parameter ) {
+      // Limit 0 is the default of the single-argument split: trailing empty strings are dropped.
+      final String[] pieces = SPLIT_PATTERN.split( parameter, 0 );
+      return pieces;
+   }
+
+
+}

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/DocumentIDAnnotationUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/DocumentIDAnnotationUtil.java?rev=1764190&r1=1764189&r2=1764190&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/DocumentIDAnnotationUtil.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/DocumentIDAnnotationUtil.java Tue Oct 11 02:06:22 2016
@@ -85,22 +85,27 @@ final public class DocumentIDAnnotationU
    static public String getDeepDocumentId( final JCas startingJcas ) {
       String documentID = getDocumentID( startingJcas );
       if ( documentID == null || documentID.equals( NO_DOCUMENT_ID ) ) {
+         LOGGER.debug( "Checking document Id for initial view" );
          try {
-            LOGGER.debug( "Checking document Id for initial view" );
             final JCas viewJcas = startingJcas.getView( "_InitialView" );
             documentID = DocumentIDAnnotationUtil.getDocumentID( viewJcas );
+         } catch ( CASException | CASRuntimeException casE ) {
+            LOGGER.warn( casE.getMessage() );
+            documentID = NO_DOCUMENT_ID;
+         }
+         if ( documentID == null || documentID.equals( NO_DOCUMENT_ID ) ) {
+            LOGGER.debug( "Checking document Id for plaintext view" );
+            try {
+               final JCas viewJcas = startingJcas.getView( "plaintext" );
+               documentID = DocumentIDAnnotationUtil.getDocumentID( viewJcas );
+            } catch ( CASException | CASRuntimeException casE ) {
+               LOGGER.warn( casE.getMessage() );
+               documentID = NO_DOCUMENT_ID;
+            }
             if ( documentID == null || documentID.equals( NO_DOCUMENT_ID ) ) {
-               LOGGER.debug( "Checking document Id for plaintext view" );
-               final JCas plaintextJcas = startingJcas.getView( "plaintext" );
-               documentID = DocumentIDAnnotationUtil.getDocumentID( plaintextJcas );
-               if ( documentID == null || documentID.equals( NO_DOCUMENT_ID ) ) {
-                  LOGGER.warn( "Unable to find DocumentIDAnnotation" );
-                  return NO_DOCUMENT_ID;
-               }
+               LOGGER.warn( "Unable to find DocumentIDAnnotation" );
+               return NO_DOCUMENT_ID;
             }
-         } catch ( CASException casE ) {
-            LOGGER.warn( "Unable to find DocumentIDAnnotation", casE );
-            return NO_DOCUMENT_ID;
          }
       }
       return documentID;

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/OntologyConceptUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/OntologyConceptUtil.java?rev=1764190&r1=1764189&r2=1764190&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/OntologyConceptUtil.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/OntologyConceptUtil.java Tue Oct 11 02:06:22 2016
@@ -173,6 +173,14 @@ final public class OntologyConceptUtil {
 
    /**
     * @param jcas -
+    * @return map of all cuis in the jcas and their counts
+    */
+   static public Map<String, Long> getCuiCounts( final JCas jcas ) {
+      // Gather every IdentifiedAnnotation in the cas and let the collection variant do the counting.
+      final Collection<IdentifiedAnnotation> annotations = JCasUtil.select( jcas, IdentifiedAnnotation.class );
+      return getCuiCounts( annotations );
+   }
+
+   /**
+    * @param jcas -
     * @return set of all tuis in jcas
     */
    static public Collection<String> getTuis( final JCas jcas ) {
@@ -275,6 +283,17 @@ final public class OntologyConceptUtil {
    }
 
    /**
+    * @param annotations -
+    * @return map of all Umls cuis associated with the annotations and the counts of those cuis
+    */
+   static public Map<String, Long> getCuiCounts( final Collection<IdentifiedAnnotation> annotations ) {
+      // One stream element per cui occurrence, then tally the occurrences per distinct cui.
+      return annotations.stream()
+            .flatMap( annotation -> getCuis( annotation ).stream() )
+            .collect( Collectors.groupingBy( Function.identity(), Collectors.counting() ) );
+   }
+
+   /**
     * @param annotations -
     * @return set of all Umls tuis associated with the annotation
     */

Modified: ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleAggregatePipeline.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleAggregatePipeline.java?rev=1764190&r1=1764189&r2=1764190&view=diff
==============================================================================
--- ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleAggregatePipeline.java (original)
+++ ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleAggregatePipeline.java Tue Oct 11 02:06:22 2016
@@ -1,17 +1,19 @@
 package org.apache.ctakes.examples.pipelines;
 
-import java.io.FileWriter;
-
 import org.apache.ctakes.clinicalpipeline.ClinicalPipelineFactory;
 import org.apache.ctakes.examples.ae.ExampleHelloWorldAnnotator;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-import org.apache.uima.jcas.JCas;
 import org.apache.uima.fit.factory.AggregateBuilder;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.fit.pipeline.SimplePipeline;
 import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+
+import java.io.FileWriter;
 
 /**
+ * Build and run a pipeline using uimafit {@link AggregateBuilder}, {@link SimplePipeline} and {@link JCasUtil}
+ *
  * Example of a running a pipeline programatically w/o uima xml descriptor xml files
  * Adds the default Tokenization pipeline and adding the Example HelloWorld Annotator
  * 

Added: ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleBuilderPipeline.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleBuilderPipeline.java?rev=1764190&view=auto
==============================================================================
--- ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleBuilderPipeline.java (added)
+++ ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleBuilderPipeline.java Tue Oct 11 02:06:22 2016
@@ -0,0 +1,60 @@
+package org.apache.ctakes.examples.pipelines;
+
+
+import org.apache.ctakes.clinicalpipeline.ClinicalPipelineFactory;
+import org.apache.ctakes.core.pipeline.EntityCollector;
+import org.apache.ctakes.core.pipeline.PipelineBuilder;
+import org.apache.ctakes.examples.ae.ExampleHelloWorldAnnotator;
+import org.apache.log4j.Logger;
+import org.apache.uima.UIMAException;
+
+import java.io.IOException;
+
+/**
+ * Build and run a pipeline using a {@link PipelineBuilder}.
+ * <p>
+ * Example of running a pipeline programmatically without uima xml descriptor files.
+ * Adds the default Tokenization pipeline and the Example HelloWorld Annotator,
+ * collects the entities and optionally writes xmi files.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 10/10/2016
+ */
+final public class ExampleBuilderPipeline {
+
+   static private final Logger LOGGER = Logger.getLogger( "ExampleBuilderPipeline" );
+
+   private ExampleBuilderPipeline() {
+   }
+
+   /**
+    * @param args an output directory for xmi files or none if xmi files are not wanted
+    */
+   public static void main( final String... args ) {
+      final String text = "Hello World!";
+      try {
+         final PipelineBuilder builder = new PipelineBuilder();
+         builder
+               // Add a simple pre-defined existing pipeline for Tokenization
+               // Could also add engines individually
+               .addDescription( ClinicalPipelineFactory.getTokenProcessingPipeline() )
+               // Add the new HelloWorld Example
+               .add( ExampleHelloWorldAnnotator.class )
+               // Collect the Entities
+               .collectEntities();
+         if ( args.length > 0 ) {
+            // Write the processed document as xmi to the directory given as the first argument
+            builder.writeXMIs( args[ 0 ] );
+         }
+         // Run the pipeline with specified text
+         builder.run( text );
+      } catch ( IOException | UIMAException multE ) {
+         // Pass the exception itself so that the stack trace and cause are not lost
+         LOGGER.error( multE.getMessage(), multE );
+      }
+      //Print out the IdentifiedAnnotation objects
+      LOGGER.info( "\n" + EntityCollector.getInstance().toString() );
+   }
+
+
+}

Added: ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleReaderPipeline.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleReaderPipeline.java?rev=1764190&view=auto
==============================================================================
--- ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleReaderPipeline.java (added)
+++ ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleReaderPipeline.java Tue Oct 11 02:06:22 2016
@@ -0,0 +1,63 @@
+package org.apache.ctakes.examples.pipelines;
+
+
+import org.apache.ctakes.core.pipeline.EntityCollector;
+import org.apache.ctakes.core.pipeline.PipelineBuilder;
+import org.apache.ctakes.core.pipeline.PipelineReader;
+import org.apache.ctakes.postagger.POSTagger;
+import org.apache.log4j.Logger;
+import org.apache.uima.UIMAException;
+
+import java.io.IOException;
+
+/**
+ * Build and run a pipeline using a {@link PipelineReader} and a {@link PipelineBuilder}.
+ * <p>
+ * Example of running a pipeline programmatically without uima xml descriptor files.
+ * Reads the Tokenization engines from a first pipeline file, adds the POS Tagger in code,
+ * then reads the Example HelloWorld Annotator from a second pipeline file.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 10/10/2016
+ */
+final public class ExampleReaderPipeline {
+
+   static private final Logger LOGGER = Logger.getLogger( "ExampleReaderPipeline" );
+
+   static private final String PIPELINE_1_PATH = "org/apache/ctakes/examples/pipelines/ExamplePipeline1.txt";
+   static private final String PIPELINE_2_PATH = "org/apache/ctakes/examples/pipelines/ExamplePipeline2.txt";
+
+   private ExampleReaderPipeline() {
+   }
+
+   /**
+    * @param args an output directory for xmi files or none if xmi files are not wanted
+    */
+   public static void main( final String... args ) {
+      final String text = "Hello World!";
+      try {
+         // Add a simple pre-defined existing pipeline for Tokenization from file
+         final PipelineReader reader = new PipelineReader( PIPELINE_1_PATH );
+         // add the POS Tagger manually
+         final PipelineBuilder builder = reader.getBuilder();
+         builder.addDescription( POSTagger.createAnnotatorDescription() );
+         // Add the new HelloWorld Example by reading from file
+         reader.loadPipelineFile( PIPELINE_2_PATH );
+         // Collect the Entities
+         builder.collectEntities();
+         if ( args.length > 0 ) {
+            // Write the processed document as xmi to the directory given as the first argument
+            builder.writeXMIs( args[ 0 ] );
+         }
+         // Run the pipeline with specified text
+         builder.run( text );
+      } catch ( IOException | UIMAException multE ) {
+         // Pass the exception itself so that the stack trace and cause are not lost
+         LOGGER.error( multE.getMessage(), multE );
+      }
+      //Print out the IdentifiedAnnotation objects
+      LOGGER.info( "\n" + EntityCollector.getInstance().toString() );
+   }
+
+
+}

Added: ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline1.txt
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline1.txt?rev=1764190&view=auto
==============================================================================
--- ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline1.txt (added)
+++ ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline1.txt Tue Oct 11 02:06:22 2016
@@ -0,0 +1,8 @@
+// Equivalent of ClinicalPipelineFactory.getTokenProcessingPipeline()
+add SimpleSegmentAnnotator
+add SentenceDetector
+add TokenizerAnnotatorPTB
+add ContextDependentTokenizerAnnotator
+
+// The POSTagger has a -complex- startup and should be added manually
+# add POSTagger

Added: ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline2.txt
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline2.txt?rev=1764190&view=auto
==============================================================================
--- ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline2.txt (added)
+++ ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline2.txt Tue Oct 11 02:06:22 2016
@@ -0,0 +1,4 @@
+// After -manual- addition of POSTagger
+// Can use addPackage then add, or just use add with the fully-specified class and package
+addPackage org.apache.ctakes.examples.ae
+add ExampleHelloWorldAnnotator