You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2016/10/11 02:06:23 UTC
svn commit: r1764190 - in /ctakes/trunk:
ctakes-core/src/main/java/org/apache/ctakes/core/ae/
ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/
ctakes-core/src/main/java/org/apache/ctakes/core/util/
ctakes-examples/src/main/java/org/apache/cta...
Author: seanfinan
Date: Tue Oct 11 02:06:22 2016
New Revision: 1764190
URL: http://svn.apache.org/viewvc?rev=1764190&view=rev
Log:
PropertyAeFactory javadocs, refactoring
SentenceDetector set default model in Parameter
DocumentIdAnnotationUtil protection against missing common Views in the Cas
OntologyConceptUtil added getCuiCounts()
ExampleAggregatePipeline javadocs
Adding pipeline simplification utilities to core:
CuiCollector stores cuis
EntityCollector stores entity info
PipelineBuilder adds really simplified pipeline construction
PipelineReader adds simple pipeline construction using instructions in a flat file
Adding examples using new pipeline utilities
Added:
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/CuiCollector.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/EntityCollector.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineBuilder.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineReader.java
ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleBuilderPipeline.java
ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleReaderPipeline.java
ctakes/trunk/ctakes-examples/src/main/resources/org/
ctakes/trunk/ctakes-examples/src/main/resources/org/apache/
ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/
ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/
ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/
ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline1.txt
ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline2.txt
Modified:
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/PropertyAeFactory.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetector.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/DocumentIDAnnotationUtil.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/OntologyConceptUtil.java
ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleAggregatePipeline.java
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/PropertyAeFactory.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/PropertyAeFactory.java?rev=1764190&r1=1764189&r2=1764190&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/PropertyAeFactory.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/PropertyAeFactory.java Tue Oct 11 02:06:22 2016
@@ -15,8 +15,8 @@ import java.util.Map;
import java.util.Properties;
/**
- * There may be some way to get values directly into the root UimaContext.
- * This factory can load plain old java properties files and pass the specified properties as parameters for AE creation
+ * This factory can load plain old java .properties files and pass the specified properties as parameters for AE creation.
+ * There may be some way to get values directly into the root UimaContext, but for now this works with UimaFit parameters.
*
* @author SPF , chip-nlp
* @version %I%
@@ -33,29 +33,55 @@ public enum PropertyAeFactory {
// Use a single hashmap so that multiple properties files can be used
- final private Map<String, String> _properties = new HashMap<>();
+ final private Map<String, Object> _properties = new HashMap<>();
+ /**
+ * Load a .properties file with key value pairs
+ *
+ * @param filePath path to the .properties file to load
+ */
synchronized public void loadPropertyFile( final String filePath ) {
try ( InputStream stream = FileLocator.getAsStream( filePath ) ) {
final Properties properties = new Properties();
properties.load( stream );
- for ( String name : properties.stringPropertyNames() ) {
- final String value = properties.getProperty( name );
- if ( value == null ) {
- LOGGER.warn( "Property has no value: " + name );
- } else {
- _properties.put( name, value );
- }
- }
+ properties.entrySet().forEach( e -> _properties.put( e.getKey().toString(), e.getValue() ) );
} catch ( IOException ioE ) {
LOGGER.error( "Property File not found: " + filePath );
}
}
- static private Object[] createParameters( final Map<String, String> parameterMap ) {
+ /**
+ * Add key value pairs to the stored properties
+ *
+ * @param parameters key value pairs
+ */
+ synchronized public void addParameters( final Object... parameters ) {
+ if ( parameters.length == 0 ) {
+ LOGGER.warn( "No parameters specified." );
+ return;
+ }
+ if ( parameters.length % 2 != 0 ) {
+ LOGGER.error( "Odd number of parameters provided. Should be key value pairs." );
+ return;
+ }
+ for ( int i = 0; i < parameters.length; i += 2 ) {
+ if ( parameters[ i ] instanceof String ) {
+ _properties.put( (String)parameters[ i ], parameters[ i + 1 ] );
+ } else {
+ LOGGER.warn( "Parameter " + i + " not a String, using " + parameters[ i ].toString() );
+ _properties.put( parameters[ i ].toString(), parameters[ i + 1 ] );
+ }
+ }
+ }
+
+ /**
+ * @param parameterMap map of parameter names and values
+ * @return array of Objects representing name value pairs
+ */
+ static private Object[] createParameters( final Map<String, Object> parameterMap ) {
final Object[] parameters = new Object[ parameterMap.size() * 2 ];
int i = 0;
- for ( Map.Entry<String, String> entry : parameterMap.entrySet() ) {
+ for ( Map.Entry<String, Object> entry : parameterMap.entrySet() ) {
parameters[ i ] = entry.getKey();
parameters[ i + 1 ] = entry.getValue();
i += 2;
@@ -68,19 +94,29 @@ public enum PropertyAeFactory {
* @return new parameter arrays containing parameters loaded by this factory and followed by specified parameters
*/
synchronized private Object[] getAllParameters( final Object... parameters ) {
- if ( _properties.isEmpty() ) {
- return parameters;
+ if ( parameters.length == 0 ) {
+ return createParameters( _properties );
}
- if ( parameters == null || parameters.length == 0 ) {
+ if ( parameters.length % 2 != 0 ) {
+ LOGGER.error( "Odd number of parameters provided. Should be key value pairs." );
return createParameters( _properties );
}
- final Map<String, String> parameterMap = new HashMap<>( _properties );
+ if ( _properties.isEmpty() ) {
+ return parameters;
+ }
+ final Map<String, Object> parameterMap = new HashMap<>( _properties );
for ( int i = 0; i < parameters.length; i += 2 ) {
- parameterMap.put( parameters[ i ].toString(), parameters[ i + 1 ].toString() );
+ if ( parameters[ i ] instanceof String ) {
+ parameterMap.put( (String)parameters[ i ], parameters[ i + 1 ] );
+ } else {
+ LOGGER.warn( "Parameter " + i + " not a String, using " + parameters[ i ].toString() );
+ parameterMap.put( parameters[ i ].toString(), parameters[ i + 1 ] );
+ }
}
return createParameters( parameterMap );
}
+
/**
* This method should be avoided. See the bottom of https://uima.apache.org/d/uimafit-current/api/index.html
*
@@ -143,7 +179,7 @@ public enum PropertyAeFactory {
* @return Description with specified parameters plus those loaded from properties that is wrapped with a simple Logger AE that logs the Start and Finish of the process
* @throws ResourceInitializationException if UimaFit has a problem
*/
- public AnalysisEngineDescription createLoggedDescription( final AnalysisEngineDescription mainDescription )
+ static public AnalysisEngineDescription createLoggedDescription( final AnalysisEngineDescription mainDescription )
throws ResourceInitializationException {
return StartFinishLogger.createLoggedDescription( mainDescription );
}
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetector.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetector.java?rev=1764190&r1=1764189&r2=1764190&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetector.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetector.java Tue Oct 11 02:06:22 2016
@@ -18,28 +18,11 @@
*/
package org.apache.ctakes.core.ae;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.charset.Charset;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Set;
-
import opennlp.tools.dictionary.Dictionary;
-import opennlp.tools.sentdetect.DefaultSDContextGenerator;
-import opennlp.tools.sentdetect.SentenceDetectorME;
-import opennlp.tools.sentdetect.SentenceModel;
-import opennlp.tools.sentdetect.SentenceSample;
-import opennlp.tools.sentdetect.SentenceSampleStream;
+import opennlp.tools.sentdetect.*;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.TrainingParameters;
-
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.core.sentence.EndOfSentenceScannerImpl;
import org.apache.ctakes.core.sentence.SentenceDetectorCtakes;
@@ -51,12 +34,16 @@ import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.analysis_engine.annotator.AnnotatorProcessException;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+
+import java.io.*;
+import java.nio.charset.Charset;
+import java.util.*;
/**
* Wraps the OpenNLP sentence detector in a UIMA annotator
@@ -82,9 +69,9 @@ public class SentenceDetector extends JC
public static final String SD_MODEL_FILE_PARAM = PARAM_SD_MODEL_FILE; // backwards compatibility
@ConfigurationParameter(
name = PARAM_SD_MODEL_FILE,
- mandatory = true,
- description = "Path to sentence detector model file"
- )
+ description = "Path to sentence detector model file",
+ defaultValue = "org/apache/ctakes/core/sentdetect/sd-med-model.zip"
+ )
private String sdModelPath;
private opennlp.tools.sentdetect.SentenceModel sdmodel;
@@ -250,10 +237,8 @@ public class SentenceDetector extends JC
}
public static AnalysisEngineDescription createAnnotatorDescription() throws ResourceInitializationException{
- return AnalysisEngineFactory.createEngineDescription(SentenceDetector.class,
- SentenceDetector.PARAM_SD_MODEL_FILE,
- "org/apache/ctakes/core/sentdetect/sd-med-model.zip");
- }
+ return AnalysisEngineFactory.createEngineDescription( SentenceDetector.class );
+ }
/**
* Train a new sentence detector from the training data in the first file
Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/CuiCollector.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/CuiCollector.java?rev=1764190&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/CuiCollector.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/CuiCollector.java Tue Oct 11 02:06:22 2016
@@ -0,0 +1,135 @@
+package org.apache.ctakes.core.pipeline;
+
+
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.ctakes.core.util.OntologyConceptUtil;
+import org.apache.log4j.Logger;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.jcas.JCas;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * Stores a collection of Cuis from a run, along with their associated Document Ids
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 10/9/2016
+ */
+public enum CuiCollector {
+ INSTANCE;
+
+ static public CuiCollector getInstance() {
+ return INSTANCE;
+ }
+
+ static private final Logger LOGGER = Logger.getLogger( "CuiCollector" );
+
+ private final Map<String, Map<String, Long>> _cuiCountMap = new HashMap<>();
+
+ /**
+ * @return Ids for documents that have Cuis stored in the CuiCollector
+ */
+ public Collection<String> getDocumentIds() {
+ return Collections.unmodifiableCollection( _cuiCountMap.keySet() );
+ }
+
+ /**
+ * @param documentId id for some document
+ * @return cuis discovered in the document
+ */
+ public Collection<String> getCuis( final String documentId ) {
+ return Collections.unmodifiableCollection( get( documentId ).keySet() );
+ }
+
+ /**
+ * @param documentId id for some document
+ * @return map of cuis discovered in the document and how many times they were discovered
+ */
+ public Map<String, Long> getCuiCounts( final String documentId ) {
+ return get( documentId );
+ }
+
+ /**
+ * @return all cuis found in all documents in the run
+ */
+ public Collection<String> getCuis() {
+ return Collections.unmodifiableCollection( _cuiCountMap.values().stream()
+ .map( Map::keySet )
+ .flatMap( Collection::stream )
+ .collect( Collectors.toSet() ) );
+ }
+
+ /**
+ * @return map of cuis discovered in all documents in the run and how many times they were discovered
+ */
+ public Map<String, Long> getCuiCounts() {
+ return Collections.unmodifiableMap(
+ _cuiCountMap.values().stream()
+ .map( Map::entrySet )
+ .flatMap( Collection::stream )
+ .collect( Collectors.toMap( Map.Entry::getKey, Map.Entry::getValue, ( n1, n2 ) -> n1 + n2 ) ) );
+ }
+
+ /**
+ * @param documentId id for some document
+ * @return map of cuis discovered in the document and how many times they were discovered, with a warning if none exist
+ */
+ private Map<String, Long> get( final String documentId ) {
+ final Map<String, Long> cuiCounts = _cuiCountMap.get( documentId );
+ if ( cuiCounts == null ) {
+ LOGGER.warn( "No Cuis for " + documentId );
+ return Collections.emptyMap();
+ }
+ return Collections.unmodifiableMap( cuiCounts );
+ }
+
+ /**
+ * @return table of document ids and cui counts, one row per document, rows ordered by document id
+ */
+ @Override
+ public String toString() {
+ final List<String> allCuis = getCuis().stream().sorted().collect( Collectors.toList() );
+ final String header = "DOCUMENT_ID|" + String.join( "|", allCuis );
+ final String rows = _cuiCountMap.entrySet().stream()
+ .sorted( Map.Entry.comparingByKey() ) // Map.Entry is not Comparable: a bare sorted() throws ClassCastException
+ .map( e -> createRowText( e.getKey(), e.getValue(), allCuis ) )
+ .collect( Collectors.joining() );
+ return header + "\n" + rows;
+ }
+
+ /**
+ * @param documentId id for some document
+ * @param cuiCounts map of cuis discovered in the document and how many times they were discovered
+ * @param allCuis all cuis found in all documents in the run
+ * @return row of document id and cui counts
+ */
+ static private String createRowText( final String documentId, final Map<String, Long> cuiCounts,
+ final List<String> allCuis ) {
+ final StringBuilder sb = new StringBuilder();
+ sb.append( documentId );
+ for ( String cui : allCuis ) {
+ sb.append( "|" );
+ final Long count = cuiCounts.get( cui );
+ sb.append( count == null ? "0" : count );
+ }
+ sb.append( "\n" );
+ return sb.toString();
+ }
+
+ /**
+ * Analysis Engine that stores collections of cuis by document id in the CuiCollector
+ */
+ static public final class CuiCollectorEngine extends JCasAnnotator_ImplBase {
+ @Override
+ public void process( final JCas jCas ) {
+ LOGGER.info( "Starting processing" );
+ final String id = DocumentIDAnnotationUtil.getDeepDocumentId( jCas );
+ final Map<String, Long> cuiCounts = OntologyConceptUtil.getCuiCounts( jCas );
+ CuiCollector.getInstance()._cuiCountMap.put( id, cuiCounts );
+ LOGGER.info( "Finished processing" );
+ }
+ }
+
+}
Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/EntityCollector.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/EntityCollector.java?rev=1764190&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/EntityCollector.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/EntityCollector.java Tue Oct 11 02:06:22 2016
@@ -0,0 +1,134 @@
+package org.apache.ctakes.core.pipeline;
+
+
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.log4j.Logger;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * Stores a collection of simple entity information from a run, along with their associated Document Ids
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 10/10/2016
+ */
+public enum EntityCollector {
+ INSTANCE;
+
+ static public EntityCollector getInstance() {
+ return INSTANCE;
+ }
+
+ static private final Logger LOGGER = Logger.getLogger( "EntityCollector" );
+
+
+ private final Map<String, Collection<Entity>> _entityMap = new HashMap<>();
+
+ /**
+ * @return Ids for documents that have entity information stored in the EntityCollector
+ */
+ public Collection<String> getDocumentIds() {
+ return Collections.unmodifiableCollection( _entityMap.keySet() );
+ }
+
+ /**
+ * @param documentId id for some document
+ * @return simple entity objects for the document
+ */
+ public Collection<Entity> getEntities( final String documentId ) {
+ final Collection<Entity> entities = _entityMap.get( documentId );
+ if ( entities == null ) {
+ LOGGER.warn( "No Entities for " + documentId );
+ return Collections.emptyList();
+ }
+ return Collections.unmodifiableCollection( entities );
+ }
+
+ /**
+ * @return staggered list of document ids, entities and entity properties
+ */
+ @Override
+ public String toString() {
+ final StringBuilder sb = new StringBuilder();
+ for ( Map.Entry<String, Collection<Entity>> entry : _entityMap.entrySet() ) {
+ sb.append( entry.getKey() ).append( "\n" );
+ entry.getValue().stream().map( Entity::toString ).forEach( sb::append );
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Holds basic information from an IdentifiedAnnotation.
+ * Necessary so the IdentifiedAnnotation can be cleaned from the Cas
+ */
+ static public final class Entity {
+ private final int _begin;
+ private final int _end;
+ private final String _coveredText;
+ private final int _polarity;
+ private final int _uncertainty;
+ private final boolean _conditional;
+ private final boolean _generic;
+ private final String _subject;
+ private final int _historyOf;
+
+ private Entity( final IdentifiedAnnotation annotation ) {
+ _begin = annotation.getBegin();
+ _end = annotation.getEnd();
+ _coveredText = annotation.getCoveredText();
+ _polarity = annotation.getPolarity();
+ _uncertainty = annotation.getUncertainty();
+ _conditional = annotation.getConditional();
+ _generic = annotation.getGeneric();
+ _subject = annotation.getSubject();
+ _historyOf = annotation.getHistoryOf();
+ }
+
+ /**
+ * @return row of entity properties
+ */
+ @Override
+ public String toString() {
+ final StringBuilder sb = new StringBuilder();
+ sb.append( '\t' ).append( _begin ).append( ',' ).append( _end ).append( '\t' )
+ .append( _coveredText ).append( "\n" );
+ sb.append( "\t\t| " ).append( _polarity < 0 ? "negated" : "affirmed" );
+ sb.append( " | " ).append( _uncertainty < 0 ? "uncertain" : "certain" );
+ sb.append( " | " ).append( _conditional ? "conditional" : "not conditional" );
+ sb.append( " | " ).append( _generic ? "generic" : "not generic" );
+ sb.append( " | " ).append( _subject == null ? "Patient" : _subject );
+ sb.append( " | history of: " ).append( _historyOf );
+ sb.append( '\n' );
+ return sb.toString();
+ }
+ }
+
+ /**
+ * Analysis Engine that stores collections of entities by document id in the EntityCollector
+ */
+ static public final class EntityCollectorEngine extends JCasAnnotator_ImplBase {
+ @Override
+ public void process( final JCas jCas ) {
+ LOGGER.info( "Starting processing" );
+ final String id = DocumentIDAnnotationUtil.getDeepDocumentId( jCas );
+ final Collection<IdentifiedAnnotation> annotations = JCasUtil.select( jCas, IdentifiedAnnotation.class );
+ putEntities( id, annotations );
+ LOGGER.info( "Finished processing" );
+ }
+
+ static private void putEntities( final String documentId, final Collection<IdentifiedAnnotation> annotations ) {
+ final Collection<Entity> entities = annotations.stream().map( Entity::new ).collect( Collectors.toList() );
+ EntityCollector.getInstance()._entityMap.put( documentId, entities );
+ }
+ }
+
+}
Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineBuilder.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineBuilder.java?rev=1764190&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineBuilder.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineBuilder.java Tue Oct 11 02:06:22 2016
@@ -0,0 +1,237 @@
+package org.apache.ctakes.core.pipeline;
+
+
+import org.apache.ctakes.core.ae.PropertyAeFactory;
+import org.apache.ctakes.core.cc.XmiWriterCasConsumerCtakes;
+import org.apache.ctakes.core.cr.FilesInDirectoryCollectionReader;
+import org.apache.log4j.Logger;
+import org.apache.uima.UIMAException;
+import org.apache.uima.analysis_component.AnalysisComponent;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.fit.factory.AggregateBuilder;
+import org.apache.uima.fit.factory.CollectionReaderFactory;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.fit.pipeline.SimplePipeline;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Creates a pipeline using a small set of simple methods.
+ * <p>
+ * Some methods are order-specific and calls will directly impact ordering within the pipeline.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 10/9/2016
+ */
+final public class PipelineBuilder {
+
+ static private final Logger LOGGER = Logger.getLogger( "PipelineBuilder" );
+
+
+ private final List<AnalysisEngineDescription> _aeList;
+ private CollectionReader _reader;
+
+ public PipelineBuilder() {
+ _aeList = new ArrayList<>();
+ }
+
+ /**
+ * Use of this method is order-specific
+ *
+ * @param filePath path to .properties file with ae parameter name value pairs
+ * @return this PipelineBuilder
+ */
+ public PipelineBuilder loadParameters( final String filePath ) {
+ PropertyAeFactory.getInstance().loadPropertyFile( filePath );
+ return this;
+ }
+
+ /**
+ * Use of this method is order-specific
+ *
+ * @param parameters add ae parameter name value pairs
+ * @return this PipelineBuilder
+ */
+ public PipelineBuilder addParameters( final Object... parameters ) {
+ PropertyAeFactory.getInstance().addParameters( parameters );
+ return this;
+ }
+
+ /**
+ * Use of this method is not order-specific
+ *
+ * @param reader Collection Reader to place at the beginning of the pipeline
+ * @return this PipelineBuilder
+ */
+ public PipelineBuilder reader( final CollectionReader reader ) {
+ _reader = reader;
+ return this;
+ }
+
+ /**
+ * Adds a Collection reader to the beginning of the pipeline that will read files in a directory.
+ * Relies upon {@link FilesInDirectoryCollectionReader#PARAM_INPUTDIR} having been specified.
+ * Use of this method is not order-specific.
+ *
+ * @return this PipelineBuilder
+ * @throws UIMAException if the collection reader cannot be created
+ */
+ public PipelineBuilder readFiles() throws UIMAException {
+ _reader = CollectionReaderFactory.createReader( FilesInDirectoryCollectionReader.class );
+ return this;
+ }
+
+ /**
+ * Adds a Collection reader to the beginning of the pipeline that will read files in a directory.
+ * Use of this method is not order-specific
+ *
+ * @param inputDirectory directory with input files
+ * @return this PipelineBuilder
+ * @throws UIMAException if the collection reader cannot be created
+ */
+ public PipelineBuilder readFiles( final String inputDirectory ) throws UIMAException {
+ _reader = CollectionReaderFactory.createReader( FilesInDirectoryCollectionReader.class,
+ FilesInDirectoryCollectionReader.PARAM_INPUTDIR,
+ inputDirectory );
+ return this;
+ }
+
+ /**
+ * Use of this method is order-specific.
+ *
+ * @param component ae or cc component class to add to the pipeline
+ * @param parameters ae or cc parameter name value pairs. May be empty.
+ * @return this PipelineBuilder
+ * @throws ResourceInitializationException if the component cannot be created
+ */
+ public PipelineBuilder add( final Class<? extends AnalysisComponent> component,
+ final Object... parameters ) throws ResourceInitializationException {
+ _aeList.add( PropertyAeFactory.getInstance().createDescription( component, parameters ) );
+ return this;
+ }
+
+ /**
+ * Adds an ae or cc wrapped with "Starting processing" and "Finished processing" log messages
+ * Use of this method is order-specific.
+ *
+ * @param component ae or cc component class to add to the pipeline
+ * @param parameters ae or cc parameter name value pairs. May be empty.
+ * @return this PipelineBuilder
+ * @throws ResourceInitializationException if the component cannot be created
+ */
+ public PipelineBuilder addLogged( final Class<? extends AnalysisComponent> component,
+ final Object... parameters ) throws ResourceInitializationException {
+ _aeList.add( PropertyAeFactory.getInstance().createLoggedDescription( component, parameters ) );
+ return this;
+ }
+
+ /**
+ * Use of this method is order-specific.
+ *
+ * @param description ae or cc component class description to add to the pipeline
+ * @return this PipelineBuilder
+ */
+ public PipelineBuilder addDescription( final AnalysisEngineDescription description ) {
+ _aeList.add( description );
+ return this;
+ }
+
+ /**
+ * Adds ae that maintains CUI information throughout the run.
+ * CUI information can later be accessed using the {@link CuiCollector} singleton
+ * Use of this method is order-specific.
+ *
+ * @return this PipelineBuilder
+ * @throws ResourceInitializationException if the CuiCollector engine cannot be created
+ */
+ public PipelineBuilder collectCuis() throws ResourceInitializationException {
+ return add( CuiCollector.CuiCollectorEngine.class );
+ }
+
+ /**
+ * Adds ae that maintains simple Entity information throughout the run.
+ * Entity information can later be accessed using the {@link EntityCollector} singleton
+ * Use of this method is order-specific.
+ *
+ * @return this PipelineBuilder
+ * @throws ResourceInitializationException if the EntityCollector engine cannot be created
+ */
+ public PipelineBuilder collectEntities() throws ResourceInitializationException {
+ return add( EntityCollector.EntityCollectorEngine.class );
+ }
+
+ /**
+ * Adds ae that writes an xmi file.
+ * Relies upon {@link XmiWriterCasConsumerCtakes#PARAM_OUTPUTDIR} having been specified.
+ * Use of this method is order-specific.
+ *
+ * @return this PipelineBuilder
+ * @throws ResourceInitializationException if the Xmi writer engine cannot be created
+ */
+ public PipelineBuilder writeXMIs() throws ResourceInitializationException {
+ return add( XmiWriterCasConsumerCtakes.class );
+ }
+
+ /**
+ * Adds ae that writes an xmi file.
+ * Use of this method is order-specific.
+ *
+ * @param outputDirectory directory in which xmi files should be written
+ * @return this PipelineBuilder
+ * @throws ResourceInitializationException if the Xmi writer engine cannot be created
+ */
+ public PipelineBuilder writeXMIs( final String outputDirectory ) throws ResourceInitializationException {
+ return add( XmiWriterCasConsumerCtakes.class, XmiWriterCasConsumerCtakes.PARAM_OUTPUTDIR, outputDirectory );
+ }
+
+ /**
+ * Run the pipeline using the specified collection reader. If no collection reader was specified, logs an error and does nothing.
+ * Use of this method is order-specific.
+ *
+ * @return this PipelineBuilder
+ * @throws IOException if the pipeline could not be run
+ * @throws UIMAException if the pipeline could not be run
+ */
+ public PipelineBuilder run() throws IOException, UIMAException {
+ if ( _reader == null ) {
+ LOGGER.error( "No Collection Reader specified." );
+ return this;
+ }
+ final AggregateBuilder builder = new AggregateBuilder();
+ _aeList.forEach( builder::add );
+ final AnalysisEngineDescription desc = builder.createAggregateDescription();
+ SimplePipeline.runPipeline( _reader, desc );
+ return this;
+ }
+
+ /**
+ * Run the pipeline on the given text. If a collection reader was specified, logs an error and does nothing.
+ * Use of this method is order-specific.
+ *
+ * @param text text upon which to run this pipeline
+ * @return this PipelineBuilder
+ * @throws IOException if the pipeline could not be run
+ * @throws UIMAException if the pipeline could not be run
+ */
+ public PipelineBuilder run( final String text ) throws IOException, UIMAException {
+ if ( _reader != null ) {
+ LOGGER.error( "Collection Reader specified, ignoring." );
+ return this;
+ }
+ final JCas jcas = JCasFactory.createJCas();
+ jcas.setDocumentText( text );
+ final AggregateBuilder builder = new AggregateBuilder();
+ _aeList.forEach( builder::add );
+ final AnalysisEngineDescription desc = builder.createAggregateDescription();
+ SimplePipeline.runPipeline( jcas, desc );
+ return this;
+ }
+
+
+}
Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineReader.java?rev=1764190&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineReader.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineReader.java Tue Oct 11 02:06:22 2016
@@ -0,0 +1,339 @@
+package org.apache.ctakes.core.pipeline;
+
+
+import org.apache.ctakes.core.cc.XmiWriterCasConsumerCtakes;
+import org.apache.ctakes.core.cr.FilesInDirectoryCollectionReader;
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.log4j.Logger;
+import org.apache.uima.UIMAException;
+import org.apache.uima.analysis_component.AnalysisComponent;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.resource.ResourceInitializationException;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.regex.Pattern;
+
+/**
+ * Creates a pipeline (PipelineBuilder) from specifications in a flat plaintext file.
+ * <p>
+ * <p>There are several basic commands:
+ * addPackage <i>user_package_name</i>
+ * loadParameters <i>path_to_properties_file_with_ae_parameters</i>
+ * addParameters <i>ae_parameter_name</i>|<i>ae_parameter_value</i>| ...
+ * reader <i>collection_reader_class_name</i>
+ * readFiles <i>input_directory</i>
+ * <i>input_directory</i> can be empty if {@link FilesInDirectoryCollectionReader#PARAM_INPUTDIR} was specified
+ * add <i>ae_or_cc_class_name</i>
+ * addLogged <i>ae_or_cc_class_name</i>
+ * collectCuis
+ * collectEntities
+ * writeXmis <i>output_directory</i>
+ * <i>output_directory</i> can be empty if {@link XmiWriterCasConsumerCtakes#PARAM_OUTPUTDIR} was specified
+ * <p>
+ * # and // may be used to mark line comments
+ * </p>
+ * <p>
+ * class names must be fully-specified with package unless they are in standard ctakes cr ae or cc packages,
+ * or in a package specified by an earlier addPackage command.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 10/10/2016
+ */
+final public class PipelineReader {
+
+ static private final Logger LOGGER = Logger.getLogger( "PipelineReader" );
+
+ // Module part of the "org.apache.ctakes.<module>.ae", ".cc" and ".cr" packages searched for simple class names.
+ // Each entry must be a legal java package fragment, otherwise no class can ever be found through it.
+ static private final String[] CTAKES_PACKAGES
+       = { "core",
+           "assertion",
+           "chunker",
+           "clinicalpipeline",
+           "constituency.parser",
+           "contexttokenizer",
+           "coreference",
+           "dependency.parser",
+           "dictionary.lookup2",
+           "dictionary.lookup",
+           "temporal",
+           "drugner",
+           "lvg",
+           "necontexts",
+           "postagger",
+           "preprocessor",
+           "relationextractor",
+           "sideeffect",
+           "smokingstatus",
+           "template.filler" };
+
+ // Empty argument array handed to the ResourceInitializationException constructors used in this class
+ static private final Object[] EMPTY_OBJECT_ARRAY = new Object[ 0 ];
+
+ // Splits the text of an addParameters command at each '|'
+ static private final Pattern SPLIT_PATTERN = Pattern.compile( "\\|" );
+
+ // Builder that receives every command; assigned once at construction and never replaced
+ private final PipelineBuilder _builder;
+
+ // Packages named by addPackage commands, searched in the order in which they were added
+ private final Collection<String> _userPackages;
+
+
+ /**
+ * Create an empty PipelineReader.
+ * It starts with a new PipelineBuilder and without any user packages.
+ */
+ public PipelineReader() {
+ _builder = new PipelineBuilder();
+ _userPackages = new ArrayList<>();
+ }
+
+ /**
+  * Create a PipelineReader and immediately apply the commands of a pipeline command file.
+  *
+  * @param filePath path to the pipeline command file
+  * @throws UIMAException if the pipeline cannot be loaded
+  */
+ public PipelineReader( final String filePath ) throws UIMAException {
+    // Start from the same state as an empty reader, then apply the commands in the file
+    this();
+    loadPipelineFile( filePath );
+ }
+
+ /**
+  * Load a file with command parameter pairs for building a pipeline.
+  * Empty lines and lines starting with // or # are skipped.  Every other line is split at its
+  * first space into a command and a parameter; a line without a space is a command with an empty parameter.
+  * A file that cannot be read is logged as an error; commands read before the failure remain applied.
+  *
+  * @param filePath path to the pipeline command file
+  * @throws UIMAException if a command in the file could not be executed
+  */
+ public void loadPipelineFile( final String filePath ) throws UIMAException {
+    try ( final BufferedReader reader
+                = new BufferedReader( new InputStreamReader( FileLocator.getAsStream( filePath ) ) ) ) {
+       String line;
+       while ( (line = reader.readLine()) != null ) {
+          line = line.trim();
+          if ( line.isEmpty() || line.startsWith( "//" ) || line.startsWith( "#" ) ) {
+             continue;
+          }
+          // The line is trimmed, so its first space can only sit between the command and its parameter
+          final int spaceIndex = line.indexOf( ' ' );
+          if ( spaceIndex < 0 ) {
+             addToPipeline( line, "" );
+          } else {
+             addToPipeline( line.substring( 0, spaceIndex ), line.substring( spaceIndex + 1 ).trim() );
+          }
+       }
+    } catch ( IOException ioE ) {
+       // Read failures end up here as well as a missing file, so log the actual cause
+       LOGGER.error( "Could not read Pipeline File: " + filePath, ioE );
+    }
+ }
+
+ /**
+ * Get the builder that this reader fills.  The reader keeps using the returned builder,
+ * so changes made to it directly and commands loaded afterwards act upon the same pipeline.
+ *
+ * @return the PipelineBuilder with its current state set by this PipelineReader
+ */
+ public PipelineBuilder getBuilder() {
+ return _builder;
+ }
+
+ /**
+  * Execute a single pipeline command, either on the builder or on this reader's list of user packages.
+  * An unknown command is logged as an error and otherwise ignored.
+  *
+  * @param command   specified by first word in the file line
+  * @param parameter specified by the remainder of the file line, empty if the line held only a command
+  * @throws UIMAException if the command could not be executed
+  */
+ private void addToPipeline( final String command, final String parameter ) throws UIMAException {
+    switch ( command ) {
+       case "addPackage":
+          _userPackages.add( parameter );
+          break;
+       case "loadParameters":
+          _builder.loadParameters( parameter );
+          break;
+       case "addParameters":
+          _builder.addParameters( getStrings( parameter ) );
+          break;
+       case "reader":
+          _builder.reader( createReader( parameter ) );
+          break;
+       case "readFiles":
+          // Without a directory the builder is asked to read files with no explicit input directory
+          if ( parameter.isEmpty() ) {
+             _builder.readFiles();
+          } else {
+             _builder.readFiles( parameter );
+          }
+          break;
+       case "add":
+          _builder.add( getComponentClass( parameter ) );
+          break;
+       case "addLogged":
+          _builder.addLogged( getComponentClass( parameter ) );
+          break;
+       case "collectCuis":
+          _builder.collectCuis();
+          break;
+       case "collectEntities":
+          // The misspelled label below used to be the only accepted form; it is kept for files written against it
+       case "collectEntites":
+          _builder.collectEntities();
+          break;
+       case "writeXmis":
+          // Without a directory the builder is asked to write xmi files with no explicit output directory
+          if ( parameter.isEmpty() ) {
+             _builder.writeXMIs();
+          } else {
+             _builder.writeXMIs( parameter );
+          }
+          break;
+       default:
+          LOGGER.error( "Unknown Command: " + command );
+    }
+ }
+
+ /**
+  * Find the class of an ae or cc component.
+  * The name is first tried exactly as given.  If no such class exists, it is searched for as a
+  * simple name in the user packages and in the ae and cc packages of the standard ctakes modules.
+  *
+  * @param className fully-specified or simple name of an ae or cc component class
+  * @return discovered class for ae or cc
+  * @throws ResourceInitializationException if the class could not be found or is not an AnalysisComponent
+  */
+ private Class<? extends AnalysisComponent> getComponentClass( final String className ) throws
+       ResourceInitializationException {
+    Class<?> componentClass;
+    try {
+       componentClass = Class.forName( className );
+    } catch ( ClassNotFoundException cnfE ) {
+       // No class with exactly that name, so treat it as a simple name within the known packages
+       componentClass = getPackagedComponent( className );
+    }
+    if ( componentClass == null ) {
+       throw new ResourceInitializationException(
+             "No Analysis Component found for " + className, EMPTY_OBJECT_ARRAY );
+    }
+    assertClassType( componentClass, AnalysisComponent.class );
+    // The assertion above guarantees the type, so this checked narrowing cannot fail
+    return componentClass.asSubclass( AnalysisComponent.class );
+ }
+
+ /**
+ * @param className fully-specified or simple name of an ae or cc component class
+ * @return discovered class for ae or cc
+ */
+ private Class<? extends AnalysisComponent> getPackagedComponent( final String className ) {
+ Class componentClass;
+ for ( String packageName : _userPackages ) {
+ componentClass = getPackagedClass( packageName, className, AnalysisComponent.class );
+ if ( componentClass != null ) {
+ return componentClass;
+ }
+ }
+ for ( String packageName : CTAKES_PACKAGES ) {
+ componentClass = getPackagedClass(
+ "org.apache.ctakes." + packageName + ".ae", className, AnalysisComponent.class );
+ if ( componentClass != null ) {
+ return componentClass;
+ }
+ componentClass = getPackagedClass(
+ "org.apache.ctakes." + packageName + ".cc", className, AnalysisComponent.class );
+ if ( componentClass != null ) {
+ return componentClass;
+ }
+ }
+ return null;
+ }
+
+ /**
+  * Find a Collection Reader class and create an instance of it with its public no-argument constructor.
+  * The name is first tried exactly as given, then as a simple name within the known packages.
+  *
+  * @param className fully-specified or simple name of a cr Collection Reader class
+  * @return instantiated collection reader
+  * @throws ResourceInitializationException if the class could not be found or instantiated
+  */
+ private CollectionReader createReader( final String className ) throws ResourceInitializationException {
+    Class<?> readerClass;
+    try {
+       readerClass = Class.forName( className );
+    } catch ( ClassNotFoundException cnfE ) {
+       readerClass = getPackagedReader( className );
+    }
+    if ( readerClass == null ) {
+       throw new ResourceInitializationException( "No Collection Reader found for " + className, EMPTY_OBJECT_ARRAY );
+    }
+    assertClassType( readerClass, CollectionReader.class );
+    // Only a public constructor without parameters can be used to create the reader
+    final Constructor<?> noArgConstructor;
+    try {
+       noArgConstructor = readerClass.getConstructor();
+    } catch ( NoSuchMethodException nsmE ) {
+       throw new ResourceInitializationException( "No Constructor for " + className, EMPTY_OBJECT_ARRAY );
+    }
+    try {
+       return (CollectionReader)noArgConstructor.newInstance();
+    } catch ( InstantiationException | IllegalAccessException | InvocationTargetException iniaitE ) {
+       throw new ResourceInitializationException(
+             "Could not construct " + className, EMPTY_OBJECT_ARRAY, iniaitE );
+    }
+ }
+
+ /**
+ * @param className simple name of a cr Collection Reader class
+ * @return discovered class for a cr
+ */
+ private Class<? extends CollectionReader> getPackagedReader( final String className ) {
+ Class readerClass;
+ for ( String packageName : _userPackages ) {
+ readerClass = getPackagedClass( packageName, className, CollectionReader.class );
+ if ( readerClass != null ) {
+ return readerClass;
+ }
+ }
+ for ( String packageName : CTAKES_PACKAGES ) {
+ readerClass = getPackagedClass(
+ "org.apache.ctakes." + packageName + ".cr", className, CollectionReader.class );
+ if ( readerClass != null ) {
+ return readerClass;
+ }
+ }
+ return null;
+ }
+
+ /**
+ * @param packageName possible package for class
+ * @param className simple name for class
+ * @param wantedClassType desired superclass type
+ * @return discovered class or null if no proper class was discovered
+ */
+ static private Class<?> getPackagedClass( final String packageName, final String className,
+ final Class<?> wantedClassType ) {
+ try {
+ Class<?> classType = Class.forName( packageName + "." + className );
+ if ( isClassType( classType, wantedClassType ) ) {
+ return classType;
+ }
+ } catch ( ClassNotFoundException cnfE ) {
+ // do nothing
+ }
+ return null;
+ }
+
+ /**
+ * @param classType class type to test
+ * @param wantedClassType wanted class type
+ * @throws ResourceInitializationException if the class type does not extend the wanted class type
+ */
+ static private void assertClassType( final Class<?> classType, final Class<?> wantedClassType )
+ throws ResourceInitializationException {
+ if ( !isClassType( classType, wantedClassType ) ) {
+ throw new ResourceInitializationException(
+ "Not " + wantedClassType.getSimpleName() + " " + classType.getName(), EMPTY_OBJECT_ARRAY );
+ }
+ }
+
+ /**
+ * Test whether a class can be used wherever the wanted class type is required.
+ *
+ * @param classType class type to test
+ * @param wantedClassType wanted class type
+ * @return true if the class type is the wanted class type itself or extends / implements it
+ */
+ static private boolean isClassType( final Class<?> classType, final Class<?> wantedClassType ) {
+ return wantedClassType.isAssignableFrom( classType );
+ }
+
+ /**
+ * @param parameter text
+ * @return array created by splitting text at '|' characters.  As with any
+ * {@link Pattern#split(CharSequence)}, trailing empty strings are not part of the array
+ */
+ static private String[] getStrings( final String parameter ) {
+ return SPLIT_PATTERN.split( parameter );
+ }
+
+
+}
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/DocumentIDAnnotationUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/DocumentIDAnnotationUtil.java?rev=1764190&r1=1764189&r2=1764190&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/DocumentIDAnnotationUtil.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/DocumentIDAnnotationUtil.java Tue Oct 11 02:06:22 2016
@@ -85,22 +85,27 @@ final public class DocumentIDAnnotationU
static public String getDeepDocumentId( final JCas startingJcas ) {
String documentID = getDocumentID( startingJcas );
if ( documentID == null || documentID.equals( NO_DOCUMENT_ID ) ) {
+ LOGGER.debug( "Checking document Id for initial view" );
try {
- LOGGER.debug( "Checking document Id for initial view" );
final JCas viewJcas = startingJcas.getView( "_InitialView" );
documentID = DocumentIDAnnotationUtil.getDocumentID( viewJcas );
+ } catch ( CASException | CASRuntimeException casE ) {
+ LOGGER.warn( casE.getMessage() );
+ documentID = NO_DOCUMENT_ID;
+ }
+ if ( documentID == null || documentID.equals( NO_DOCUMENT_ID ) ) {
+ LOGGER.debug( "Checking document Id for plaintext view" );
+ try {
+ final JCas viewJcas = startingJcas.getView( "plaintext" );
+ documentID = DocumentIDAnnotationUtil.getDocumentID( viewJcas );
+ } catch ( CASException | CASRuntimeException casE ) {
+ LOGGER.warn( casE.getMessage() );
+ documentID = NO_DOCUMENT_ID;
+ }
if ( documentID == null || documentID.equals( NO_DOCUMENT_ID ) ) {
- LOGGER.debug( "Checking document Id for plaintext view" );
- final JCas plaintextJcas = startingJcas.getView( "plaintext" );
- documentID = DocumentIDAnnotationUtil.getDocumentID( plaintextJcas );
- if ( documentID == null || documentID.equals( NO_DOCUMENT_ID ) ) {
- LOGGER.warn( "Unable to find DocumentIDAnnotation" );
- return NO_DOCUMENT_ID;
- }
+ LOGGER.warn( "Unable to find DocumentIDAnnotation" );
+ return NO_DOCUMENT_ID;
}
- } catch ( CASException casE ) {
- LOGGER.warn( "Unable to find DocumentIDAnnotation", casE );
- return NO_DOCUMENT_ID;
}
}
return documentID;
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/OntologyConceptUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/OntologyConceptUtil.java?rev=1764190&r1=1764189&r2=1764190&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/OntologyConceptUtil.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/OntologyConceptUtil.java Tue Oct 11 02:06:22 2016
@@ -173,6 +173,14 @@ final public class OntologyConceptUtil {
/**
* @param jcas -
+ * @return map of all cuis in the jcas and their counts
+ */
+ static public Map<String, Long> getCuiCounts( final JCas jcas ) {
+    // Every identified annotation in the cas takes part in the count
+    final Collection<IdentifiedAnnotation> annotations = JCasUtil.select( jcas, IdentifiedAnnotation.class );
+    return getCuiCounts( annotations );
+ }
+
+ /**
+ * @param jcas -
* @return set of all tuis in jcas
*/
static public Collection<String> getTuis( final JCas jcas ) {
@@ -275,6 +283,17 @@ final public class OntologyConceptUtil {
}
/**
+ * @param annotations -
+ * @return map of all Umls cuis associated with the annotations and the counts of those cuis
+ */
+ static public Map<String, Long> getCuiCounts( final Collection<IdentifiedAnnotation> annotations ) {
+    // One entry per distinct cui; its value is the number of times the cui was produced by the annotations
+    return annotations.stream()
+                      .flatMap( annotation -> getCuis( annotation ).stream() )
+                      .collect( Collectors.groupingBy( Function.identity(), Collectors.counting() ) );
+ }
+
+ /**
* @param annotations -
* @return set of all Umls tuis associated with the annotation
*/
Modified: ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleAggregatePipeline.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleAggregatePipeline.java?rev=1764190&r1=1764189&r2=1764190&view=diff
==============================================================================
--- ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleAggregatePipeline.java (original)
+++ ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleAggregatePipeline.java Tue Oct 11 02:06:22 2016
@@ -1,17 +1,19 @@
package org.apache.ctakes.examples.pipelines;
-import java.io.FileWriter;
-
import org.apache.ctakes.clinicalpipeline.ClinicalPipelineFactory;
import org.apache.ctakes.examples.ae.ExampleHelloWorldAnnotator;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-import org.apache.uima.jcas.JCas;
import org.apache.uima.fit.factory.AggregateBuilder;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.fit.pipeline.SimplePipeline;
import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+
+import java.io.FileWriter;
/**
+ * Build and run a pipeline using uimafit {@link AggregateBuilder}, {@link SimplePipeline} and {@link JCasUtil}
+ *
* Example of a running a pipeline programatically w/o uima xml descriptor xml files
* Adds the default Tokenization pipeline and adding the Example HelloWorld Annotator
*
Added: ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleBuilderPipeline.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleBuilderPipeline.java?rev=1764190&view=auto
==============================================================================
--- ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleBuilderPipeline.java (added)
+++ ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleBuilderPipeline.java Tue Oct 11 02:06:22 2016
@@ -0,0 +1,60 @@
+package org.apache.ctakes.examples.pipelines;
+
+
+import org.apache.ctakes.clinicalpipeline.ClinicalPipelineFactory;
+import org.apache.ctakes.core.pipeline.EntityCollector;
+import org.apache.ctakes.core.pipeline.PipelineBuilder;
+import org.apache.ctakes.examples.ae.ExampleHelloWorldAnnotator;
+import org.apache.log4j.Logger;
+import org.apache.uima.UIMAException;
+
+import java.io.IOException;
+
+/**
+ * Build and run a pipeline using a {@link PipelineBuilder}.
+ * <p>
+ * Example of a running a pipeline programatically w/o uima xml descriptor xml files
+ * Adds the default Tokenization pipeline and adding the Example HelloWorld Annotator
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 10/10/2016
+ */
+final public class ExampleBuilderPipeline {
+
+ static private final Logger LOGGER = Logger.getLogger( "ExampleBuilderPipeline" );
+
+ private ExampleBuilderPipeline() {
+ }
+
+ /**
+ * @param args an output directory for xmi files or none if xmi files are not wanted
+ */
+ public static void main( final String... args ) {
+ final String text = "Hello World!";
+ try {
+ PipelineBuilder builder = new PipelineBuilder();
+ builder
+ // Add a simple pre-defined existing pipeline for Tokenization
+ // Could also add engines individually
+ .addDescription( ClinicalPipelineFactory.getTokenProcessingPipeline() )
+ // Add the new HelloWorld Example
+ .add( ExampleHelloWorldAnnotator.class )
+ // Collect the Entities
+ .collectEntities();
+ if ( args.length > 0 ) {
+ //Example to save the Aggregate descriptor to an xml file for external
+ //use such as the UIMA CVD/CPE
+ builder.writeXMIs( args[ 0 ] );
+ }
+ // Run the pipeline with specified text
+ builder.run( text );
+ } catch ( IOException | UIMAException multE ) {
+ LOGGER.error( multE.getMessage() );
+ }
+ //Print out the IdentifiedAnnotation objects
+ LOGGER.info( "\n" + EntityCollector.getInstance().toString() );
+ }
+
+
+}
Added: ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleReaderPipeline.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleReaderPipeline.java?rev=1764190&view=auto
==============================================================================
--- ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleReaderPipeline.java (added)
+++ ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ExampleReaderPipeline.java Tue Oct 11 02:06:22 2016
@@ -0,0 +1,63 @@
+package org.apache.ctakes.examples.pipelines;
+
+
+import org.apache.ctakes.core.pipeline.EntityCollector;
+import org.apache.ctakes.core.pipeline.PipelineBuilder;
+import org.apache.ctakes.core.pipeline.PipelineReader;
+import org.apache.ctakes.postagger.POSTagger;
+import org.apache.log4j.Logger;
+import org.apache.uima.UIMAException;
+
+import java.io.IOException;
+
+/**
+ * Build and run a pipeline using a {@link PipelineReader} and a {@link PipelineBuilder}.
+ * <p>
+ * Example of a running a pipeline programatically w/o uima xml descriptor xml files
+ * Adds the default Tokenization pipeline and adding the Example HelloWorld Annotator
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 10/10/2016
+ */
+final public class ExampleReaderPipeline {
+
+ static private final Logger LOGGER = Logger.getLogger( "ExampleReaderPipeline" );
+
+ static private final String PIPELINE_1_PATH = "org/apache/ctakes/examples/pipelines/ExamplePipeline1.txt";
+ static private final String PIPELINE_2_PATH = "org/apache/ctakes/examples/pipelines/ExamplePipeline2.txt";
+
+ private ExampleReaderPipeline() {
+ }
+
+ /**
+ * @param args an output directory for xmi files or none if xmi files are not wanted
+ */
+ public static void main( final String... args ) {
+ final String text = "Hello World!";
+ try {
+ // Add a simple pre-defined existing pipeline for Tokenization from file
+ final PipelineReader reader = new PipelineReader( PIPELINE_1_PATH );
+ // add the POS Tagger manually
+ PipelineBuilder builder = reader.getBuilder();
+ builder.addDescription( POSTagger.createAnnotatorDescription() );
+ // Add the new HelloWorld Example by reading from file
+ reader.loadPipelineFile( PIPELINE_2_PATH );
+ // Collect the Entities
+ builder.collectEntities();
+ if ( args.length > 0 ) {
+ //Example to save the Aggregate descriptor to an xml file for external
+ //use such as the UIMA CVD/CPE
+ builder.writeXMIs( args[ 0 ] );
+ }
+ // Run the pipeline with specified text
+ builder.run( text );
+ } catch ( IOException | UIMAException multE ) {
+ LOGGER.error( multE.getMessage() );
+ }
+ //Print out the IdentifiedAnnotation objects
+ LOGGER.info( "\n" + EntityCollector.getInstance().toString() );
+ }
+
+
+}
Added: ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline1.txt
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline1.txt?rev=1764190&view=auto
==============================================================================
--- ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline1.txt (added)
+++ ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline1.txt Tue Oct 11 02:06:22 2016
@@ -0,0 +1,8 @@
+// Equivalent of ClinicalPipelineFactory.getTokenProcessingPipeline()
+add SimpleSegmentAnnotator
+add SentenceDetector
+add TokenizerAnnotatorPTB
+add ContextDependentTokenizerAnnotator
+
+// The POSTagger has a -complex- startup and should be added manually
+# add POSTagger
Added: ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline2.txt
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline2.txt?rev=1764190&view=auto
==============================================================================
--- ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline2.txt (added)
+++ ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline2.txt Tue Oct 11 02:06:22 2016
@@ -0,0 +1,4 @@
+// After -manual- addition of POSTagger
+// Can use addPackage then add, or just use add with the fully-specified class and package
+addPackage org.apache.ctakes.examples.ae
+add ExampleHelloWorldAnnotator