You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2016/08/08 18:41:52 UTC
svn commit: r1755525 - in
/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core:
ae/PropertyAeFactory.java ae/StartFinishLogger.java
cc/CuiCountFileWriter.java
Author: seanfinan
Date: Mon Aug 8 18:41:51 2016
New Revision: 1755525
URL: http://svn.apache.org/viewvc?rev=1755525&view=rev
Log:
StartFinishLogger : Simple AE that can wrap suboptimally logged AEs and log Start and Finish
PropertyAeFactory : Can use java .properties files to create AEs
CuiCountFileWriter : writes two bar-separated columns; Cui and cui count
Added:
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/PropertyAeFactory.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/StartFinishLogger.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CuiCountFileWriter.java
Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/PropertyAeFactory.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/PropertyAeFactory.java?rev=1755525&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/PropertyAeFactory.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/PropertyAeFactory.java Mon Aug 8 18:41:51 2016
@@ -0,0 +1,151 @@
+package org.apache.ctakes.core.ae;
+
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.log4j.Logger;
+import org.apache.uima.analysis_component.AnalysisComponent;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.resource.ResourceInitializationException;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+
+/**
+ * There may be some way to get values directly into the root UimaContext.
+ * This factory can load plain old java properties files and pass the specified properties as parameters for AE creation
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 8/8/2016
+ */
+public enum PropertyAeFactory {
+ INSTANCE;
+
+ /**
+ * @return the single instance of this factory, which is the enum constant INSTANCE
+ */
+ static public PropertyAeFactory getInstance() {
+ return INSTANCE;
+ }
+
+ // log4j logger, looked up by the plain name "PropertyAeFactory"
+ static private final Logger LOGGER = Logger.getLogger( "PropertyAeFactory" );
+
+
+ // Property names and values accumulated by loadPropertyFile(..) and read by getAllParameters(..)
+ final private Map<String, String> _properties = new HashMap<>();
+
+ /**
+  * Loads a java .properties file and stores every property that it contains in this factory.
+  * Properties accumulate over calls; a name that was loaded before is overwritten by the newer value.
+  * If the file cannot be opened or read then the problem is logged and nothing is thrown.
+  *
+  * @param filePath path of the properties file, handed to FileLocator.getAsStream(..)
+  */
+ synchronized public void loadPropertyFile( final String filePath ) {
+    try ( InputStream stream = FileLocator.getAsStream( filePath ) ) {
+       final Properties properties = new Properties();
+       properties.load( stream );
+       // stringPropertyNames() only returns keys whose values are Strings, so getProperty(..) is never null here
+       for ( String name : properties.stringPropertyNames() ) {
+          _properties.put( name, properties.getProperty( name ) );
+       }
+    } catch ( IOException ioE ) {
+       // hand the exception to the logger so that the actual cause is not lost
+       LOGGER.error( "Property File not found: " + filePath, ioE );
+    }
+ }
+
+ /**
+  * Flattens a map into an array of alternating names and values.
+  *
+  * @param parameterMap parameter names mapped to parameter values
+  * @return array twice the size of the map: { name, value, name, value ... } in map iteration order
+  */
+ static private Object[] createParameters( final Map<String, String> parameterMap ) {
+    final Object[] nameValuePairs = new Object[ 2 * parameterMap.size() ];
+    int index = 0;
+    for ( Map.Entry<String, String> parameter : parameterMap.entrySet() ) {
+       nameValuePairs[ index++ ] = parameter.getKey();
+       nameValuePairs[ index++ ] = parameter.getValue();
+    }
+    return nameValuePairs;
+ }
+
+ /**
+  * Merges the properties loaded by this factory with the specified parameters.
+  * When no properties have been loaded the specified array is returned untouched.
+  * Otherwise a specified parameter replaces a loaded property of the same name,
+  * and specified names and values are stored using their toString() form.
+  *
+  * @param parameters alternating names and values, possibly not loaded by this factory
+  * @return the specified array, or a new array of alternating names and values that holds the merged parameters
+  */
+ synchronized private Object[] getAllParameters( final Object... parameters ) {
+    if ( _properties.isEmpty() ) {
+       return parameters;
+    }
+    final boolean noneSpecified = parameters == null || parameters.length == 0;
+    if ( noneSpecified ) {
+       return createParameters( _properties );
+    }
+    // start with the loaded properties so that specified parameters take precedence
+    final Map<String, String> merged = new HashMap<>( _properties );
+    int index = 0;
+    while ( index < parameters.length ) {
+       final String name = parameters[ index ].toString();
+       final String value = parameters[ index + 1 ].toString();
+       merged.put( name, value );
+       index += 2;
+    }
+    return createParameters( merged );
+ }
+
+ /**
+  * This method should be avoided. See the bottom of https://uima.apache.org/d/uimafit-current/api/index.html
+  *
+  * @param classType  main component
+  * @param parameters parameters for the main component
+  * @return Engine with specified parameters plus those loaded from properties
+  * @throws ResourceInitializationException if UimaFit has a problem
+  */
+ public AnalysisEngine createEngine( final Class<? extends AnalysisComponent> classType,
+                                     final Object... parameters )
+       throws ResourceInitializationException {
+    // Must be declared Object[] : an argument of static type Object given to an Object... method
+    // is wrapped as a single element instead of being used as the name, value array
+    final Object[] allParameters = getAllParameters( parameters );
+    final AnalysisEngineDescription description
+          = AnalysisEngineFactory.createEngineDescription( classType, allParameters );
+    return AnalysisEngineFactory.createEngine( description, allParameters );
+ }
+
+ /**
+  * @param classType  main component
+  * @param parameters parameters for the main component
+  * @return Description with specified parameters plus those loaded from properties
+  * @throws ResourceInitializationException if UimaFit has a problem
+  */
+ public AnalysisEngineDescription createDescription( final Class<? extends AnalysisComponent> classType,
+                                                     final Object... parameters )
+       throws ResourceInitializationException {
+    // Must be declared Object[] : an argument of static type Object given to an Object... method
+    // is wrapped as a single element instead of being used as the name, value array
+    final Object[] allParameters = getAllParameters( parameters );
+    return AnalysisEngineFactory.createEngineDescription( classType, allParameters );
+ }
+
+ /**
+  * This method should be avoided. See the bottom of https://uima.apache.org/d/uimafit-current/api/index.html
+  *
+  * @param classType  main component
+  * @param parameters parameters for the main component
+  * @return Engine with specified parameters plus those loaded from properties that is wrapped with a simple Logger AE that logs the Start and Finish of the process
+  * @throws ResourceInitializationException if UimaFit has a problem
+  */
+ public AnalysisEngine createLoggedEngine( final Class<? extends AnalysisComponent> classType,
+                                           final Object... parameters )
+       throws ResourceInitializationException {
+    // Must be declared Object[] : an argument of static type Object given to an Object... method
+    // is wrapped as a single element instead of being used as the name, value array
+    final Object[] allParameters = getAllParameters( parameters );
+    return StartFinishLogger.createLoggedEngine( classType, allParameters );
+ }
+
+ /**
+  * @param classType  main component
+  * @param parameters parameters for the main component
+  * @return Description with specified parameters plus those loaded from properties that is wrapped with a simple Logger AE that logs the Start and Finish of the process
+  * @throws ResourceInitializationException if UimaFit has a problem
+  */
+ public AnalysisEngineDescription createLoggedDescription( final Class<? extends AnalysisComponent> classType,
+                                                           final Object... parameters )
+       throws ResourceInitializationException {
+    // Must be declared Object[] : an argument of static type Object given to an Object... method
+    // is wrapped as a single element instead of being used as the name, value array
+    final Object[] allParameters = getAllParameters( parameters );
+    return StartFinishLogger.createLoggedDescription( classType, allParameters );
+ }
+
+ /**
+ * Delegates to StartFinishLogger.createLoggedDescription(..) with the given description.
+ * The description is passed through as-is; this method does not add any properties loaded by this factory.
+ *
+ * @param mainDescription main component description
+ * @return Description of the main component that is wrapped with a simple Logger AE that logs the Start and Finish of the process
+ * @throws ResourceInitializationException if UimaFit has a problem
+ */
+ public AnalysisEngineDescription createLoggedDescription( final AnalysisEngineDescription mainDescription )
+ throws ResourceInitializationException {
+ return StartFinishLogger.createLoggedDescription( mainDescription );
+ }
+
+
+}
Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/StartFinishLogger.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/StartFinishLogger.java?rev=1755525&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/StartFinishLogger.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/StartFinishLogger.java Mon Aug 8 18:41:51 2016
@@ -0,0 +1,140 @@
+package org.apache.ctakes.core.ae;
+
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.AnalysisComponent;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+
+/**
+ * All Annotation Engines should be logging their start and finish.
+ * Such logging not only keeps track of what is actually in the pipeline, but it also helps with debugging and profiling
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 8/8/2016
+ */
+public class StartFinishLogger extends JCasAnnotator_ImplBase {
+
+
+ // Name of the configuration parameter that supplies the logger name
+ public static final String PARAM_LOGGER_NAME = "LOGGER_NAME";
+ @ConfigurationParameter(
+ name = PARAM_LOGGER_NAME,
+ mandatory = true,
+ description = "provides the full name of the Annotator Engine for which start / end logging should be done.",
+ defaultValue = { "StartEndProgressLogger" }
+ )
+ private String _loggerName;
+
+ // Name of the configuration parameter that selects Start (true) or Finish (false) logging
+ public static final String PARAM_IS_START = "IS_START";
+ @ConfigurationParameter(
+ name = PARAM_IS_START,
+ mandatory = false,
+ description = "indicates whether this should log a start."
+ )
+ // NOTE(review): not mandatory and no defaultValue, so this is presumably left null when the parameter is absent - confirm
+ private Boolean _isStart;
+
+ // Logger obtained in initialize(..) using _loggerName; used by process(..)
+ private Logger _logger;
+
+ /**
+ * Initializes the superclass and then fetches the log4j Logger that is named by the configured logger name.
+ * {@inheritDoc}
+ */
+ @Override
+ public void initialize( final UimaContext context )
+ throws ResourceInitializationException {
+ super.initialize( context );
+ // _loggerName is read only after super.initialize(..) - presumably uimafit fills the @ConfigurationParameter fields there; confirm
+ _logger = Logger.getLogger( _loggerName );
+ }
+
+ /**
+  * Logs "Starting processing" when configured as a start logger, otherwise logs "Finished processing".
+  * The cas itself is not inspected or modified.
+  * {@inheritDoc}
+  */
+ @Override
+ public void process( final JCas jcas ) throws AnalysisEngineProcessException {
+    // _isStart is a Boolean that is neither mandatory nor defaulted, so do not unbox it directly:
+    // a missing value is treated as false instead of causing a NullPointerException
+    final boolean isStart = Boolean.TRUE.equals( _isStart );
+    _logger.info( isStart ? "Starting processing" : "Finished processing" );
+ }
+
+ /**
+  * This method should be avoided. See the bottom of https://uima.apache.org/d/uimafit-current/api/index.html
+  *
+  * @param loggerName name for the logger
+  * @param isStart    true for an Engine that logs the Start, false for an Engine that logs the Finish
+  * @return Simple Start/Finish Logger Engine
+  * @throws ResourceInitializationException if UimaFit has a problem
+  */
+ public static AnalysisEngine createEngine( final String loggerName, final boolean isStart )
+       throws ResourceInitializationException {
+    // alternating parameter names and values
+    final Object[] configuration = { PARAM_LOGGER_NAME, loggerName, PARAM_IS_START, isStart };
+    return AnalysisEngineFactory.createEngine( StartFinishLogger.class, configuration );
+ }
+
+ /**
+  * @param loggerName name for the logger
+  * @param isStart    true for a Description that logs the Start, false for a Description that logs the Finish
+  * @return Simple Start/Finish Logger Engine Description
+  * @throws ResourceInitializationException if UimaFit has a problem
+  */
+ public static AnalysisEngineDescription createDescription( final String loggerName, final boolean isStart )
+       throws ResourceInitializationException {
+    // alternating parameter names and values
+    final Object[] configuration = { PARAM_LOGGER_NAME, loggerName, PARAM_IS_START, isStart };
+    return AnalysisEngineFactory.createEngineDescription( StartFinishLogger.class, configuration );
+ }
+
+
+ /**
+ * This method should be avoided. See the bottom of https://uima.apache.org/d/uimafit-current/api/index.html
+ * Builds the logged description for the component and then creates an Engine from that description.
+ *
+ * @param classType main component
+ * @param parameters parameters for the main component
+ * @return Engine that is wrapped with a simple Logger AE that logs the Start and Finish of the process
+ * @throws ResourceInitializationException if UimaFit has a problem
+ */
+ public static AnalysisEngine createLoggedEngine( final Class<? extends AnalysisComponent> classType,
+ final Object... parameters )
+ throws ResourceInitializationException {
+ final AnalysisEngineDescription description = createLoggedDescription( classType, parameters );
+ // NOTE(review): parameters were already used to build the description above and are passed again here - confirm that is intended
+ return AnalysisEngineFactory.createEngine( description, parameters );
+ }
+
+ /**
+  * Creates the description of the main component and wraps it with Start and Finish loggers.
+  *
+  * @param classType  main component
+  * @param parameters parameters for the main component
+  * @return Description that is wrapped with a simple Logger AE that logs the Start and Finish of the process
+  * @throws ResourceInitializationException if UimaFit has a problem
+  */
+ public static AnalysisEngineDescription createLoggedDescription( final Class<? extends AnalysisComponent> classType,
+                                                                  final Object... parameters )
+       throws ResourceInitializationException {
+    return createLoggedDescription( AnalysisEngineFactory.createEngineDescription( classType, parameters ) );
+ }
+
+ /**
+  * Places a Start logger before and a Finish logger after the main component.
+  * Both loggers are named with the annotator implementation name of the main description.
+  *
+  * @param mainDescription main component description
+  * @return Description that is wrapped with a simple Logger AE that logs the Start and Finish of the process
+  * @throws ResourceInitializationException if UimaFit has a problem
+  */
+ public static AnalysisEngineDescription createLoggedDescription( final AnalysisEngineDescription mainDescription )
+       throws ResourceInitializationException {
+    final String loggerName = mainDescription.getAnnotatorImplementationName();
+    final AnalysisEngineDescription startLogger = createDescription( loggerName, true );
+    final AnalysisEngineDescription finishLogger = createDescription( loggerName, false );
+    return AnalysisEngineFactory.createEngineDescription( startLogger, mainDescription, finishLogger );
+ }
+
+
+}
Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CuiCountFileWriter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CuiCountFileWriter.java?rev=1755525&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CuiCountFileWriter.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CuiCountFileWriter.java Mon Aug 8 18:41:51 2016
@@ -0,0 +1,154 @@
+package org.apache.ctakes.core.cc;
+
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.ctakes.core.util.OntologyConceptUtil;
+import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.fit.component.CasConsumer_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+
+import java.io.*;
+import java.util.Collection;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+import static org.apache.ctakes.core.config.ConfigParameterConstants.DESC_OUTPUTDIR;
+import static org.apache.ctakes.core.config.ConfigParameterConstants.PARAM_OUTPUTDIR;
+
+/**
+ * Writes a file with two bar-separated columns: the first is the cui, the second is the number of times that cui occurs in the document.
+ * Negated concepts have cuis preceded by a negative sign
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 7/29/2016
+ */
+public class CuiCountFileWriter extends CasConsumer_ImplBase {
+
+ // log4j logger, looked up by the plain name "CuiCountFileWriter"
+ static private final Logger LOGGER = Logger.getLogger( "CuiCountFileWriter" );
+
+ // Appended to the document id to form the output file name
+ static private final String FILE_EXTENSION = ".cuicount.bsv";
+
+ // Optional output directory parameter; initialize(..) hands its value to setOutputDirectory(..)
+ @ConfigurationParameter(
+ name = PARAM_OUTPUTDIR,
+ mandatory = false,
+ description = DESC_OUTPUTDIR,
+ defaultValue = ""
+ )
+ private String fitOutputDirectoryPath;
+
+ // Directory used by process(..); when null or empty the output file is created without a parent directory
+ private String _outputDirPath;
+
+ /**
+  * Initializes the superclass and then sets the output directory, when one is configured.
+  * {@inheritDoc}
+  */
+ @Override
+ public void initialize( final UimaContext uimaContext ) throws ResourceInitializationException {
+    super.initialize( uimaContext );
+    try {
+       // fall back to the value in the context when the annotated field has no value
+       if ( fitOutputDirectoryPath == null ) {
+          fitOutputDirectoryPath = (String)uimaContext.getConfigParameterValue( PARAM_OUTPUTDIR );
+       }
+       if ( fitOutputDirectoryPath == null ) {
+          return;
+       }
+       setOutputDirectory( fitOutputDirectoryPath );
+    } catch ( IllegalArgumentException | SecurityException pathE ) {
+       // the path points to a File instead of a Directory, or the file system refused access
+       throw new ResourceInitializationException( pathE );
+    }
+ }
+
+
+ /**
+  * Obtains the JCas view of the CAS and hands it to {@link #process(JCas)}.
+  * {@inheritDoc}
+  */
+ @Override
+ public void process( final CAS aCAS ) throws AnalysisEngineProcessException {
+    final JCas jcas;
+    try {
+       jcas = aCAS.getJCas();
+    } catch ( CASException casE ) {
+       // rethrow as the exception type that is declared by the UIMA process method
+       throw new AnalysisEngineProcessException( casE );
+    }
+    process( jcas );
+ }
+
+
+ /**
+  * Sets the directory into which the output files are written, creating it if it does not exist.
+  *
+  * @param outputDirectoryPath may be empty or null, in which case the current working directory is used
+  * @throws IllegalArgumentException if the provided path points to a File and not a Directory
+  * @throws SecurityException        if the File System has issues
+  */
+ public void setOutputDirectory( final String outputDirectoryPath ) throws IllegalArgumentException,
+                                                                          SecurityException {
+    final boolean unspecified = outputDirectoryPath == null || outputDirectoryPath.isEmpty();
+    if ( unspecified ) {
+       // an empty path makes process(..) create its files relative to the current working directory
+       _outputDirPath = "";
+       LOGGER.debug( "No Output Directory Path specified, using current working directory "
+                     + System.getProperty( "user.dir" ) );
+       return;
+    }
+    final File directory = new File( outputDirectoryPath );
+    if ( !directory.exists() ) {
+       directory.mkdirs();
+    }
+    // also catches a failed mkdirs() as well as a path that points to a plain file
+    if ( directory.isDirectory() ) {
+       _outputDirPath = outputDirectoryPath;
+       LOGGER.debug( "Output Directory Path set to " + _outputDirPath );
+       return;
+    }
+    throw new IllegalArgumentException( outputDirectoryPath + " is not a valid directory path" );
+ }
+
+
+ /**
+  * Process the jcas and write cuis and their counts. Filename is based upon the document id stored in the cas.
+  * Cuis of non-negated annotations are written first, followed by cuis of negated annotations preceded by "-".
+  * Within each of the two groups the lines are sorted by cui.
+  * A failure to write the file is logged and not rethrown.
+  *
+  * @param jcas ye olde ...
+  */
+ public void process( final JCas jcas ) {
+    LOGGER.info( "Starting processing" );
+    final String docId = DocumentIDAnnotationUtil.getDocumentIdForFile( jcas );
+    final String fileName = docId + FILE_EXTENSION;
+    final File outputFile = ( _outputDirPath == null || _outputDirPath.isEmpty() )
+                            ? new File( fileName )
+                            : new File( _outputDirPath, fileName );
+    final Map<String, Integer> positiveCounts = countCuis( jcas, false );
+    final Map<String, Integer> negativeCounts = countCuis( jcas, true );
+    try ( Writer writer = new BufferedWriter( new FileWriter( outputFile ) ) ) {
+       for ( Map.Entry<String, Integer> cuiCount : positiveCounts.entrySet() ) {
+          writer.write( cuiCount.getKey() + "|" + cuiCount.getValue() + "\n" );
+       }
+       for ( Map.Entry<String, Integer> cuiCount : negativeCounts.entrySet() ) {
+          writer.write( "-" + cuiCount.getKey() + "|" + cuiCount.getValue() + "\n" );
+       }
+    } catch ( IOException ioE ) {
+       // hand the exception to the logger so that the cause and stack trace are kept
+       LOGGER.error( "Could not write cui count file " + outputFile.getPath(), ioE );
+    }
+    LOGGER.info( "Finished processing" );
+ }
+
+ /**
+  * Counts the cuis of the identified annotations that have the wanted negation status.
+  *
+  * @param jcas    ye olde ...
+  * @param negated true to count only negated annotations, false to count only annotations that are not negated
+  * @return map of cui to the number of times that cui was found, sorted by cui
+  */
+ static private Map<String, Integer> countCuis( final JCas jcas, final boolean negated ) {
+    // A TreeMap keeps the cuis sorted for output;
+    // sorting the stream before grouping into a HashMap had no effect on the order of the written lines
+    return JCasUtil.select( jcas, IdentifiedAnnotation.class )
+                   .stream()
+                   .filter( a -> ( a.getPolarity() == CONST.NE_POLARITY_NEGATION_PRESENT ) == negated )
+                   .map( OntologyConceptUtil::getCuis )
+                   .flatMap( Collection::stream )
+                   .collect( Collectors.groupingBy( Function.identity(), TreeMap::new,
+                                                    Collectors.summingInt( cui -> 1 ) ) );
+ }
+
+}