You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2016/08/08 18:41:52 UTC

svn commit: r1755525 - in /ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core: ae/PropertyAeFactory.java ae/StartFinishLogger.java cc/CuiCountFileWriter.java

Author: seanfinan
Date: Mon Aug  8 18:41:51 2016
New Revision: 1755525

URL: http://svn.apache.org/viewvc?rev=1755525&view=rev
Log:
StartFinishLogger : Simple AE that can wrap suboptimally logged AEs and log Start and Finish
PropertyAeFactory : Can use java .properties files to create AEs
CuiCountFileWriter : writes two bar-separated columns; Cui and cui count

Added:
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/PropertyAeFactory.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/StartFinishLogger.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CuiCountFileWriter.java

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/PropertyAeFactory.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/PropertyAeFactory.java?rev=1755525&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/PropertyAeFactory.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/PropertyAeFactory.java Mon Aug  8 18:41:51 2016
@@ -0,0 +1,151 @@
+package org.apache.ctakes.core.ae;
+
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.log4j.Logger;
+import org.apache.uima.analysis_component.AnalysisComponent;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.resource.ResourceInitializationException;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+
+/**
+ * There may be some way to get values directly into the root UimaContext.
+ * This factory can load plain old java properties files and pass the specified properties as parameters for AE creation
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 8/8/2016
+ */
+public enum PropertyAeFactory {
+   INSTANCE;
+
+   static public PropertyAeFactory getInstance() {
+      return INSTANCE;
+   }
+
+   static private final Logger LOGGER = Logger.getLogger( "PropertyAeFactory" );
+
+
+   final private Map<String, String> _properties = new HashMap<>();
+
+   synchronized public void loadPropertyFile( final String filePath ) {
+      try ( InputStream stream = FileLocator.getAsStream( filePath ) ) {
+         final Properties properties = new Properties();
+         properties.load( stream );
+         for ( String name : properties.stringPropertyNames() ) {
+            final String value = properties.getProperty( name );
+            if ( value == null ) {
+               LOGGER.warn( "Property has no value: " + name );
+            } else {
+               _properties.put( name, value );
+            }
+         }
+      } catch ( IOException ioE ) {
+         LOGGER.error( "Property File not found: " + filePath );
+      }
+   }
+
+   static private Object[] createParameters( final Map<String, String> parameterMap ) {
+      final Object[] parameters = new Object[ parameterMap.size() * 2 ];
+      int i = 0;
+      for ( Map.Entry<String, String> entry : parameterMap.entrySet() ) {
+         parameters[ i ] = entry.getKey();
+         parameters[ i + 1 ] = entry.getValue();
+         i += 2;
+      }
+      return parameters;
+   }
+
+   /**
+    * @param parameters parameters possibly not loaded by this factory
+    * @return new parameter arrays containing parameters loaded by this factory and followed by specified parameters
+    */
+   synchronized private Object[] getAllParameters( final Object... parameters ) {
+      if ( _properties.isEmpty() ) {
+         return parameters;
+      }
+      if ( parameters == null || parameters.length == 0 ) {
+         return createParameters( _properties );
+      }
+      final Map<String, String> parameterMap = new HashMap<>( _properties );
+      for ( int i = 0; i < parameters.length; i += 2 ) {
+         parameterMap.put( parameters[ i ].toString(), parameters[ i + 1 ].toString() );
+      }
+      return createParameters( parameterMap );
+   }
+
+   /**
+    * This method should be avoided.  See the bottom of https://uima.apache.org/d/uimafit-current/api/index.html
+    *
+    * @param classType  main component
+    * @param parameters parameters for the main component
+    * @return Engine with specified parameters plus those loaded from properties
+    * @throws ResourceInitializationException if UimaFit has a problem
+    */
+   public AnalysisEngine createEngine( final Class<? extends AnalysisComponent> classType,
+                                       final Object... parameters )
+         throws ResourceInitializationException {
+      final AnalysisEngineDescription description = createDescription( classType, parameters );
+      final Object allParameters = getAllParameters( parameters );
+      return AnalysisEngineFactory.createEngine( description, allParameters );
+   }
+
+   /**
+    * @param classType  main component
+    * @param parameters parameters for the main component
+    * @return Description with specified parameters plus those loaded from properties
+    * @throws ResourceInitializationException if UimaFit has a problem
+    */
+   public AnalysisEngineDescription createDescription( final Class<? extends AnalysisComponent> classType,
+                                                       final Object... parameters )
+         throws ResourceInitializationException {
+      final Object allParameters = getAllParameters( parameters );
+      return AnalysisEngineFactory.createEngineDescription( classType, allParameters );
+   }
+
+   /**
+    * This method should be avoided.  See the bottom of https://uima.apache.org/d/uimafit-current/api/index.html
+    *
+    * @param classType  main component
+    * @param parameters parameters for the main component
+    * @return Engine with specified parameters plus those loaded from properties that is wrapped with a simple Logger AE that logs the Start and Finish of the process
+    * @throws ResourceInitializationException if UimaFit has a problem
+    */
+   public AnalysisEngine createLoggedEngine( final Class<? extends AnalysisComponent> classType,
+                                             final Object... parameters )
+         throws ResourceInitializationException {
+      final Object allParameters = getAllParameters( parameters );
+      return StartFinishLogger.createLoggedEngine( classType, allParameters );
+   }
+
+   /**
+    * @param classType  main component
+    * @param parameters parameters for the main component
+    * @return Description with specified parameters plus those loaded from properties that is wrapped with a simple Logger AE that logs the Start and Finish of the process
+    * @throws ResourceInitializationException if UimaFit has a problem
+    */
+   public AnalysisEngineDescription createLoggedDescription( final Class<? extends AnalysisComponent> classType,
+                                                             final Object... parameters )
+         throws ResourceInitializationException {
+      final Object allParameters = getAllParameters( parameters );
+      return StartFinishLogger.createLoggedDescription( classType, allParameters );
+   }
+
+   /**
+    * @param mainDescription main component description
+    * @return Description with specified parameters plus those loaded from properties that is wrapped with a simple Logger AE that logs the Start and Finish of the process
+    * @throws ResourceInitializationException if UimaFit has a problem
+    */
+   public AnalysisEngineDescription createLoggedDescription( final AnalysisEngineDescription mainDescription )
+         throws ResourceInitializationException {
+      return StartFinishLogger.createLoggedDescription( mainDescription );
+   }
+
+
+}

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/StartFinishLogger.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/StartFinishLogger.java?rev=1755525&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/StartFinishLogger.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/StartFinishLogger.java Mon Aug  8 18:41:51 2016
@@ -0,0 +1,140 @@
+package org.apache.ctakes.core.ae;
+
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.AnalysisComponent;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+
+/**
+ * All Annotation Engines should log their start and finish.
+ * Such logging not only keeps track of what is actually in the pipeline, but it also helps with debugging and profiling
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 8/8/2016
+ */
+public class StartFinishLogger extends JCasAnnotator_ImplBase {
+
+   public static final String PARAM_LOGGER_NAME = "LOGGER_NAME";
+   @ConfigurationParameter(
+         name = PARAM_LOGGER_NAME,
+         mandatory = true,
+         description = "provides the full name of the Annotator Engine for which start / end logging should be done.",
+         defaultValue = { "StartEndProgressLogger" }
+   )
+   private String _loggerName;
+
+   public static final String PARAM_IS_START = "IS_START";
+   @ConfigurationParameter(
+         name = PARAM_IS_START,
+         mandatory = false,
+         description = "indicates whether this should log a start."
+   )
+   private Boolean _isStart;
+
+   private Logger _logger;
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void initialize( final UimaContext context )
+         throws ResourceInitializationException {
+      super.initialize( context );
+      _logger = Logger.getLogger( _loggerName );
+   }
+
+   /**
+    * Logs the Start of processing if IS_START is true, otherwise logs the Finish.
+    */
+   @Override
+   public void process( final JCas jcas ) throws AnalysisEngineProcessException {
+      // IS_START is optional and has no default value, so _isStart may be null.  Treat null as false.
+      if ( Boolean.TRUE.equals( _isStart ) ) {
+         _logger.info( "Starting processing" );
+      } else {
+         _logger.info( "Finished processing" );
+      }
+   }
+
+   /**
+    * This method should be avoided.  See the bottom of https://uima.apache.org/d/uimafit-current/api/index.html
+    *
+    * @param loggerName name for the logger
+    * @param isStart    true to return an Engine that logs the Start, false to return an Engine that logs Finish
+    * @return Simple Start/Finish Logger Engine
+    * @throws ResourceInitializationException if UimaFit has a problem
+    */
+   public static AnalysisEngine createEngine( final String loggerName, final boolean isStart )
+         throws ResourceInitializationException {
+      return AnalysisEngineFactory.createEngine( StartFinishLogger.class,
+            PARAM_LOGGER_NAME, loggerName,
+            PARAM_IS_START, isStart );
+   }
+
+   /**
+    * @param loggerName name for the logger
+    * @param isStart    true to return an Engine that logs the Start, false to return an Engine that logs Finish
+    * @return Simple Start/Finish Logger Engine
+    * @throws ResourceInitializationException if UimaFit has a problem
+    */
+   public static AnalysisEngineDescription createDescription( final String loggerName, final boolean isStart )
+         throws ResourceInitializationException {
+      return AnalysisEngineFactory.createEngineDescription( StartFinishLogger.class,
+            PARAM_LOGGER_NAME, loggerName,
+            PARAM_IS_START, isStart );
+   }
+
+
+   /**
+    * This method should be avoided.  See the bottom of https://uima.apache.org/d/uimafit-current/api/index.html
+    *
+    * @param classType  main component
+    * @param parameters parameters for the main component
+    * @return Engine that is wrapped with a simple Logger AE that logs the Start and Finish of the process
+    * @throws ResourceInitializationException if UimaFit has a problem
+    */
+   public static AnalysisEngine createLoggedEngine( final Class<? extends AnalysisComponent> classType,
+                                                    final Object... parameters )
+         throws ResourceInitializationException {
+      final AnalysisEngineDescription description = createLoggedDescription( classType, parameters );
+      return AnalysisEngineFactory.createEngine( description, parameters );
+   }
+
+   /**
+    * @param classType  main component
+    * @param parameters parameters for the main component
+    * @return Description that is wrapped with a simple Logger AE that logs the Start and Finish of the process
+    * @throws ResourceInitializationException if UimaFit has a problem
+    */
+   public static AnalysisEngineDescription createLoggedDescription( final Class<? extends AnalysisComponent> classType,
+                                                                    final Object... parameters )
+         throws ResourceInitializationException {
+      final AnalysisEngineDescription mainDescription
+            = AnalysisEngineFactory.createEngineDescription( classType, parameters );
+      return createLoggedDescription( mainDescription );
+   }
+
+   /**
+    * @param mainDescription main component description
+    * @return Description that is wrapped with a simple Logger AE that logs the Start and Finish of the process
+    * @throws ResourceInitializationException if UimaFit has a problem
+    */
+   public static AnalysisEngineDescription createLoggedDescription( final AnalysisEngineDescription mainDescription )
+         throws ResourceInitializationException {
+      final String name = mainDescription.getAnnotatorImplementationName();
+      return AnalysisEngineFactory.createEngineDescription(
+            createDescription( name, true ),
+            mainDescription,
+            createDescription( name, false ) );
+   }
+
+
+}

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CuiCountFileWriter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CuiCountFileWriter.java?rev=1755525&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CuiCountFileWriter.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CuiCountFileWriter.java Mon Aug  8 18:41:51 2016
@@ -0,0 +1,154 @@
+package org.apache.ctakes.core.cc;
+
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.ctakes.core.util.OntologyConceptUtil;
+import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.fit.component.CasConsumer_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+
+import java.io.*;
+import java.util.Collection;
+import java.util.Map;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+import static org.apache.ctakes.core.config.ConfigParameterConstants.DESC_OUTPUTDIR;
+import static org.apache.ctakes.core.config.ConfigParameterConstants.PARAM_OUTPUTDIR;
+
+/**
+ * Writes a file with two bar-separated columns, the first is the cui, the second the number of cuis in the document.
+ * Negated concepts have cuis preceded by a negative sign
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 7/29/2016
+ */
+public class CuiCountFileWriter extends CasConsumer_ImplBase {
+
+   static private final Logger LOGGER = Logger.getLogger( "CuiCountFileWriter" );
+
+   static private final String FILE_EXTENSION = ".cuicount.bsv";
+
+   @ConfigurationParameter(
+         name = PARAM_OUTPUTDIR,
+         mandatory = false,
+         description = DESC_OUTPUTDIR,
+         defaultValue = ""
+   )
+   private String fitOutputDirectoryPath;
+
+   private String _outputDirPath;
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void initialize( final UimaContext uimaContext ) throws ResourceInitializationException {
+      super.initialize( uimaContext );
+      try {
+         if ( fitOutputDirectoryPath == null ) {
+            fitOutputDirectoryPath = (String)uimaContext.getConfigParameterValue( PARAM_OUTPUTDIR );
+         }
+         if ( fitOutputDirectoryPath != null ) {
+            setOutputDirectory( fitOutputDirectoryPath );
+         }
+      } catch ( IllegalArgumentException | SecurityException multE ) {
+         // thrown if the path specifies a File (not Dir) or by file system access methods
+         throw new ResourceInitializationException( multE );
+      }
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void process( final CAS aCAS ) throws AnalysisEngineProcessException {
+      JCas jcas;
+      try {
+         jcas = aCAS.getJCas();
+      } catch ( CASException casE ) {
+         throw new AnalysisEngineProcessException( casE );
+      }
+      process( jcas );
+   }
+
+   /**
+    * @param outputDirectoryPath may be empty or null, in which case the current working directory is used
+    * @throws IllegalArgumentException if the provided path points to a File and not a Directory
+    * @throws SecurityException        if the File System has issues
+    */
+   public void setOutputDirectory( final String outputDirectoryPath ) throws IllegalArgumentException,
+                                                                             SecurityException {
+      // If no outputDir is specified (null or empty) the current working directory will be used.  Else check path.
+      if ( outputDirectoryPath == null || outputDirectoryPath.isEmpty() ) {
+         _outputDirPath = "";
+         LOGGER.debug( "No Output Directory Path specified, using current working directory "
+                       + System.getProperty( "user.dir" ) );
+         return;
+      }
+      final File outputDir = new File( outputDirectoryPath );
+      if ( !outputDir.exists() ) {
+         outputDir.mkdirs();
+      }
+      if ( !outputDir.isDirectory() ) {
+         throw new IllegalArgumentException( outputDirectoryPath + " is not a valid directory path" );
+      }
+      _outputDirPath = outputDirectoryPath;
+      LOGGER.debug( "Output Directory Path set to " + _outputDirPath );
+   }
+
+   /**
+    * Process the jcas and write cuis and their counts, sorted by cui.  Cuis that are not negated are written first.
+    * Filename is based upon the document id stored in the cas.  A failure to write is logged with its cause, not thrown
+    *
+    * @param jcas ye olde ...
+    */
+   public void process( final JCas jcas ) {
+      LOGGER.info( "Starting processing" );
+      final String docId = DocumentIDAnnotationUtil.getDocumentIdForFile( jcas );
+      File outputFile;
+      if ( _outputDirPath == null || _outputDirPath.isEmpty() ) {
+         outputFile = new File( docId + FILE_EXTENSION );
+      } else {
+         outputFile = new File( _outputDirPath, docId + FILE_EXTENSION );
+      }
+      final Map<String, Integer> positiveCounts = countCuis( jcas, false );
+      final Map<String, Integer> negativeCounts = countCuis( jcas, true );
+      try ( final Writer writer = new BufferedWriter( new FileWriter( outputFile ) ) ) {
+         for ( Map.Entry<String, Integer> cuiCount : positiveCounts.entrySet() ) {
+            writer.write( cuiCount.getKey() + "|" + cuiCount.getValue() + "\n" );
+         }
+         for ( Map.Entry<String, Integer> cuiCount : negativeCounts.entrySet() ) {
+            writer.write( "-" + cuiCount.getKey() + "|" + cuiCount.getValue() + "\n" );
+         }
+      } catch ( IOException ioE ) {
+         LOGGER.error( "Could not write cui count file " + outputFile.getPath(), ioE );
+      }
+      LOGGER.info( "Finished processing" );
+   }
+
+   /**
+    * @param jcas    ye olde ...
+    * @param negated true to count cuis of negated annotations, false to count cuis of all other annotations
+    * @return map of cui to the number of times that it occurs, sorted by cui
+    */
+   static private Map<String, Integer> countCuis( final JCas jcas, final boolean negated ) {
+      // A TreeMap is required for sorted output; sorting the stream before grouping into a HashMap has no effect
+      return JCasUtil.select( jcas, IdentifiedAnnotation.class ).stream()
+            .filter( a -> negated == ( a.getPolarity() == CONST.NE_POLARITY_NEGATION_PRESENT ) )
+            .map( OntologyConceptUtil::getCuis )
+            .flatMap( Collection::stream )
+            .collect( Collectors.groupingBy( Function.identity(), java.util.TreeMap::new,
+                  Collectors.reducing( 0, c -> 1, Integer::sum ) ) );
+   }
+
+}