You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2017/03/17 17:15:52 UTC

svn commit: r1787454 - /ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/TextSpanWriter.java

Author: seanfinan
Date: Fri Mar 17 17:15:52 2017
New Revision: 1787454

URL: http://svn.apache.org/viewvc?rev=1787454&view=rev
Log:
Writes Span and covered text for discovered annotations

Added:
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/TextSpanWriter.java

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/TextSpanWriter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/TextSpanWriter.java?rev=1787454&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/TextSpanWriter.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/TextSpanWriter.java Fri Mar 17 17:15:52 2017
@@ -0,0 +1,161 @@
+package org.apache.ctakes.core.cc;
+
+
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.ctakes.typesystem.type.refsem.Entity;
+import org.apache.ctakes.typesystem.type.refsem.Event;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.fit.component.CasConsumer_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+
+import java.io.*;
+import java.util.Collection;
+
+import static org.apache.ctakes.core.config.ConfigParameterConstants.DESC_OUTPUTDIR;
+import static org.apache.ctakes.core.config.ConfigParameterConstants.PARAM_OUTPUTDIR;
+
+/**
+ * Writes one bar-separated (BSV) file per document: a line with class name, span offsets and covered text
+ * for each annotation accepted by {@link #writeAnnotation}.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 9/28/2015
+ */
+@PipeBitInfo(
+      name = "Text Span Writer",
+      description = "Writes BSV files with original text for extracted annotations and their span offsets.",
+      role = PipeBitInfo.Role.WRITER,
+      input = "IdentifiedAnnotation " + PipeBitInfo.POPULATED_JCAS,
+      output = PipeBitInfo.NO_OUTPUT
+)
+public class TextSpanWriter extends CasConsumer_ImplBase {
+
+   static private final Logger LOGGER = Logger.getLogger( "TextSpanWriter" );
+
+   static private final String FILE_EXTENSION = ".textspan.bsv";
+
+   @ConfigurationParameter(
+         name = PARAM_OUTPUTDIR,
+         mandatory = false,
+         description = DESC_OUTPUTDIR,
+         defaultValue = ""
+   )
+   private String fitOutputDirectoryPath;
+
+   private String _outputDirPath;
+
+   /** {@inheritDoc} */
+   @Override
+   public void initialize( final UimaContext uimaContext ) throws ResourceInitializationException {
+      super.initialize( uimaContext );
+      try {
+         if ( fitOutputDirectoryPath == null ) {
+            fitOutputDirectoryPath = (String)uimaContext.getConfigParameterValue( PARAM_OUTPUTDIR );
+         }
+         if ( fitOutputDirectoryPath != null ) {
+            setOutputDirectory( fitOutputDirectoryPath );
+         }
+      } catch ( IllegalArgumentException | SecurityException multE ) {
+         // thrown if the path specifies a File (not Dir) or by file system access methods
+         throw new ResourceInitializationException( multE );
+      }
+   }
+
+   /** {@inheritDoc} */
+   @Override
+   public void process( final CAS aCAS ) throws AnalysisEngineProcessException {
+      JCas jcas;
+      try {
+         jcas = aCAS.getJCas();
+      } catch ( CASException casE ) {
+         throw new AnalysisEngineProcessException( casE );
+      }
+      process( jcas );
+   }
+
+   /**
+    * @param outputDirectoryPath may be empty or null, in which case the current working directory is used
+    * @throws IllegalArgumentException if the provided path points to a File and not a Directory
+    * @throws SecurityException        if the File System has issues
+    */
+   public void setOutputDirectory( final String outputDirectoryPath ) throws IllegalArgumentException,
+                                                                             SecurityException {
+      // If no outputDir is specified (null or empty) the current working directory will be used.  Else check path.
+      if ( outputDirectoryPath == null || outputDirectoryPath.isEmpty() ) {
+         _outputDirPath = "";
+         LOGGER.debug( "No Output Directory Path specified, using current working directory "
+                       + System.getProperty( "user.dir" ) );
+         return;
+      }
+      final File outputDir = new File( outputDirectoryPath );
+      if ( !outputDir.exists() ) {
+         outputDir.mkdirs();
+      }
+      // mkdirs() result is not checked above; a failed creation is caught by the isDirectory() test.
+      if ( !outputDir.isDirectory() ) {
+         throw new IllegalArgumentException( outputDirectoryPath + " is not a valid directory path" );
+      }
+      _outputDirPath = outputDirectoryPath;
+      LOGGER.debug( "Output Directory Path set to " + _outputDirPath );
+   }
+
+   /**
+    * Writes the text span file for one document.  The file name is the document id plus ".textspan.bsv".
+    * An IOException while writing is logged and not rethrown.
+    *
+    * @param jcas ye olde ...
+    */
+   public void process( final JCas jcas ) {
+      LOGGER.info( "Starting processing" );
+      final String docId = DocumentIDAnnotationUtil.getDocumentIdForFile( jcas );
+      File outputFile;
+      if ( _outputDirPath == null || _outputDirPath.isEmpty() ) {
+         outputFile = new File( docId + FILE_EXTENSION );
+      } else {
+         outputFile = new File( _outputDirPath, docId + FILE_EXTENSION );
+      }
+      // Explicit UTF-8: FileWriter would encode with the platform default charset.
+      try ( final Writer writer = new BufferedWriter( new OutputStreamWriter(
+            new FileOutputStream( outputFile ), java.nio.charset.StandardCharsets.UTF_8 ) ) ) {
+         final Collection<IdentifiedAnnotation> annotations = JCasUtil.select( jcas, IdentifiedAnnotation.class );
+         for ( IdentifiedAnnotation annotation : annotations ) {
+            writeAnnotation( annotation, writer );
+         }
+      } catch ( IOException ioE ) {
+         // Pass the exception itself so the log keeps its type and stack trace.
+         LOGGER.error( "Could not write text span file " + outputFile.getPath(), ioE );
+      }
+      LOGGER.info( "Finished processing" );
+   }
+
+   /**
+    * Writes "className|begin,end|coveredText" and a newline for an Event or Entity; other types are skipped.
+    * NOTE(review): Event / Entity are the refsem imports while process() iterates textsem IdentifiedAnnotation;
+    * confirm an IdentifiedAnnotation can be an instance of either, otherwise nothing is ever written.
+    *
+    * @param annotation annotation whose span and covered text are written; the text is written unescaped
+    * @param writer     writer to which the line is written
+    * @throws IOException if the writer has issues
+    */
+   static public void writeAnnotation( final AnnotationFS annotation,
+                                       final Writer writer ) throws IOException {
+      if ( !(annotation instanceof Event || annotation instanceof Entity) ) {
+         return;
+      }
+      writer.write( annotation.getClass().getName()
+                    + "|" + annotation.getBegin() + "," + annotation.getEnd()
+                    + "|" + annotation.getCoveredText() + "\n" );
+   }
+
+}