You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2018/12/24 21:47:07 UTC

svn commit: r1849704 - in /ctakes/trunk: ctakes-core-res/src/main/resources/org/apache/ctakes/core/cr/ ctakes-core-res/src/main/resources/org/apache/ctakes/core/cr/jdbc/ ctakes-core/src/main/java/org/apache/ctakes/core/cr/jdbc/

Author: seanfinan
Date: Mon Dec 24 21:47:07 2018
New Revision: 1849704

URL: http://svn.apache.org/viewvc?rev=1849704&view=rev
Log:
CTAKES-530 : New Uimafitized Jdbc collection reader

Added:
    ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/cr/
    ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/cr/jdbc/
    ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/cr/jdbc/JdbcSettings.piper
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/jdbc/
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/jdbc/Decryptor.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/jdbc/DecryptorWrapper.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/jdbc/JdbcNotesReader.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/jdbc/PassThroughDecryptor.java

Added: ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/cr/jdbc/JdbcSettings.piper
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/cr/jdbc/JdbcSettings.piper?rev=1849704&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/cr/jdbc/JdbcSettings.piper (added)
+++ ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/cr/jdbc/JdbcSettings.piper Mon Dec 24 21:47:07 2018
@@ -0,0 +1,109 @@
+//
+//     The following are parameters that can be set for use by the JdbcNotesReader.
+//     The settings below are examples and should be changed.
+//     To use the JdbcNotesReader with your own settings,
+//     copy this file, edit it with your details, and add two lines to the top of your piper file:
+//          load /my/dir/MyJdbcSettings.piper
+//          reader jdbc.JdbcNotesReader
+//
+
+
+//////////     Mandatory     //////////
+
+//  JDBC driver ClassName.
+set DbDriver=com.ibm.db2.jcc.DB2Driver
+
+//  JDBC URL that specifies database network location and name.
+set DbUrl=jdbc:db2://localhost:0000/changeme
+
+//  Username for database authentication.
+set DbUser=changeme
+
+//  Password for database authentication.
+set DbPass=changeme
+
+//  SQL statement to retrieve the document.
+set SqlStatement="SELECT * FROM MY_TABLE"
+
+//  Name of column that contains the document text.
+set DocColumn=NOTE_TEXT
+
+
+
+//////////     Optional     //////////
+
+//  Flag that determines whether to keep JDBC connection open no matter what.
+# set KeepAlive="true"
+
+//  JDBC decryptor ClassName.  This must be an implementation of org.apache.ctakes.core.cr.jdbc.Decryptor
+# set DbDecryptor=org.apache.ctakes.core.cr.jdbc.PassThroughDecryptor
+
+//  Password for text decryption.
+# set DecryptPass=
+
+//  Specifies column names that will be used to form a document ID.
+# set IdColumns=PATIENT_ID,NOTE_ID
+
+//  Specifies delimiter used when document ID is built.
+# set IdDelimiter=_
+
+//  Name of column that contains the patient identifier.
+# set PatientColumn=PATIENT_ID
+
+//  Name of column that contains the patient id.  This column should contain longs.
+# set PatientIdColumn=PATIENT_NUM
+
+//  Name of column that contains the note type.
+# set NoteTypeColumn=
+
+//  Name of column that contains the note subtype.
+# set NoteSubtypeColumn=
+
+//  Name of column that contains the author specialty.
+# set SpecialtyColumn=
+
+//  Name of column that contains the document standard.
+# set StandardColumn=
+
+//  Name of column that contains the document instance id.  This column should contain longs.
+# set InstanceIdColumn=
+
+//  Name of column that contains the document revision number.  This column should contain integers.
+# set RevisionColumn=
+
+//  Name of column that contains the document revision date.  This column should contain dates.
+# set RevisionDateColumn=
+
+//  Name of column that contains the document original date.
+# set DateColumn=
+
+//  Name of column that contains the source institution.
+# set InstituteColumn=
+
+//  Name of column that contains the encounter id.
+# set EncounterIdColumn=
+
+//  Name of column that contains the patient birth date.  This column should contain dates.
+# set BirthColumn=
+
+//  Name of column that contains the patient death date.  This column should contain dates.
+# set DeathColumn=
+
+//  Name of column that contains the patient gender.
+# set GenderColumn=
+
+//  Name of column that contains the patient first name.
+# set FirstNameColumn=
+
+//  Name of column that contains the patient middle name.
+# set MiddleNameColumn=
+
+//  Name of column that contains the patient last name.
+# set LastNameColumn=
+
+//  Name of column that contains the patient first name soundex.
+# set FirstSoundexColumn=
+
+//  Name of column that contains the patient last name soundex.
+# set LastSoundexColumn=
+

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/jdbc/Decryptor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/jdbc/Decryptor.java?rev=1849704&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/jdbc/Decryptor.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/jdbc/Decryptor.java Mon Dec 24 21:47:07 2018
@@ -0,0 +1,22 @@
+package org.apache.ctakes.core.cr.jdbc;
+
+/**
+ * A Decryptor is required by the CollectionReader to get parsable text from encrypted notes.
+ * Implementations turn the stored (encrypted) note text into plain text using a key.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 3/13/2015
+ */
+public interface Decryptor {
+
+   /**
+    * Decrypts the text of a note.
+    * This method declares no checked exception, so implementations must handle or wrap their own failures.
+    *
+    * @param key  used for decrypting the note
+    * @param note text to decrypt
+    * @return unencrypted text of the note
+    */
+   public String decrypt( final String key, final String note );
+
+}

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/jdbc/DecryptorWrapper.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/jdbc/DecryptorWrapper.java?rev=1849704&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/jdbc/DecryptorWrapper.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/jdbc/DecryptorWrapper.java Mon Dec 24 21:47:07 2018
@@ -0,0 +1,50 @@
+package org.apache.ctakes.core.cr.jdbc;
+
+
+import org.apache.log4j.Logger;
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+/**
+ * External libraries will not implement Decryptor, but we must handle them somehow.
+ * This adapts any object that has a suitable decryption method to the {@link Decryptor} interface
+ * by invoking that method through reflection.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 3/13/2015
+ */
+final public class DecryptorWrapper implements Decryptor {
+
+   static private final Logger LOGGER = Logger.getLogger( "DecryptorWrapper" );
+
+   private final Object _decryptorThing;
+   private final Method _decryptionMethod;
+
+   /**
+    * @param decryptorThing   any object that has a proper decryption method
+    * @param decryptionMethod any method that accepts a key as the first parameter and note text as the second parameter
+    *                         and returns unencrypted note text
+    */
+   public DecryptorWrapper( final Object decryptorThing, final Method decryptionMethod ) {
+      _decryptorThing = decryptorThing;
+      _decryptionMethod = decryptionMethod;
+   }
+
+
+   /**
+    * Attempts to decrypt the note using a non-Decryptor that has a decryption method.
+    * If the reflective call fails, the failure is logged with its cause and the note is returned unchanged.
+    * {@inheritDoc}
+    *
+    * @param key  passed as the first argument of the wrapped method
+    * @param note passed as the second argument of the wrapped method
+    * @return the wrapped method's result, or the original note if the call failed
+    */
+   @Override
+   public String decrypt( final String key, final String note ) {
+      try {
+         return (String)_decryptionMethod.invoke( _decryptorThing, key, note );
+      } catch ( IllegalAccessException | InvocationTargetException multiE ) {
+         // Hand the exception to the logger so the underlying cause is not lost.
+         LOGGER.error( "Could not Decrypt Note using " + _decryptorThing.getClass().getName(), multiE );
+         return note;
+      }
+   }
+
+
+}

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/jdbc/JdbcNotesReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/jdbc/JdbcNotesReader.java?rev=1849704&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/jdbc/JdbcNotesReader.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/jdbc/JdbcNotesReader.java Mon Dec 24 21:47:07 2018
@@ -0,0 +1,848 @@
+package org.apache.ctakes.core.cr.jdbc;
+
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.ctakes.typesystem.type.structured.Demographics;
+import org.apache.ctakes.typesystem.type.structured.DocumentID;
+import org.apache.ctakes.typesystem.type.structured.Metadata;
+import org.apache.ctakes.typesystem.type.structured.SourceData;
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.cas.CASRuntimeException;
+import org.apache.uima.collection.CollectionException;
+import org.apache.uima.fit.component.JCasCollectionReader_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.Progress;
+import org.apache.uima.util.ProgressImpl;
+import sqlWrapper.WrappedConnection;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.sql.*;
+
+
+/**
+ * Collection Reader that pulls documents to be processed from a database.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/24/2018
+ */
+@PipeBitInfo(
+      name = "JDBC Note Table Reader",
+      description = "Reads document texts from database table's fields.",
+      role = PipeBitInfo.Role.READER,
+      products = { PipeBitInfo.TypeProduct.DOCUMENT_ID }
+)
+final public class JdbcNotesReader extends JCasCollectionReader_ImplBase {
+
+   static private final Logger LOGGER = Logger.getLogger( "JdbcNoteTableReader" );
+
+
+   static public final String PARAM_DB_DRIVER = "DbDriver";
+   @ConfigurationParameter(
+         name = PARAM_DB_DRIVER,
+         description = "JDBC driver ClassName."
+   )
+   private String _dbDriver;
+
+   static public final String PARAM_DB_DECRYPTOR = "DbDecryptor";
+   @ConfigurationParameter(
+         name = PARAM_DB_DECRYPTOR,
+         description = "JDBC decryptor ClassName.",
+         mandatory = false
+   )
+   private String _dbDecryptor;
+
+   static public final String PARAM_DECRYPT_PASS = "DecryptPass";
+   @ConfigurationParameter(
+         name = PARAM_DECRYPT_PASS,
+         description = "Password for text decryption.",
+         mandatory = false
+   )
+   private String _decryptPass;
+
+   static public final String PARAM_DB_URL = "DbUrl";
+   @ConfigurationParameter(
+         name = PARAM_DB_URL,
+         description = "JDBC URL that specifies database network location and name."
+   )
+   private String _url;
+
+   static public final String PARAM_DB_USER = "DbUser";
+   @ConfigurationParameter(
+         name = PARAM_DB_USER,
+         description = "Username for database authentication."
+   )
+   private String _user;
+
+   static public final String PARAM_DB_PASS = "DbPass";
+   @ConfigurationParameter(
+         name = PARAM_DB_PASS,
+         description = "Password for database authentication."
+   )
+   private String _pass;
+
+   static public final String PARAM_KEEP_ALIVE = "KeepAlive";
+   @ConfigurationParameter(
+         name = PARAM_KEEP_ALIVE,
+         description = "Flag that determines whether to keep JDBC connection open no matter what.",
+         mandatory = false
+   )
+   private String _keepAlive;
+
+   static public final String PARAM_SQL = "SqlStatement";
+   @ConfigurationParameter(
+         name = PARAM_SQL,
+         description = "SQL statement to retrieve the document."
+   )
+   private String _sqlStatement;
+
+
+   /**
+    * Name of column from resultset that contains the document text. Supported
+    * column types are CHAR, VARCHAR, and CLOB.
+    */
+   static public final String PARAM_DOCTEXT_COL = "DocColumn";
+   @ConfigurationParameter(
+         name = PARAM_DOCTEXT_COL,
+         description = "Name of column that contains the document text."
+   )
+   private String _docTextColumn;
+
+   static public final String PARAM_DOCID_COLS = "IdColumns";
+   @ConfigurationParameter(
+         name = PARAM_DOCID_COLS,
+         description = "Specifies column names that will be used to form a document ID.",
+         mandatory = false
+   )
+   private String[] _docIdColumns;
+
+   static public final String PARAM_DOCID_DELIMITER = "IdDelimiter";
+   @ConfigurationParameter(
+         name = PARAM_DOCID_DELIMITER,
+         description = "Specifies delimiter used when document ID is built.",
+         mandatory = false
+   )
+   private String _docIdDelimiter = "_";
+
+
+   //Metadata
+   static public final String PARAM_PATIENT_COLUMN = "PatientColumn";
+   @ConfigurationParameter(
+         name = PARAM_PATIENT_COLUMN,
+         description = "Name of column that contains the patient identifier.",
+         mandatory = false
+   )
+   private String _patientIdentifier;
+
+   /**
+    * Name of the configuration parameter that holds the patient id column.
+    * The annotation must bind to this name; binding to the id delimiter name would leave this parameter unused.
+    */
+   static public final String PARAM_PATIENT_ID = "PatientIdColumn";
+   @ConfigurationParameter(
+         name = PARAM_PATIENT_ID,
+         description = "Name of column that contains the patient id.",
+         mandatory = false
+   )
+   private String _patientId;
+
+
+   // SourceData
+
+   static public final String PARAM_NOTE_TYPE = "NoteTypeColumn";
+   @ConfigurationParameter(
+         name = PARAM_NOTE_TYPE,
+         description = "Name of column that contains the note type.",
+         mandatory = false
+   )
+   private String _noteTypeCode;
+
+   static public final String PARAM_NOTE_SUBTYPE = "NoteSubtypeColumn";
+   @ConfigurationParameter(
+         name = PARAM_NOTE_SUBTYPE,
+         description = "Name of column that contains the note subtype.",
+         mandatory = false
+   )
+   private String _noteSubtypeCode;
+
+   static public final String PARAM_SPECIALTY = "SpecialtyColumn";
+   @ConfigurationParameter(
+         name = PARAM_SPECIALTY,
+         description = "Name of column that contains the author specialty.",
+         mandatory = false
+   )
+   private String _authorSpecialty;
+
+   static public final String PARAM_DOC_STANDARD = "StandardColumn";
+   @ConfigurationParameter(
+         name = PARAM_DOC_STANDARD,
+         description = "Name of column that contains the document standard.",
+         mandatory = false
+   )
+   private String _documentStandard;
+
+   static public final String PARAM_INSTANCE_ID = "InstanceIdColumn";
+   @ConfigurationParameter(
+         name = PARAM_INSTANCE_ID,
+         description = "Name of column that contains the document instance id.",
+         mandatory = false
+   )
+   private String _sourceInstanceId;
+
+   static public final String PARAM_REVISION = "RevisionColumn";
+   @ConfigurationParameter(
+         name = PARAM_REVISION,
+         description = "Name of column that contains the document revision number.",
+         mandatory = false
+   )
+   private String _sourceRevisionNumber;
+
+   static public final String PARAM_REVISION_DATE = "RevisionDateColumn";
+   @ConfigurationParameter(
+         name = PARAM_REVISION_DATE,
+         description = "Name of column that contains the document revision date.",
+         mandatory = false
+   )
+   private String _sourceRevisionDate;
+
+   static public final String PARAM_DATE_COLUMN = "DateColumn";
+   @ConfigurationParameter(
+         name = PARAM_DATE_COLUMN,
+         description = "Name of column that contains the document original date.",
+         mandatory = false
+   )
+   private String _sourceOriginalDate;
+
+   static public final String PARAM_INSTITUTE = "InstituteColumn";
+   @ConfigurationParameter(
+         name = PARAM_INSTITUTE,
+         description = "Name of column that contains the source institution.",
+         mandatory = false
+   )
+   private String _sourceInstitution;
+
+   static public final String PARAM_ENCOUNTER = "EncounterIdColumn";
+   @ConfigurationParameter(
+         name = PARAM_ENCOUNTER,
+         description = "Name of column that contains the encounter id.",
+         mandatory = false
+   )
+   private String _sourceEncounterId;
+
+   //Demographics
+   static public final String PARAM_BIRTHDAY = "BirthColumn";
+   @ConfigurationParameter(
+         name = PARAM_BIRTHDAY,
+         description = "Name of column that contains the patient birth date.",
+         mandatory = false
+   )
+   private String _birthDate;
+
+   static public final String PARAM_DEATHDAY = "DeathColumn";
+   @ConfigurationParameter(
+         name = PARAM_DEATHDAY,
+         description = "Name of column that contains the patient death date.",
+         mandatory = false
+   )
+   private String _deathDate;
+
+   static public final String PARAM_GENDER = "GenderColumn";
+   @ConfigurationParameter(
+         name = PARAM_GENDER,
+         description = "Name of column that contains the patient gender.",
+         mandatory = false
+   )
+   private String _gender;
+
+   static public final String PARAM_FIRST_NAME = "FirstNameColumn";
+   @ConfigurationParameter(
+         name = PARAM_FIRST_NAME,
+         description = "Name of column that contains the patient first name.",
+         mandatory = false
+   )
+   private String _firstName;
+
+   static public final String PARAM_MIDDLE_NAME = "MiddleNameColumn";
+   @ConfigurationParameter(
+         name = PARAM_MIDDLE_NAME,
+         description = "Name of column that contains the patient middle name.",
+         mandatory = false
+   )
+   private String _middleName;
+
+   static public final String PARAM_LAST_NAME = "LastNameColumn";
+   @ConfigurationParameter(
+         name = PARAM_LAST_NAME,
+         description = "Name of column that contains the patient last name.",
+         mandatory = false
+   )
+   private String _lastName;
+
+   static public final String PARAM_FIRST_SOUNDEX = "FirstSoundexColumn";
+   @ConfigurationParameter(
+         name = PARAM_FIRST_SOUNDEX,
+         description = "Name of column that contains the patient first name soundex.",
+         mandatory = false
+   )
+   private String _firstNameSoundex;
+
+   static public final String PARAM_LAST_SOUNDEX = "LastSoundexColumn";
+   @ConfigurationParameter(
+         name = PARAM_LAST_SOUNDEX,
+         description = "Name of column that contains the patient last name soundex.",
+         mandatory = false
+   )
+   private String _lastNameSoundex;
+
+
+   private Connection _connection;
+
+   private Decryptor _decryptor;
+
+   private PreparedStatement _preparedStatement;
+   private ResultSet _resultSet;
+   private int _docColumnType;
+   private String _docColumnTypeName;
+
+   private long _startMillis;
+   private int _totalRowCount = 0;
+   private int _rowIndex = 0;
+   private String _docId;
+
+   /**
+    * Creates the database connection, the decryptor and the prepared note query from the configured parameters,
+    * then records the start time that {@link #close()} uses to report the elapsed time.
+    * {@inheritDoc}
+    */
+   @Override
+   public void initialize( final UimaContext context ) throws ResourceInitializationException {
+      super.initialize( context );
+      _connection = createConnection( _dbDriver, _url, _user, _pass, _keepAlive );
+      _decryptor = createDecryptor( _dbDecryptor );
+      _preparedStatement = createSqlStatement( _connection );
+      _startMillis = System.currentTimeMillis();
+   }
+
+   /**
+    * Runs the query on the first call, then advances the ResultSet by one row and builds that row's document ID.
+    * Each call moves the cursor, so it must be called exactly once before each {@link #getNext(JCas)}.
+    * When no row remains the ResultSet is closed; any later call returns false without touching the database.
+    * {@inheritDoc}
+    *
+    * @return true if the ResultSet was advanced to another row
+    * @throws CollectionException wrapping any SQLException raised while querying or advancing
+    */
+   @Override
+   public boolean hasNext() throws IOException, CollectionException {
+      try {
+         if ( _resultSet == null ) {
+            fillResultSet();
+            setupDocColumnType();
+         } else if ( _resultSet.isClosed() ) {
+            // Already exhausted by an earlier call: next() on a closed ResultSet would throw.
+            return false;
+         }
+         if ( _resultSet.next() ) {
+            _docId = createDocId();
+            return true;
+         }
+         // it's important to close ResultSets as they can accumulate
+         // in the JVM heap. Too many open result sets can inadvertently
+         // cause the DB conn to be closed by the server.
+         _resultSet.close();
+         return false;
+      } catch ( SQLException sqlE ) {
+         // thrown by the query, the metadata lookup, ResultSet.next() and ResultSet.close()
+         throw new CollectionException( sqlE );
+      }
+   }
+
+
+   /**
+    * Loads the current row into the given cas: document text, document ID and metadata.
+    * The row counter is incremented on every call, before the cas is checked.
+    * {@inheritDoc}
+    *
+    * @throws IOException         if the text cannot be read or decrypted, or the metadata columns cannot be read
+    * @throws CollectionException if the cas is null or refuses the document text
+    */
+   @Override
+   public void getNext( final JCas jCas ) throws IOException, CollectionException {
+      _rowIndex++;
+      if ( jCas == null ) {
+         final String message = "Null CAS " + _rowIndex + " in " + getClass().getName() + ".getNext( JCAS )";
+         throw new CollectionException( new NullPointerException( message ) );
+      }
+      // Raw column value first, then its plain text form; either step may throw IOException
+      final String noteText = getTextDocument( getClobDocument() );
+      try {
+         jCas.setDocumentText( noteText );
+      } catch ( CASRuntimeException casRTE ) {
+         // rethrow as the declared CollectionException
+         throw new CollectionException( casRTE );
+      }
+      // Store the document ID that hasNext() built for this row
+      final DocumentID documentId = new DocumentID( jCas );
+      documentId.setDocumentID( _docId );
+      documentId.addToIndexes();
+      LOGGER.info( "Reading document number " + _rowIndex + " with ID " + _docId );
+      // Patient, source and demographic information
+      try {
+         setMetadata( jCas );
+      } catch ( SQLException sqlE ) {
+         // raised by the ResultSet getters used for metadata, rethrow as the declared IOException
+         throw new IOException( sqlE );
+      }
+   }
+
+   /**
+    * Reports progress as the number of rows handed out so far against the counted total, in entities.
+    * {@inheritDoc}
+    */
+   @Override
+   public Progress[] getProgress() {
+      return new Progress[] { new ProgressImpl( _rowIndex, _totalRowCount, Progress.ENTITIES ) };
+   }
+
+   /**
+    * Logs the elapsed time since initialization, then closes the ResultSet and the PreparedStatement.
+    * Either resource may be missing (for instance when no row was ever requested); missing resources are skipped.
+    * {@inheritDoc}
+    *
+    * @throws IOException wrapping any SQLException raised while closing
+    */
+   @Override
+   public void close() throws IOException {
+      final long totalMillis = System.currentTimeMillis() - _startMillis;
+      final long totalSeconds = totalMillis / 1000L;
+      final long hourSeconds = 60 * 60;
+      final long daySeconds = 24 * hourSeconds;
+      final long days = totalSeconds / daySeconds;
+      final long hours = (totalSeconds - days * daySeconds) / hourSeconds;
+      final long minutes = (totalSeconds - days * daySeconds - hours * hourSeconds) / 60;
+      final long seconds = totalSeconds % 60;
+      LOGGER.info( getClass().getName() + " read " + _totalRowCount + " documents in "
+                   + days + " days, " + hours + " hours, " + minutes + " minutes and " + seconds + " seconds" );
+      try {
+         // _resultSet stays null until hasNext() has run the query
+         if ( _resultSet != null && !_resultSet.isClosed() ) {
+            // Some jdbc drivers may not close the ResultSet when the PreparedStatement is closed
+            _resultSet.close();
+         }
+         // _preparedStatement stays null if initialization failed before it was created
+         if ( _preparedStatement != null && !_preparedStatement.isClosed() ) {
+            _preparedStatement.close();
+         }
+      } catch ( SQLException sqlE ) {
+         // thrown by ResultSet.close() and Statement.close()
+         // rethrow as IOException to fit the declared exception type
+         throw new IOException( sqlE );
+      }
+   }
+
+   /**
+    * Prepares the configured note query and counts the rows that it will return.
+    * Both the prepared statement and the row count are stored in fields of this reader.
+    *
+    * @param connection connection on which the statement is prepared
+    * @return the prepared statement for the configured sql
+    * @throws ResourceInitializationException wrapping any SQLException raised while preparing or counting
+    */
+   private PreparedStatement createSqlStatement( final Connection connection ) throws ResourceInitializationException {
+      try {
+         _preparedStatement = connection.prepareStatement( _sqlStatement );
+         _totalRowCount = getTotalRowCount( connection, _sqlStatement );
+         return _preparedStatement;
+      } catch ( SQLException sqlE ) {
+         // raised by Connection.prepareStatement(..) or by the row count query
+         LOGGER.error( "Could not interact with Database" );
+         throw new ResourceInitializationException( sqlE );
+      }
+   }
+
+   /**
+    * Slices up the query SQL, rebuilds it as a row count statement and runs it.
+    * The count statement is closed even when running it fails.
+    *
+    * @param connection connection on which the count statement is prepared
+    * @param querySql   the sql specified by the user for the full data fetch.
+    * @return total row count
+    * @throws SQLException if the count statement cannot be built or executed
+    */
+   private int getTotalRowCount( final Connection connection, final String querySql ) throws SQLException {
+      final int totalRowCount;
+      // try-with-resources closes the statement on every path; closing an already closed Statement has no effect
+      try ( PreparedStatement countStatement = createCountSql( connection, querySql ) ) {
+         totalRowCount = getTotalRowCount( countStatement );
+      }
+      LOGGER.info( "Processing row count:" + totalRowCount );
+      return totalRowCount;
+   }
+
+   /**
+    * Slices up the query SQL and rebuilds a SQL statement that gets a row count:
+    * everything before the first FROM keyword is replaced by "SELECT COUNT(*) ".
+    * FROM is matched as a whole word and without regard to case, so a column such as FROM_DATE is not mistaken for it.
+    * NOTE(review): a FROM inside a subquery or string literal in the select list would still be picked up first.
+    *
+    * @param connection connection on which the count statement is prepared
+    * @param querySql   the sql specified by the user for the full data fetch.
+    * @return a select count statement
+    * @throws SQLException if the query has no FROM keyword or the statement cannot be prepared
+    */
+   static private PreparedStatement createCountSql( final Connection connection,
+                                                    final String querySql ) throws SQLException {
+      // Search the original text; upper-casing first can change the length of some strings and shift the index
+      final java.util.regex.Matcher fromMatcher
+            = java.util.regex.Pattern.compile( "\\bFROM\\b", java.util.regex.Pattern.CASE_INSENSITIVE )
+                                     .matcher( querySql );
+      if ( !fromMatcher.find() ) {
+         throw new SQLException( "Cannot build a row count statement, no FROM found in: " + querySql );
+      }
+      return connection.prepareStatement( "SELECT COUNT(*) " + querySql.substring( fromMatcher.start() ) );
+   }
+
+   /**
+    * Runs the count statement and reads the count from the first column of the first row.
+    * The ResultSet and the statement are closed on every path, including failures.
+    *
+    * @param countStatement a select count statement
+    * @return number of rows that satisfy the user's select call, 0 if the statement returned no row
+    * @throws SQLException if the statement cannot be executed or read
+    */
+   static private int getTotalRowCount( final PreparedStatement countStatement ) throws SQLException {
+      // Some jdbc drivers may not close the ResultSet when the PreparedStatement is closed, so close it explicitly
+      try ( ResultSet resultSet = countStatement.executeQuery() ) {
+         return resultSet.next() ? resultSet.getInt( 1 ) : 0;
+      } finally {
+         countStatement.close();
+      }
+   }
+
+   /**
+    * Logs the prepared statement, executes it and keeps the returned ResultSet in this reader for row iteration.
+    *
+    * @throws SQLException if the query cannot be executed
+    */
+   private void fillResultSet() throws SQLException {
+      LOGGER.info( "SQL: " + _preparedStatement.toString() );
+      _resultSet = _preparedStatement.executeQuery();
+   }
+
+   /**
+    * Looks up the document text column in the ResultSet metadata and stores its
+    * jdbc type code and type name for later use when reading the text.
+    *
+    * @throws SQLException if the column cannot be found or the metadata cannot be read
+    */
+   private void setupDocColumnType() throws SQLException {
+      final int textColumnIndex = _resultSet.findColumn( _docTextColumn );
+      final ResultSetMetaData metaData = _resultSet.getMetaData();
+      _docColumnType = metaData.getColumnType( textColumnIndex );
+      _docColumnTypeName = metaData.getColumnTypeName( textColumnIndex );
+   }
+
+
+   /**
+    * Builds a document ID from one or more pieces of query data, joined with the configured delimiter.
+    * If no ID columns are specified, if a column value is null, or if an SQLException is caught,
+    * the number of the row is used instead.
+    * This method should not throw an exception that stops the entire run when a row index can be used as an identifier
+    *
+    * @return document ID
+    */
+   private String createDocId() {
+      // hasNext() calls this before getNext(..) increments _rowIndex, so the row being identified is _rowIndex + 1.
+      // Every fallback uses this same value so that IDs stay consistent across the fallback paths.
+      final String rowNumberId = String.valueOf( _rowIndex + 1 );
+      if ( _docIdColumns == null || _docIdColumns.length == 0 ) {
+         return rowNumberId;
+      }
+      final StringBuilder sb = new StringBuilder();
+      try {
+         for ( int i = 0; i < _docIdColumns.length; i++ ) {
+            final Object columnValue = _resultSet.getObject( _docIdColumns[ i ] );
+            if ( columnValue == null ) {
+               // A null value cannot contribute to an ID; use the default rather than fail the run
+               LOGGER.warn( "Null value in ID column " + _docIdColumns[ i ] + ", using row number " + rowNumberId );
+               return rowNumberId;
+            }
+            if ( i > 0 ) {
+               sb.append( _docIdDelimiter );
+            }
+            sb.append( columnValue.toString() );
+         }
+      } catch ( SQLException sqlE ) {
+         // thrown by ResultSet.getObject(..) and should be handled in this method
+         // do not throw an exception here if there is default behavior, which is to use row number
+         return rowNumberId;
+      }
+      return sb.toString();
+   }
+
+   /**
+    * Reads the document text column of the current row.
+    * CLOB columns are read through a character stream; CHAR, VARCHAR and any other type are read as a string,
+    * with a warning for other types whose type name is not "text".
+    *
+    * @return raw document text
+    * @throws IOException wrapping any SQLException, or raised directly while reading a clob
+    */
+   private String getClobDocument() throws IOException {
+      try {
+         switch ( _docColumnType ) {
+            case Types.CLOB:
+               return convertToString( _resultSet.getClob( _docTextColumn ) );
+            case Types.CHAR:
+            case Types.VARCHAR:
+               return _resultSet.getString( _docTextColumn );
+            default:
+               if ( !_docColumnTypeName.equals( "text" ) ) {
+                  LOGGER.warn( "Inferring document text column as string type: " + _docColumnTypeName );
+               }
+               return _resultSet.getString( _docTextColumn );
+         }
+      } catch ( SQLException sqlE ) {
+         // raised by the ResultSet getters or by the clob conversion, rethrow as the declared IOException.
+         // An IOException from the clob conversion is not caught here and passes through as declared.
+         throw new IOException( sqlE );
+      }
+   }
+
+   /**
+    * @param clobDocument raw document text
+    * @return decrypted document text
+    * @throws IOException -
+    */
+   private String getTextDocument( final String clobDocument ) throws IOException {
+      if ( _decryptPass == null || _decryptPass.trim().isEmpty() ) {
+         // Assume that the clob document is not encrypted
+         return clobDocument;
+      }
+      //Decrypt the encrypted doc
+      try {
+         return _decryptor.decrypt( _decryptPass, clobDocument );
+      } catch ( Exception e ) {
+         // raw Exception thrown by decrypt(..) , rethrow as declared IOException
+         throw new IOException( e );
+      }
+   }
+
+   /**
+    * Loads the clob data into a String object, reading it line by line.
+    * Every line, including the last one, is followed by \n in the result.
+    * The reader is closed on every path, including read failures.
+    *
+    * @param clob clob whose character stream is read
+    * @return clob as single string with \n as line separator
+    * @throws SQLException if the character stream cannot be obtained
+    * @throws IOException  if reading from the stream fails
+    */
+   static private String convertToString( final Clob clob ) throws SQLException, IOException {
+      final StringBuilder sb = new StringBuilder();
+      // try-with-resources so that a failed readLine() does not leave the stream open
+      try ( BufferedReader br = new BufferedReader( clob.getCharacterStream() ) ) {
+         String line = br.readLine();
+         while ( line != null ) {
+            sb.append( line );
+            sb.append( '\n' );
+            line = br.readLine();
+         }
+      }
+      return sb.toString();
+   }
+
+
+   /**
+    * Creates the Metadata for the current row: patient identifiers, source data and demographics,
+    * then adds it to the cas indexes.
+    *
+    * @param jCas cas that receives the metadata
+    * @throws SQLException if a configured column cannot be read
+    */
+   private void setMetadata( final JCas jCas ) throws SQLException {
+      final Metadata docMetadata = new Metadata( jCas );
+
+      // Patient identification; the numeric id is only set when a value was obtained
+      docMetadata.setPatientIdentifier( getResult( _patientIdentifier ) );
+      final Long numericPatientId = getResultLong( _patientId );
+      if ( numericPatientId != null ) {
+         docMetadata.setPatientID( numericPatientId );
+      }
+
+      docMetadata.setSourceData( createSourceData( jCas ) );
+      docMetadata.setDemographics( createDemographics( jCas ) );
+
+      jCas.addFsToIndexes( docMetadata );
+   }
+
+   /**
+    * Creates the SourceData for the note using the configured source columns.
+    * Text values are copied as-is.  The revision number and the two dates are only set
+    * when a value is available; dates are stored using their toString() form.
+    *
+    * @param jCas ye olde ...
+    * @return data about note source
+    * @throws SQLException if a column cannot be read from the result set
+    */
+   private SourceData createSourceData( final JCas jCas ) throws SQLException {
+      final SourceData source = new SourceData( jCas );
+      // Columns are read in a fixed order; keep it that way when editing
+      source.setNoteTypeCode( getResult( _noteTypeCode ) );
+      source.setNoteSubTypeCode( getResult( _noteSubtypeCode ) );
+      source.setAuthorSpecialty( getResult( _authorSpecialty ) );
+      source.setDocumentStandard( getResult( _documentStandard ) );
+      source.setSourceInstanceId( getResult( _sourceInstanceId ) );
+      final Integer revisionNumber = getResultInt( _sourceRevisionNumber );
+      if ( revisionNumber != null ) {
+         source.setSourceRevisionNbr( revisionNumber );
+      }
+      final Date revisedOn = getResultDate( _sourceRevisionDate );
+      if ( revisedOn != null ) {
+         source.setSourceRevisionDate( revisedOn.toString() );
+      }
+      final Date originatedOn = getResultDate( _sourceOriginalDate );
+      if ( originatedOn != null ) {
+         source.setSourceOriginalDate( originatedOn.toString() );
+      }
+      source.setSourceInstitution( getResult( _sourceInstitution ) );
+      source.setSourceEncounterId( getResult( _sourceEncounterId ) );
+      return source;
+   }
+
+   /**
+    * Creates the patient Demographics for the note using the configured demographic columns.
+    * Birth and death dates are only set when a value is available and are stored using their
+    * toString() form; gender and name values are copied as-is.
+    *
+    * @param jCas ye olde ...
+    * @return data about note patient
+    * @throws SQLException if a column cannot be read from the result set
+    */
+   private Demographics createDemographics( final JCas jCas ) throws SQLException {
+      final Demographics patient = new Demographics( jCas );
+      final Date born = getResultDate( _birthDate );
+      if ( born != null ) {
+         patient.setBirthDate( born.toString() );
+      }
+      final Date died = getResultDate( _deathDate );
+      if ( died != null ) {
+         patient.setDeathDate( died.toString() );
+      }
+      patient.setGender( getResult( _gender ) );
+      // Names, followed by their soundex encodings
+      patient.setFirstName( getResult( _firstName ) );
+      patient.setMiddleName( getResult( _middleName ) );
+      patient.setLastName( getResult( _lastName ) );
+      patient.setFirstNameSoundex( getResult( _firstNameSoundex ) );
+      patient.setLastNameSoundex( getResult( _lastNameSoundex ) );
+      return patient;
+   }
+
+   /**
+    * Reads a text value from the current result set row.
+    *
+    * @param column column name, may be null or empty when the user did not configure one
+    * @return text in column or empty if column name is actually not specified
+    * @throws SQLException if the column cannot be read from the result set
+    */
+   private String getResult( final String column ) throws SQLException {
+      if ( column != null && !column.isEmpty() ) {
+         return _resultSet.getString( column );
+      }
+      // No column configured, nothing to look up
+      return "";
+   }
+
+   /**
+    * @param column column name
+    * @return int in column or null if column name is actually not specified
+    * @throws SQLException -
+    */
+   private Integer getResultInt( final String column ) throws SQLException {
+      if ( column == null || column.isEmpty() ) {
+         return null;
+      }
+      return _resultSet.getInt( column );
+   }
+
+   /**
+    * @param column column name
+    * @return long in column or null if column name is actually not specified
+    * @throws SQLException -
+    */
+   private Long getResultLong( final String column ) throws SQLException {
+      if ( column == null || column.isEmpty() ) {
+         return null;
+      }
+      return _resultSet.getLong( column );
+   }
+
+   /**
+    * @param column column name
+    * @return date in column or null if column name is actually not specified
+    * @throws SQLException -
+    */
+   private Date getResultDate( final String column ) throws SQLException {
+      if ( column == null || column.isEmpty() ) {
+         return null;
+      }
+      return _resultSet.getDate( column );
+   }
+
+   /**
+    * Opens the database connection used to read notes.
+    * When keepAlive parses as boolean true a WrappedConnection is created from the given settings;
+    * otherwise the driver class is loaded and a connection is requested from the DriverManager.
+    *
+    * @param driver    fully qualified class name of the jdbc driver
+    * @param url       jdbc url of the database
+    * @param user      database user name
+    * @param pass      database password
+    * @param keepAlive text of a boolean; null, empty or anything other than "true" (ignoring case) means false
+    * @return a connection to the database
+    * @throws ResourceInitializationException if the driver class cannot be found or the connection cannot be made
+    */
+   static private Connection createConnection( final String driver,
+                                               final String url,
+                                               final String user,
+                                               final String pass,
+                                               final String keepAlive ) throws ResourceInitializationException {
+      try {
+         // parseBoolean(..) already yields false for null and empty text, and avoids boxing
+         if ( Boolean.parseBoolean( keepAlive ) ) {
+            return new WrappedConnection( user, pass, driver, url );
+         }
+         // Only the class loading side effect is wanted here, the Class object itself is not used
+         Class.forName( driver );
+         return DriverManager.getConnection( url, user, pass );
+      } catch ( ClassNotFoundException | SQLException multE ) {
+         throw new ResourceInitializationException( "Could not construct " + driver,
+               new Object[ 0 ], multE );
+      }
+   }
+
+   /**
+    * Creates the decryptor used on note text.
+    * If no class name is given a PassThroughDecryptor is returned.  If the named class does not implement
+    * Decryptor it is handed to createWrappedDecryptor(..); otherwise it is instantiated through its
+    * public no-argument constructor.
+    *
+    * @param decryptorClassName user-specified class name for a decrypter
+    * @return a class that can be used to decrypt notes or the PassThroughDecryptor if the user didn't specify one.
+    * @throws ResourceInitializationException if the class is unknown, has no public no-argument constructor
+    *                                         or cannot be instantiated
+    */
+   static private Decryptor createDecryptor( final String decryptorClassName ) throws ResourceInitializationException {
+      if ( decryptorClassName == null || decryptorClassName.isEmpty() ) {
+         return new PassThroughDecryptor();
+      }
+      final Object[] emptyObjectArray = new Object[ 0 ];
+      final Class<?> decryptorClass;
+      try {
+         decryptorClass = Class.forName( decryptorClassName );
+      } catch ( ClassNotFoundException cnfE ) {
+         throw new ResourceInitializationException( "Unknown class " + decryptorClassName, emptyObjectArray, cnfE );
+      }
+      if ( !Decryptor.class.isAssignableFrom( decryptorClass ) ) {
+         // Not a Decryptor, but it may still offer a usable decrypt method
+         return createWrappedDecryptor( decryptorClass );
+      }
+      final Constructor<? extends Decryptor> constructor;
+      try {
+         // asSubclass(..) is safe after the isAssignableFrom check and removes the need for a cast
+         constructor = decryptorClass.asSubclass( Decryptor.class ).getConstructor();
+      } catch ( NoSuchMethodException nsmE ) {
+         throw new ResourceInitializationException( "No Constructor for " + decryptorClassName,
+               emptyObjectArray, nsmE );
+      }
+      try {
+         return constructor.newInstance();
+      } catch ( InstantiationException | IllegalAccessException | InvocationTargetException multE ) {
+         throw new ResourceInitializationException( "Could not construct " + decryptorClassName,
+               emptyObjectArray, multE );
+      }
+   }
+
+   /**
+    * Wraps an arbitrary class in a Decryptor.
+    * The class must declare a decrypt( String, String ) method returning String and must have a
+    * public no-argument constructor.  A new instance and the decrypt method are given to a DecryptorWrapper.
+    *
+    * @param decryptorThingClass class that does not implement Decryptor but is expected to declare a decrypt method
+    * @return a decryptor
+    * @throws ResourceInitializationException if the decrypt method is missing or does not return String, if there
+    *                                         is no public no-argument constructor, or if instantiation fails
+    */
+   static private Decryptor createWrappedDecryptor( final Class<?> decryptorThingClass )
+         throws ResourceInitializationException {
+      final Object[] emptyObjectArray = new Object[ 0 ];
+      final String className = decryptorThingClass.getName();
+      final Method decryptMethod;
+      try {
+         decryptMethod = decryptorThingClass.getDeclaredMethod( "decrypt", String.class, String.class );
+      } catch ( NoSuchMethodException nsmE ) {
+         // keep the reflection failure as the cause so the original reason is not lost
+         throw new ResourceInitializationException( className
+                                                    + " has no decrypt( key, note ) method",
+               emptyObjectArray, nsmE );
+      }
+      if ( !decryptMethod.getReturnType().equals( String.class ) ) {
+         throw new ResourceInitializationException( className
+                                                    + ".decrypt( key, note ) method does not return text",
+               emptyObjectArray );
+      }
+      final Constructor<?> constructor;
+      try {
+         constructor = decryptorThingClass.getConstructor();
+      } catch ( NoSuchMethodException nsmE ) {
+         throw new ResourceInitializationException( "No Constructor for " + className,
+               emptyObjectArray, nsmE );
+      }
+      try {
+         LOGGER.info( "Wrapping " + className + " in a Decryptor" );
+         return new DecryptorWrapper( constructor.newInstance(), decryptMethod );
+      } catch ( InstantiationException | IllegalAccessException | InvocationTargetException multE ) {
+         throw new ResourceInitializationException( "Could not construct " + className,
+               emptyObjectArray, multE );
+      }
+   }
+
+
+}

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/jdbc/PassThroughDecryptor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/jdbc/PassThroughDecryptor.java?rev=1849704&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/jdbc/PassThroughDecryptor.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/jdbc/PassThroughDecryptor.java Mon Dec 24 21:47:07 2018
@@ -0,0 +1,27 @@
+package org.apache.ctakes.core.cr.jdbc;
+
+
+import org.apache.log4j.Logger;
+
+/**
+ * Decryptor that performs no decryption; the note text is handed back exactly as given.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 3/13/2015
+ */
+public final class PassThroughDecryptor implements Decryptor {
+
+   private static final Logger LOGGER = Logger.getLogger( "PassThroughDecryptor" );
+
+   /**
+    * Ignores the key and returns the given note text unchanged.
+    * {@inheritDoc}
+    */
+   @Override
+   public String decrypt( final String key, final String note ) {
+      return note;
+   }
+
+
+}