You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2019/08/28 13:58:03 UTC

svn commit: r1866038 - in /ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc: AbstractJdbcWriter.java db/JdbcDb.java table/AbstractJdbcTable.java table/AbstractUmlsTable.java table/JdbcTable.java

Author: seanfinan
Date: Wed Aug 28 13:58:03 2019
New Revision: 1866038

URL: http://svn.apache.org/viewvc?rev=1866038&view=rev
Log:
Add "BatchSize" parameter to AbstractJdbcWriter to change the default batch size.  0 or 1 = no internal batching.
Add setBatchSize methods to JdbcDb and JdbcTable so that the BatchSize value is applied to each table.
AbstractUmlsTable: moved two collection instantiations outside the loop; the collections are now cleared inside the loop.

Modified:
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/AbstractJdbcWriter.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/db/JdbcDb.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/AbstractJdbcTable.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/AbstractUmlsTable.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/JdbcTable.java

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/AbstractJdbcWriter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/AbstractJdbcWriter.java?rev=1866038&r1=1866037&r2=1866038&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/AbstractJdbcWriter.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/AbstractJdbcWriter.java Wed Aug 28 13:58:03 2019
@@ -59,9 +59,17 @@ abstract public class AbstractJdbcWriter
    )
    private String _keepAlive;
 
+   static public final String PARAM_BATCH_SIZE = "BatchSize";
+   @ConfigurationParameter(
+         name = PARAM_BATCH_SIZE,
+         description = "Number of statements to use in a batch.  0 or 1 denotes that batches should not be used.",
+         mandatory = false
+   )
+   private String _batchSize;
+
 
    // Maximum row count for prepared statement batches
-   static private final int MAX_BATCH_SIZE = 100;
+   static private final int MAX_BATCH_SIZE = 256;
 
    static private final Object DATA_LOCK = new Object();
 
@@ -75,6 +83,14 @@ abstract public class AbstractJdbcWriter
    public void initialize( final UimaContext context ) throws ResourceInitializationException {
       super.initialize( context );
       _jdbcDb = createJdbcDb( _dbDriver, _url, _user, _pass, _keepAlive );
+      if ( _batchSize != null && !_batchSize.trim().isEmpty() ) {
+         try {
+            final int batchSize = Integer.decode( _batchSize.trim() );
+            _jdbcDb.setBatchSize( batchSize );
+         } catch ( NumberFormatException nfE ) {
+            LOGGER.error( "Could not parse batch size " + _batchSize );
+         }
+      }
    }
 
    /**

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/db/JdbcDb.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/db/JdbcDb.java?rev=1866038&r1=1866037&r2=1866038&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/db/JdbcDb.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/db/JdbcDb.java Wed Aug 28 13:58:03 2019
@@ -40,6 +40,15 @@ public interface JdbcDb {
    }
 
    /**
+    * @param batchSize batch size limit after which the batch is written to the db table.  This method must be called after table creation.
+    */
+   default void setBatchSize( final int batchSize ) {
+      for ( JdbcTable<?> table : getTables() ) {
+         table.setBatchSize( batchSize );
+      }
+   }
+
+   /**
     * Close each table.
     *
     * @throws SQLException -

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/AbstractJdbcTable.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/AbstractJdbcTable.java?rev=1866038&r1=1866037&r2=1866038&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/AbstractJdbcTable.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/AbstractJdbcTable.java Wed Aug 28 13:58:03 2019
@@ -14,11 +14,11 @@ import java.sql.SQLException;
 @NotThreadSafe
 abstract public class AbstractJdbcTable<T> implements JdbcTable<T> {
 
-   static private final int DEFAULT_BATCH_LIMIT = 100;
+   static private final int DEFAULT_BATCH_LIMIT = 256;
 
    private final String _tableName;
    private final PreparedStatement _preparedStatement;
-   private int _batchLimit = DEFAULT_BATCH_LIMIT;
+   private int _batchSize = DEFAULT_BATCH_LIMIT;
    private int _batchIndex = 0;
 
    public AbstractJdbcTable( final Connection connection, final String tableName ) throws SQLException {
@@ -44,19 +44,21 @@ abstract public class AbstractJdbcTable<
    }
 
    /**
-    * @param limit batch size limit after which the batch is written to the db table.  Max 10,000.
+    * @param batchSize batch size limit after which the batch is written to the db table.  Max 10,000; values outside 0 to 10,000 are ignored.  0 or 1 disables batching.
     */
-   final public void setBatchLimit( final int limit ) {
-      if ( limit > 0 && limit <= 10000 ) {
-         _batchLimit = limit;
+   @Override
+   final public void setBatchSize( final int batchSize ) {
+      if ( batchSize >= 0 && batchSize <= 10000 ) {
+         _batchSize = batchSize;
       }
    }
 
    /**
-    * @return batch size limit after which the batch is written to the db table.
+    * {@inheritDoc}
     */
-   final public int getBatchLimit() {
-      return _batchLimit;
+   @Override
+   final public int getBatchSize() {
+      return _batchSize;
    }
 
    /**
@@ -65,9 +67,15 @@ abstract public class AbstractJdbcTable<
     */
    protected boolean writeRow() throws SQLException {
       final PreparedStatement statement = getPreparedStatement();
+      if ( _batchSize < 2 ) {
+         // If the batch limit is 0 or 1 then write each row as it is populated.
+         statement.execute();
+         return true;
+      }
+      // Otherwise use a batch.
       statement.addBatch();
       _batchIndex++;
-      if ( _batchIndex >= _batchLimit ) {
+      if ( _batchIndex >= _batchSize ) {
          _batchIndex = 0;
          statement.executeBatch();
          statement.clearBatch();

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/AbstractUmlsTable.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/AbstractUmlsTable.java?rev=1866038&r1=1866037&r2=1866038&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/AbstractUmlsTable.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/AbstractUmlsTable.java Wed Aug 28 13:58:03 2019
@@ -75,14 +75,15 @@ abstract public class AbstractUmlsTable<
       row.initializeCorpus( corpusInitializer );
       row.initializePatient( value );
       row.initializeDocument( value );
+
+      final Collection<UmlsConcept> removals = new ArrayList<>();
+      final Collection<String> cuis = new HashSet<>();
       boolean batchWritten = false;
       final Collection<IdentifiedAnnotation> annotations = JCasUtil.select( value, IdentifiedAnnotation.class );
       for ( IdentifiedAnnotation annotation : annotations ) {
          row.initializeEntity( annotation );
          final Collection<UmlsConcept> umlsConcepts = OntologyConceptUtil.getUmlsConcepts( annotation );
          if ( !_repeatCuis && umlsConcepts.size() > 1 ) {
-            final Collection<UmlsConcept> removals = new ArrayList<>();
-            final Collection<String> cuis = new HashSet<>();
             for ( UmlsConcept concept : umlsConcepts ) {
                if ( cuis.contains( concept.getCui() ) ) {
                   removals.add( concept );
@@ -90,6 +91,8 @@ abstract public class AbstractUmlsTable<
                cuis.add( concept.getCui() );
             }
             umlsConcepts.removeAll( removals );
+            removals.clear();
+            cuis.clear();
          }
          for ( UmlsConcept concept : umlsConcepts ) {
             row.addToStatement( statement, concept );

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/JdbcTable.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/JdbcTable.java?rev=1866038&r1=1866037&r2=1866038&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/JdbcTable.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/JdbcTable.java Wed Aug 28 13:58:03 2019
@@ -27,6 +27,16 @@ public interface JdbcTable<T> {
 
    PreparedStatement getPreparedStatement();
 
+   /**
+    * @param batchSize batch size limit after which the batch is written to the db table.
+    */
+   void setBatchSize( final int batchSize );
+
+   /**
+    * @return batch size limit after which the batch is written to the db table.
+    */
+   int getBatchSize();
+
    default Collection<JdbcField<?>> getFields() {
       return getJdbcRow().getFields();
    }