You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2019/08/28 13:58:03 UTC
svn commit: r1866038 - in
/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc:
AbstractJdbcWriter.java db/JdbcDb.java table/AbstractJdbcTable.java
table/AbstractUmlsTable.java table/JdbcTable.java
Author: seanfinan
Date: Wed Aug 28 13:58:03 2019
New Revision: 1866038
URL: http://svn.apache.org/viewvc?rev=1866038&view=rev
Log:
Add "BatchSize" parameter to AbstractJdbcWriter to change the default batch size. 0 or 1 = no internal batching.
Add setBatchSize to JdbcDb and setBatchSize/getBatchSize to JdbcTable so that the BatchSize value can be applied to each table.
AbstractUmlsTable: moved the two collection instantiations outside the loop; the collections are now cleared inside the loop.
Modified:
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/AbstractJdbcWriter.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/db/JdbcDb.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/AbstractJdbcTable.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/AbstractUmlsTable.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/JdbcTable.java
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/AbstractJdbcWriter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/AbstractJdbcWriter.java?rev=1866038&r1=1866037&r2=1866038&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/AbstractJdbcWriter.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/AbstractJdbcWriter.java Wed Aug 28 13:58:03 2019
@@ -59,9 +59,17 @@ abstract public class AbstractJdbcWriter
)
private String _keepAlive;
+ static public final String PARAM_BATCH_SIZE = "BatchSize";
+ @ConfigurationParameter(
+ name = PARAM_BATCH_SIZE,
+ description = "Number of statements to use in a batch. 0 or 1 denotes that batches should not be used.",
+ mandatory = false
+ )
+ private String _batchSize;
+
// Maximum row count for prepared statement batches
- static private final int MAX_BATCH_SIZE = 100;
+ static private final int MAX_BATCH_SIZE = 256;
static private final Object DATA_LOCK = new Object();
@@ -75,6 +83,14 @@ abstract public class AbstractJdbcWriter
public void initialize( final UimaContext context ) throws ResourceInitializationException {
super.initialize( context );
_jdbcDb = createJdbcDb( _dbDriver, _url, _user, _pass, _keepAlive );
+ if ( _batchSize != null && !_batchSize.trim().isEmpty() ) {
+ try {
+ final int batchSize = Integer.decode( _batchSize.trim() );
+ _jdbcDb.setBatchSize( batchSize );
+ } catch ( NumberFormatException nfE ) {
+ LOGGER.error( "Could not parse batch size " + _batchSize );
+ }
+ }
}
/**
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/db/JdbcDb.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/db/JdbcDb.java?rev=1866038&r1=1866037&r2=1866038&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/db/JdbcDb.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/db/JdbcDb.java Wed Aug 28 13:58:03 2019
@@ -40,6 +40,15 @@ public interface JdbcDb {
}
/**
+ * @param batchSize batch size limit after which the batch is written to the db table. Must be called after table creation.
+ */
+ default void setBatchSize( final int batchSize ) {
+ for ( JdbcTable<?> table : getTables() ) {
+ table.setBatchSize( batchSize );
+ }
+ }
+
+ /**
* Close each table.
*
* @throws SQLException -
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/AbstractJdbcTable.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/AbstractJdbcTable.java?rev=1866038&r1=1866037&r2=1866038&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/AbstractJdbcTable.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/AbstractJdbcTable.java Wed Aug 28 13:58:03 2019
@@ -14,11 +14,11 @@ import java.sql.SQLException;
@NotThreadSafe
abstract public class AbstractJdbcTable<T> implements JdbcTable<T> {
- static private final int DEFAULT_BATCH_LIMIT = 100;
+ static private final int DEFAULT_BATCH_LIMIT = 256;
private final String _tableName;
private final PreparedStatement _preparedStatement;
- private int _batchLimit = DEFAULT_BATCH_LIMIT;
+ private int _batchSize = DEFAULT_BATCH_LIMIT;
private int _batchIndex = 0;
public AbstractJdbcTable( final Connection connection, final String tableName ) throws SQLException {
@@ -44,19 +44,21 @@ abstract public class AbstractJdbcTable<
}
/**
- * @param limit batch size limit after which the batch is written to the db table. Max 10,000.
+ * @param batchSize batch size limit after which the batch is written to the db table. Max 10,000. 0 or 1 disable batching.
*/
- final public void setBatchLimit( final int limit ) {
- if ( limit > 0 && limit <= 10000 ) {
- _batchLimit = limit;
+ @Override
+ final public void setBatchSize( final int batchSize ) {
+ if ( batchSize >= 0 && batchSize <= 10000 ) {
+ _batchSize = batchSize;
}
}
/**
- * @return batch size limit after which the batch is written to the db table.
+ * {@inheritDoc}
*/
- final public int getBatchLimit() {
- return _batchLimit;
+ @Override
+ final public int getBatchSize() {
+ return _batchSize;
}
/**
@@ -65,9 +67,15 @@ abstract public class AbstractJdbcTable<
*/
protected boolean writeRow() throws SQLException {
final PreparedStatement statement = getPreparedStatement();
+ if ( _batchSize < 2 ) {
+ // If the batch limit is 0 or 1 then write each row as it is populated.
+ statement.execute();
+ return true;
+ }
+ // Otherwise use a batch.
statement.addBatch();
_batchIndex++;
- if ( _batchIndex >= _batchLimit ) {
+ if ( _batchIndex >= _batchSize ) {
_batchIndex = 0;
statement.executeBatch();
statement.clearBatch();
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/AbstractUmlsTable.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/AbstractUmlsTable.java?rev=1866038&r1=1866037&r2=1866038&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/AbstractUmlsTable.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/AbstractUmlsTable.java Wed Aug 28 13:58:03 2019
@@ -75,14 +75,15 @@ abstract public class AbstractUmlsTable<
row.initializeCorpus( corpusInitializer );
row.initializePatient( value );
row.initializeDocument( value );
+
+ final Collection<UmlsConcept> removals = new ArrayList<>();
+ final Collection<String> cuis = new HashSet<>();
boolean batchWritten = false;
final Collection<IdentifiedAnnotation> annotations = JCasUtil.select( value, IdentifiedAnnotation.class );
for ( IdentifiedAnnotation annotation : annotations ) {
row.initializeEntity( annotation );
final Collection<UmlsConcept> umlsConcepts = OntologyConceptUtil.getUmlsConcepts( annotation );
if ( !_repeatCuis && umlsConcepts.size() > 1 ) {
- final Collection<UmlsConcept> removals = new ArrayList<>();
- final Collection<String> cuis = new HashSet<>();
for ( UmlsConcept concept : umlsConcepts ) {
if ( cuis.contains( concept.getCui() ) ) {
removals.add( concept );
@@ -90,6 +91,8 @@ abstract public class AbstractUmlsTable<
cuis.add( concept.getCui() );
}
umlsConcepts.removeAll( removals );
+ removals.clear();
+ cuis.clear();
}
for ( UmlsConcept concept : umlsConcepts ) {
row.addToStatement( statement, concept );
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/JdbcTable.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/JdbcTable.java?rev=1866038&r1=1866037&r2=1866038&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/JdbcTable.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/table/JdbcTable.java Wed Aug 28 13:58:03 2019
@@ -27,6 +27,16 @@ public interface JdbcTable<T> {
PreparedStatement getPreparedStatement();
+ /**
+ * @param batchSize batch size limit after which the batch is written to the db table.
+ */
+ void setBatchSize( final int batchSize );
+
+ /**
+ * @return batch size limit after which the batch is written to the db table.
+ */
+ int getBatchSize();
+
default Collection<JdbcField<?>> getFields() {
return getJdbcRow().getFields();
}