You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@ignite.apache.org by "Pavel Pereslegin (Jira)" <ji...@apache.org> on 2023/05/16 09:44:00 UTC
[jira] [Resolved] (IGNITE-19247) BatchUpdateException: Replication is timed out" upon inserting rows in batches via JDBC
[ https://issues.apache.org/jira/browse/IGNITE-19247?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Pavel Pereslegin resolved IGNITE-19247.
---------------------------------------
Resolution: Cannot Reproduce
Hello [~Berkov],
this issue is caused by significant GC pressure due to a memory leak, which was fixed in IGNITE-19412.
I also noticed that the current reproducer retries the {{executeBatch}} operation.
Since the set of batch commands is cleared when executed, it must be repopulated (using {{addBatch}}). Otherwise, the retry of {{executeBatch}} will do nothing.
It is also recommended that you manually commit or rollback batch operations (using non-autocommit connection mode) and rollback changes when an exception occurs (otherwise it is difficult to control what part of the batch was committed).
> BatchUpdateException: Replication is timed out" upon inserting rows in batches via JDBC
> ---------------------------------------------------------------------------------------
>
> Key: IGNITE-19247
> URL: https://issues.apache.org/jira/browse/IGNITE-19247
> Project: Ignite
> Issue Type: Bug
> Components: general
> Affects Versions: 3.0
> Reporter: Alexander Belyak
> Assignee: Pavel Pereslegin
> Priority: Critical
> Labels: ignite-3
> Fix For: 3.0
>
> Attachments: ReplicationTimeoutReproducerClientLog.zip, node_0.log.zip, node_1.log.zip, serverLog.zip, test.log
>
>
> Start single node cluster:
> {noformat}
> git commit 78946d4c
> https://github.com/apache/ignite-3.git branch main
> build by:
> ./gradlew clean allDistZip -x test -x integrationTest -x check -x modernizer
> start by:
> /tmp/ignite3-3.0.0-SNAPSHOT/ignite3-db-3.0.0-SNAPSHOT$ export IGNITE_HOME=$(pwd)
> /tmp/ignite3-3.0.0-SNAPSHOT/ignite3-db-3.0.0-SNAPSHOT$ bin/ignite3db start
> Starting Ignite 3...
> Node named defaultNode started successfully. REST addresses are [http://127.0.1.1:10300]
> /tmp/ignite3-3.0.0-SNAPSHOT/ignite3-cli-3.0.0-SNAPSHOT$ bin/ignite3 cluster init --cluster-endpoint-url=http://localhost:10300 --cluster-name=c1 --meta-storage-node=defaultNode
> Cluster was initialized successfully{noformat}
> The code below just creates <TABLES> tables with <COLUMNS+1> columns (an int key and varchar columns) and inserts <ROWS> rows into each table (with a <SLEEP> ms interval between operations and <RETRY> retry attempts).
>
> {noformat}
> import java.sql.Connection;
> import java.sql.DriverManager;
> import java.sql.PreparedStatement;
> import java.sql.ResultSet;
> import java.sql.SQLException;
> import java.sql.Statement;
> public class TimeoutExceptionReproducer {
> private static final String DB_URL = "jdbc:ignite:thin://127.0.0.1:10800";
> private static final int COLUMNS = 10;
> private static final String TABLE_NAME = "K";
> private static final int ROWS = 100000;
> private static final int TABLES = 3;
> private static final int BATCH_SIZE = 100;
> private static final int SLEEP = 0;
> private static final int RETRY = 1;
> private static String getCreateSql(String tableName) {
> StringBuilder sql = new StringBuilder("create table ").append(tableName).append(" (id int primary key");
> for (int i = 0; i < COLUMNS; i++) {
> sql.append(", col").append(i).append(" varchar NOT NULL");
> }
> sql.append(")");
> return sql.toString();
> }
> private static final void s() {
> if (SLEEP > 0) {
> try {
> Thread.sleep(SLEEP);
> } catch (InterruptedException e) {
> // NoOp
> }
> }
> }
> private static void createTables(Connection connection, String tableName) throws SQLException {
> try (Statement stmt = connection.createStatement()) {
> System.out.println("Creating " + tableName);
> stmt.executeUpdate("drop table if exists " + tableName );
> s();
> stmt.executeUpdate(getCreateSql(tableName));
> s();
> }
> }
> private static String getInsertSql(String tableName) {
> StringBuilder sql = new StringBuilder("insert into ").append(tableName).append(" values(?");
> for (int i = 0; i < COLUMNS; i++) {
> sql.append(", ?");
> }
> sql.append(")");
> return sql.toString();
> }
> private static void insertBatch(PreparedStatement ps) {
> int retryCounter = 0;
> while(retryCounter <= RETRY) {
> try {
> ps.executeBatch();
> return;
> } catch (SQLException e) {
> System.err.println(retryCounter + " error while executing " + ps + ":" + e);
> retryCounter++;
> }
> }
> }
> private static void insertData(Connection connection, String tableName) throws SQLException {
> long ts = System.currentTimeMillis();
> try (PreparedStatement ps = connection.prepareStatement(getInsertSql(tableName))) {
> int batch = 0;
> for (int i = 0; i < ROWS; i++) {
> ps.setInt(1, i);
> for (int j = 2; j < COLUMNS + 2; j++) {
> ps.setString(j, "value" + i + "_" + j);
> }
> ps.addBatch();
> batch++;
> if (batch == BATCH_SIZE) {
> batch = 0;
> insertBatch(ps);
> ps.clearBatch();
> System.out.println("Batch " + BATCH_SIZE + " took " + (System.currentTimeMillis() - ts) + " to get " + i + " rows");
> s();
> ts = System.currentTimeMillis();
> }
> }
> if (batch > 0) {
> insertBatch(ps);
> ps.clearBatch();
> s();
> }
> }
> }
> private static int testData(Connection connection, String tableName) throws SQLException {
> try (Statement stmt = connection.createStatement();
> ResultSet rs = stmt.executeQuery("select count(*) from " + tableName);) {
> rs.next();
> int count = rs.getInt(1);
> int result = ROWS - count;
> if (result == 0) {
> System.out.println("Found " + count + " rows in " + tableName);
> } else {
> System.err.println("Found " + count + " rows in " + tableName + " instead of " + ROWS);
> }
> s();
> return result;
> }
> }
> public static void main(String[] args) throws SQLException {
> int lostRows = 0;
> try (Connection connection = DriverManager.getConnection(DB_URL)) {
> for (int i = 0; i < TABLES; i++) {
> String tableName = TABLE_NAME + i;
> createTables(connection, tableName);
> insertData(connection, tableName);
> lostRows += testData(connection, tableName);
> }
> }
> System.exit(lostRows);
> }
> }
> {noformat}
> Leads to
> 1) Replication timeout exceptions like:
> {noformat}
> 0 error while executing org.apache.ignite.internal.jdbc.JdbcPreparedStatement@68999068:java.sql.BatchUpdateException: IGN-CMN-65535 TraceId:335d779d-bee2-41be-a723-8a34a3b40347 Remote query execution
> 0 error while executing org.apache.ignite.internal.jdbc.JdbcPreparedStatement@6973b51b:java.sql.BatchUpdateException: IGN-REP-3 TraceId:b2d7a459-f3bf-497b-9bfb-bc7126813cd5 Replication is timed out [replicaGrpId=d3c988c3-0c7f-483e-bbc3-b9b124df144c_part_20]{noformat}
> 2) Data loss. Queries:
> {noformat}
> Count(*) from K1{noformat}
> Found 99877 rows in K1 instead of 100000.
> See the server logs in the attachment (serverLog.zip) and the TimeoutExceptionReproducer client log (ReplicationTimeoutReproducerClientLog.zip).
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)