You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@asterixdb.apache.org by AsterixDB Code Review <do...@asterix-gerrit.ics.uci.edu> on 2022/04/18 17:38:31 UTC
Change in asterixdb[master]: [NO ISSUE][REP] Increase replication ack timeout
From Dmitry Lychagin <dm...@couchbase.com>:
Hello Murtadha Hubail, Ali Alsuliman, Jenkins,
I'd like you to do a code review. Please visit
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/16125
to review the following change.
Change subject: [NO ISSUE][REP] Increase replication ack timeout
......................................................................
[NO ISSUE][REP] Increase replication ack timeout
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
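- Increase the default replication ack timeout from 30 to 120 seconds.
- Log a warning (timeout, master flush lsn, component file) when waiting for the flush lsn from master times out.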
Change-Id: I228620af371d651a84160231cdd832ca1087e7f9
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/15843
Reviewed-by: Ali Alsuliman <al...@gmail.com>
Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
---
M asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
M asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
M asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
M asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/ReplicationProperties.java
M asterixdb/asterix-replication/src/main/java/org/apache/asterix/replication/messaging/MarkComponentValidTask.java
5 files changed, 10 insertions(+), 5 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/25/16125/1
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
index d99f2e9..54793dc 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
@@ -44,7 +44,7 @@
"replication\.log\.buffer\.numpages" : 8,
"replication\.log\.buffer\.pagesize" : 131072,
"replication\.strategy" : "none",
- "replication\.timeout" : 30,
+ "replication\.timeout" : 120,
"ssl\.enabled" : false,
"storage.compression.block" : "snappy",
"storage.global.cleanup.timeout" : 600,
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
index 479de3e..06da0ad 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
@@ -44,7 +44,7 @@
"replication\.log\.buffer\.numpages" : 8,
"replication\.log\.buffer\.pagesize" : 131072,
"replication\.strategy" : "none",
- "replication\.timeout" : 30,
+ "replication\.timeout" : 120,
"ssl\.enabled" : false,
"storage.compression.block" : "snappy",
"storage.global.cleanup.timeout" : 600,
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
index 3349f11..bef14c1 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
@@ -44,7 +44,7 @@
"replication\.log\.buffer\.numpages" : 8,
"replication\.log\.buffer\.pagesize" : 131072,
"replication\.strategy" : "none",
- "replication\.timeout" : 30,
+ "replication\.timeout" : 120,
"ssl\.enabled" : false,
"storage.compression.block" : "snappy",
"storage.global.cleanup.timeout" : 600,
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/ReplicationProperties.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/ReplicationProperties.java
index dd42936..ada3875 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/ReplicationProperties.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/ReplicationProperties.java
@@ -48,7 +48,7 @@
"The size in bytes to replicate in each batch"),
REPLICATION_TIMEOUT(
LONG,
- TimeUnit.SECONDS.toSeconds(30),
+ TimeUnit.SECONDS.toSeconds(120),
"The time in seconds to timeout waiting for master or replica to ack"),
REPLICATION_ENABLED(BOOLEAN, false, "Whether or not data replication is enabled"),
REPLICATION_FACTOR(NONNEGATIVE_INTEGER, 2, "Number of replicas (backups) to maintain per master replica"),
diff --git a/asterixdb/asterix-replication/src/main/java/org/apache/asterix/replication/messaging/MarkComponentValidTask.java b/asterixdb/asterix-replication/src/main/java/org/apache/asterix/replication/messaging/MarkComponentValidTask.java
index 1ea076d..172bd59 100644
--- a/asterixdb/asterix-replication/src/main/java/org/apache/asterix/replication/messaging/MarkComponentValidTask.java
+++ b/asterixdb/asterix-replication/src/main/java/org/apache/asterix/replication/messaging/MarkComponentValidTask.java
@@ -37,12 +37,15 @@
import org.apache.asterix.replication.sync.IndexSynchronizer;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.storage.am.lsm.common.impls.IndexComponentFileReference;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
/**
* A task to mark a replicated LSM component as valid
*/
public class MarkComponentValidTask implements IReplicaTask {
+ private static final Logger LOGGER = LogManager.getLogger();
private final long masterLsn;
private final long lastComponentId;
private final String file;
@@ -90,7 +93,9 @@
// wait until the lsn mapping is flushed to disk
while (!indexCheckpointManager.isFlushed(masterLsn)) {
if (replicationTimeOut <= 0) {
- throw new ReplicationException(new TimeoutException("Couldn't receive flush lsn from master"));
+ LOGGER.warn("{} seconds passed without receiving flush lsn {} from master for component {}",
+ appCtx.getReplicationProperties().getReplicationTimeOut(), masterLsn, file);
+ throw new ReplicationException(new TimeoutException("couldn't receive flush lsn from master"));
}
final long startTime = System.nanoTime();
indexCheckpointManager.wait(replicationTimeOut);
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/16125
To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: I228620af371d651a84160231cdd832ca1087e7f9
Gerrit-Change-Number: 16125
Gerrit-PatchSet: 1
Gerrit-Owner: Dmitry Lychagin <dm...@couchbase.com>
Gerrit-Reviewer: Ali Alsuliman <al...@gmail.com>
Gerrit-Reviewer: Jenkins <je...@fulliautomatix.ics.uci.edu>
Gerrit-Reviewer: Murtadha Hubail <mh...@apache.org>
Gerrit-MessageType: newchange
Change in asterixdb[master]: [NO ISSUE][REP] Increase replication ack timeout
Posted by AsterixDB Code Review <do...@asterix-gerrit.ics.uci.edu>.
From Dmitry Lychagin <dm...@couchbase.com>:
Dmitry Lychagin has abandoned this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/16125 )
Change subject: [NO ISSUE][REP] Increase replication ack timeout
......................................................................
Abandoned
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/16125
To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: I228620af371d651a84160231cdd832ca1087e7f9
Gerrit-Change-Number: 16125
Gerrit-PatchSet: 1
Gerrit-Owner: Dmitry Lychagin <dm...@couchbase.com>
Gerrit-Reviewer: Ali Alsuliman <al...@gmail.com>
Gerrit-Reviewer: Jenkins <je...@fulliautomatix.ics.uci.edu>
Gerrit-Reviewer: Murtadha Hubail <mh...@apache.org>
Gerrit-MessageType: abandon
Change in asterixdb[master]: [NO ISSUE][REP] Increase replication ack timeout
Posted by AsterixDB Code Review <do...@asterix-gerrit.ics.uci.edu>.
From Dmitry Lychagin <dm...@couchbase.com>:
Hello Murtadha Hubail, Ali Alsuliman, Jenkins,
I'd like you to do a code review. Please visit
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/16125
to review the following change.
Change subject: [NO ISSUE][REP] Increase replication ack timeout
......................................................................
[NO ISSUE][REP] Increase replication ack timeout
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
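- Increase the default replication ack timeout from 30 to 120 seconds.
- Log a warning (timeout, master flush lsn, component file) when waiting for the flush lsn from master times out.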
Change-Id: I228620af371d651a84160231cdd832ca1087e7f9
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/15843
Reviewed-by: Ali Alsuliman <al...@gmail.com>
Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
---
M asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
M asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
M asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
M asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/ReplicationProperties.java
M asterixdb/asterix-replication/src/main/java/org/apache/asterix/replication/messaging/MarkComponentValidTask.java
5 files changed, 10 insertions(+), 5 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/25/16125/1
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
index d99f2e9..54793dc 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
@@ -44,7 +44,7 @@
"replication\.log\.buffer\.numpages" : 8,
"replication\.log\.buffer\.pagesize" : 131072,
"replication\.strategy" : "none",
- "replication\.timeout" : 30,
+ "replication\.timeout" : 120,
"ssl\.enabled" : false,
"storage.compression.block" : "snappy",
"storage.global.cleanup.timeout" : 600,
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
index 479de3e..06da0ad 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
@@ -44,7 +44,7 @@
"replication\.log\.buffer\.numpages" : 8,
"replication\.log\.buffer\.pagesize" : 131072,
"replication\.strategy" : "none",
- "replication\.timeout" : 30,
+ "replication\.timeout" : 120,
"ssl\.enabled" : false,
"storage.compression.block" : "snappy",
"storage.global.cleanup.timeout" : 600,
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
index 3349f11..bef14c1 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
@@ -44,7 +44,7 @@
"replication\.log\.buffer\.numpages" : 8,
"replication\.log\.buffer\.pagesize" : 131072,
"replication\.strategy" : "none",
- "replication\.timeout" : 30,
+ "replication\.timeout" : 120,
"ssl\.enabled" : false,
"storage.compression.block" : "snappy",
"storage.global.cleanup.timeout" : 600,
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/ReplicationProperties.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/ReplicationProperties.java
index dd42936..ada3875 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/ReplicationProperties.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/ReplicationProperties.java
@@ -48,7 +48,7 @@
"The size in bytes to replicate in each batch"),
REPLICATION_TIMEOUT(
LONG,
- TimeUnit.SECONDS.toSeconds(30),
+ TimeUnit.SECONDS.toSeconds(120),
"The time in seconds to timeout waiting for master or replica to ack"),
REPLICATION_ENABLED(BOOLEAN, false, "Whether or not data replication is enabled"),
REPLICATION_FACTOR(NONNEGATIVE_INTEGER, 2, "Number of replicas (backups) to maintain per master replica"),
diff --git a/asterixdb/asterix-replication/src/main/java/org/apache/asterix/replication/messaging/MarkComponentValidTask.java b/asterixdb/asterix-replication/src/main/java/org/apache/asterix/replication/messaging/MarkComponentValidTask.java
index 1ea076d..172bd59 100644
--- a/asterixdb/asterix-replication/src/main/java/org/apache/asterix/replication/messaging/MarkComponentValidTask.java
+++ b/asterixdb/asterix-replication/src/main/java/org/apache/asterix/replication/messaging/MarkComponentValidTask.java
@@ -37,12 +37,15 @@
import org.apache.asterix.replication.sync.IndexSynchronizer;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.storage.am.lsm.common.impls.IndexComponentFileReference;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
/**
* A task to mark a replicated LSM component as valid
*/
public class MarkComponentValidTask implements IReplicaTask {
+ private static final Logger LOGGER = LogManager.getLogger();
private final long masterLsn;
private final long lastComponentId;
private final String file;
@@ -90,7 +93,9 @@
// wait until the lsn mapping is flushed to disk
while (!indexCheckpointManager.isFlushed(masterLsn)) {
if (replicationTimeOut <= 0) {
- throw new ReplicationException(new TimeoutException("Couldn't receive flush lsn from master"));
+ LOGGER.warn("{} seconds passed without receiving flush lsn {} from master for component {}",
+ appCtx.getReplicationProperties().getReplicationTimeOut(), masterLsn, file);
+ throw new ReplicationException(new TimeoutException("couldn't receive flush lsn from master"));
}
final long startTime = System.nanoTime();
indexCheckpointManager.wait(replicationTimeOut);
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/16125
To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: I228620af371d651a84160231cdd832ca1087e7f9
Gerrit-Change-Number: 16125
Gerrit-PatchSet: 1
Gerrit-Owner: Dmitry Lychagin <dm...@couchbase.com>
Gerrit-Reviewer: Ali Alsuliman <al...@gmail.com>
Gerrit-Reviewer: Jenkins <je...@fulliautomatix.ics.uci.edu>
Gerrit-Reviewer: Murtadha Hubail <mh...@apache.org>
Gerrit-MessageType: newchange