You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by aw...@apache.org on 2021/02/06 05:53:56 UTC
[kudu] 01/04: txn_commit-itest: deflake TestCommitTasksReloadOnLeadershipChange
This is an automated email from the ASF dual-hosted git repository.
awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 546e68cfd39e2a6f1b16bd1ddb580d1ebc97c9a4
Author: Andrew Wong <aw...@cloudera.com>
AuthorDate: Fri Feb 5 18:04:23 2021 -0800
txn_commit-itest: deflake TestCommitTasksReloadOnLeadershipChange
The test shows up on the flaky test dashboard as failing around 20% of
the time. As it turns out, transferring leadership by quiescing multiple
replicas can lead to flakiness if we happen to pick a lagging replica as
the new leader.
Instead of targeting a specific tablet server as the host of the new
leaders, we now quiesce only the old leader's tablet server and
un-quiesce all of the other tablet servers, so that any of them is free
to take over leadership.
I ran the test in DEBUG mode 100 times. Before this patch, it failed 16
out of 100 runs; with this patch, it passed all 100 runs.
Change-Id: I2b27864e72888367eb0af7de59e044a9e018c31b
Reviewed-on: http://gerrit.cloudera.org:8080/17031
Tested-by: Kudu Jenkins
Reviewed-by: Hao Hao <ha...@cloudera.com>
---
src/kudu/integration-tests/txn_commit-itest.cc | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/src/kudu/integration-tests/txn_commit-itest.cc b/src/kudu/integration-tests/txn_commit-itest.cc
index 084435e..23fb461 100644
--- a/src/kudu/integration-tests/txn_commit-itest.cc
+++ b/src/kudu/integration-tests/txn_commit-itest.cc
@@ -749,7 +749,6 @@ class ThreeNodeTxnCommitITest : public TxnCommitITest {
*cluster_->mini_tablet_server(i)->server()->mutable_quiescing() = i != leader_idx;
}
leader_ts_ = cluster_->mini_tablet_server(leader_idx);
- non_leader_ts_ = cluster_->mini_tablet_server(leader_idx + 1);
// We should have two leaders for our table, and one for the
// TxnStatusManager.
ASSERT_EVENTUALLY([&] {
@@ -758,7 +757,6 @@ class ThreeNodeTxnCommitITest : public TxnCommitITest {
}
protected:
MiniTabletServer* leader_ts_;
- MiniTabletServer* non_leader_ts_;
};
TEST_F(ThreeNodeTxnCommitITest, TestCommitTasksReloadOnLeadershipChange) {
@@ -776,13 +774,16 @@ TEST_F(ThreeNodeTxnCommitITest, TestCommitTasksReloadOnLeadershipChange) {
ASSERT_FALSE(is_complete);
FLAGS_txn_schedule_background_tasks = true;
- // Change our quiescing state and bring the previous leader down so a new
- // leader can be elected.
- auto* new_leader_ts = non_leader_ts_;
- *new_leader_ts->server()->mutable_quiescing() = false;
+ // Change our quiescing states so a new leader can be elected.
*leader_ts_->server()->mutable_quiescing() = true;
+ for (int i = 0; i < cluster_->num_tablet_servers(); i++) {
+ auto* mts = cluster_->mini_tablet_server(i);
+ if (leader_ts_ != mts) {
+ *mts->server()->mutable_quiescing() = false;
+ }
+ }
ASSERT_EVENTUALLY([&] {
- ASSERT_EQ(3, new_leader_ts->server()->num_raft_leaders()->value());
+ ASSERT_EQ(0, leader_ts_->server()->num_raft_leaders()->value());
});
// Upon becoming leader, we should have started our commit task and completed
// the commit.