You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2020/06/24 04:22:55 UTC

[kudu] 02/03: [autn_token_expire-itest] make one scenario more robust

This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 2bd3d8281fc6260c441657d378fd4d023d526b6b
Author: Alexey Serbin <al...@apache.org>
AuthorDate: Tue Jun 23 11:43:24 2020 -0700

    [autn_token_expire-itest] make one scenario more robust
    
    This patch addresses a TOCTOU condition in ClientReacquiresAuthnToken
    scenario of the MultiMasterIdleConnectionsITest test and replaces ad-hoc
    master re-election requests with graceful leadership transfer.
    
    I haven't seen the TOCTOU condition manifesting itself yet, but since
    I already spent time on looking around to find the reason behind the
    test failure (the root cause has been found and is fixed by
    https://gerrit.cloudera.org/#/c/16106/), it doesn't hurt to make the
    test scenario a bit more robust.
    
    Change-Id: Id06be851c92958c2c134d9e06a7dc133283aeba2
    Reviewed-on: http://gerrit.cloudera.org:8080/16107
    Reviewed-by: Grant Henke <gr...@apache.org>
    Tested-by: Alexey Serbin <as...@cloudera.com>
---
 .../integration-tests/auth_token_expire-itest.cc   | 37 ++++++++++++++--------
 1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/src/kudu/integration-tests/auth_token_expire-itest.cc b/src/kudu/integration-tests/auth_token_expire-itest.cc
index 6c0806c..a77fcfd 100644
--- a/src/kudu/integration-tests/auth_token_expire-itest.cc
+++ b/src/kudu/integration-tests/auth_token_expire-itest.cc
@@ -522,13 +522,12 @@ class MultiMasterIdleConnectionsITest : public AuthTokenExpireITestBase {
 // when the client tried to open the test table after master leader re-election:
 //   Timed out: GetTableSchema timed out after deadline expired
 TEST_F(MultiMasterIdleConnectionsITest, ClientReacquiresAuthnToken) {
-  const string kTableName = "keep-connection-to-former-master-leader";
-
   if (!AllowSlowTests()) {
     LOG(WARNING) << "test is skipped; set KUDU_ALLOW_SLOW_TESTS=1 to run";
     return;
   }
 
+  const string kTableName = "keep-connection-to-former-master-leader";
   const auto time_start = MonoTime::Now();
 
   shared_ptr<KuduClient> client;
@@ -560,6 +559,9 @@ TEST_F(MultiMasterIdleConnectionsITest, ClientReacquiresAuthnToken) {
     SleepFor(MonoDelta::FromMilliseconds(250));
   }
 
+  int former_leader_master_idx;
+  ASSERT_OK(cluster_->GetLeaderMasterIndex(&former_leader_master_idx));
+
   // Given the relation between the master_rpc_keepalive_time_ms_ and
   // authn_token_validity_seconds_ parameters, the original authn token should
   // expire and connections to follower masters should be torn down due to
@@ -567,26 +569,33 @@ TEST_F(MultiMasterIdleConnectionsITest, ClientReacquiresAuthnToken) {
   // after waiting for additional token expiration interval.
   SleepFor(MonoDelta::FromSeconds(authn_token_validity_seconds_));
 
-  {
-    int former_leader_master_idx;
-    ASSERT_OK(cluster_->GetLeaderMasterIndex(&former_leader_master_idx));
-    const int leader_idx = (former_leader_master_idx + 1) % num_masters_;
-    ASSERT_EVENTUALLY([&] {
+  ASSERT_EVENTUALLY([&] {
+    // The leadership could change behind the scenes, so if a new leader master
+    // is already around, another leadership change isn't needed.
+    int leader_idx;
+    ASSERT_OK(cluster_->GetLeaderMasterIndex(&leader_idx));
+    if (former_leader_master_idx == leader_idx) {
+      // Make a request to the current leader master to step down, transferring
+      // the leadership role to other master.
       consensus::ConsensusServiceProxy proxy(
           cluster_->messenger(), cluster_->master(leader_idx)->bound_rpc_addr(),
           cluster_->master(leader_idx)->bound_rpc_hostport().host());
-      consensus::RunLeaderElectionRequestPB req;
-      req.set_tablet_id(master::SysCatalogTable::kSysCatalogTabletId);
+      consensus::LeaderStepDownRequestPB req;
       req.set_dest_uuid(cluster_->master(leader_idx)->uuid());
+      req.set_tablet_id(master::SysCatalogTable::kSysCatalogTabletId);
+      req.set_mode(consensus::LeaderStepDownMode::GRACEFUL);
+      consensus::LeaderStepDownResponsePB resp;
       rpc::RpcController rpc;
-      rpc.set_timeout(MonoDelta::FromSeconds(1));
-      consensus::RunLeaderElectionResponsePB resp;
-      ASSERT_OK(proxy.RunLeaderElection(req, &resp, &rpc));
+      rpc.set_timeout(MonoDelta::FromSeconds(3));
+      ASSERT_OK(proxy.LeaderStepDown(req, &resp, &rpc));
+
+      // Make sure the leader has actually changed. If not, the step-down
+      // request is retried until ASSERT_EVENTUALLY() times out.
       int idx;
       ASSERT_OK(cluster_->GetLeaderMasterIndex(&idx));
       ASSERT_NE(former_leader_master_idx, idx);
-    });
-  }
+    }
+  });
 
   // Try to open the table after leader master re-election. The former leader
   // responds with NOT_THE_LEADER error even if the authn token has expired