Posted to commits@kudu.apache.org by ba...@apache.org on 2021/02/19 20:09:21 UTC

[kudu] branch master updated (4a88bde -> 645f9dc)

This is an automated email from the ASF dual-hosted git repository.

bankim pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git.


    from 4a88bde  [master] fix race between shutdown and TxnManager init
     new df61b71  [tool] KUDU-2181 CLI to add master
     new 645f9dc  [tool] KUDU-2181 CLI to remove master

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/kudu/client/master_proxy_rpc.cc          |  20 ++-
 src/kudu/master/dynamic_multi_master-test.cc | 233 +++++++++++++++------------
 src/kudu/master/master.proto                 |   3 +
 src/kudu/master/master_service.cc            |  16 +-
 src/kudu/tools/kudu-tool-test.cc             |  12 ++
 src/kudu/tools/tool_action_common.cc         |  45 +++++-
 src/kudu/tools/tool_action_common.h          |   3 +-
 src/kudu/tools/tool_action_master.cc         | 164 +++++++++++++++++++
 8 files changed, 380 insertions(+), 116 deletions(-)


[kudu] 02/02: [tool] KUDU-2181 CLI to remove master

Posted by ba...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bankim pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 645f9dc72f62bc4cca8f59f9e043c37586e87f63
Author: Bankim Bhavsar <ba...@cloudera.com>
AuthorDate: Wed Jan 27 15:44:02 2021 -0800

    [tool] KUDU-2181 CLI to remove master
    
    This change adds a CLI to remove a master from the cluster,
    which in turn invokes the RemoveMaster ChangeConfig RPC.
    
    This CLI will be part of the workflow to remove a master
    from an existing cluster or to recover a dead master at the
    same HostPort.
    
    Change-Id: I5c97b03475b0ffc7b387d7dfc17acc4b13858fb7
    Reviewed-on: http://gerrit.cloudera.org:8080/17010
    Reviewed-by: Alexey Serbin <as...@cloudera.com>
    Tested-by: Bankim Bhavsar <ba...@cloudera.com>
    Reviewed-by: Andrew Wong <aw...@cloudera.com>
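
    For context, a hypothetical invocation of the new tool action might look like
    the following (host names and the UUID are placeholders; the actual syntax is
    given by the required and optional parameters registered in
    tool_action_master.cc in the diff below):

        kudu master remove master-1:7051,master-2:7051,master-3:7051 master-3:7051
        kudu master remove master-1:7051,master-2:7051,master-3:7051 master-3:7051 \
            --master_uuid=<uuid>

    The first positional argument is the comma-separated list of all masters in
    the cluster, the second is the RPC address of the master to remove, and
    --master_uuid is only needed to disambiguate multiple masters registered with
    the same RPC address.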
---
 src/kudu/client/master_proxy_rpc.cc          |   3 +
 src/kudu/master/dynamic_multi_master-test.cc | 125 +++++++++++++++------------
 src/kudu/tools/kudu-tool-test.cc             |   7 +-
 src/kudu/tools/tool_action_common.cc         |  12 +++
 src/kudu/tools/tool_action_master.cc         |  50 +++++++++++
 5 files changed, 141 insertions(+), 56 deletions(-)

diff --git a/src/kudu/client/master_proxy_rpc.cc b/src/kudu/client/master_proxy_rpc.cc
index 9d54f0a..92c7bda 100644
--- a/src/kudu/client/master_proxy_rpc.cc
+++ b/src/kudu/client/master_proxy_rpc.cc
@@ -76,6 +76,8 @@ using master::ListTabletServersRequestPB;
 using master::ListTabletServersResponsePB;
 using master::MasterServiceProxy;
 using master::MasterErrorPB;
+using master::RemoveMasterRequestPB;
+using master::RemoveMasterResponsePB;
 using master::ReplaceTabletRequestPB;
 using master::ReplaceTabletResponsePB;
 using rpc::BackoffType;
@@ -299,6 +301,7 @@ template class AsyncLeaderMasterRpc<IsCreateTableDoneRequestPB, IsCreateTableDon
 template class AsyncLeaderMasterRpc<ListMastersRequestPB, ListMastersResponsePB>;
 template class AsyncLeaderMasterRpc<ListTablesRequestPB, ListTablesResponsePB>;
 template class AsyncLeaderMasterRpc<ListTabletServersRequestPB, ListTabletServersResponsePB>;
+template class AsyncLeaderMasterRpc<RemoveMasterRequestPB, RemoveMasterResponsePB>;
 template class AsyncLeaderMasterRpc<ReplaceTabletRequestPB, ReplaceTabletResponsePB>;
 
 } // namespace internal
diff --git a/src/kudu/master/dynamic_multi_master-test.cc b/src/kudu/master/dynamic_multi_master-test.cc
index 9789d98..b6d5593 100644
--- a/src/kudu/master/dynamic_multi_master-test.cc
+++ b/src/kudu/master/dynamic_multi_master-test.cc
@@ -319,29 +319,6 @@ class DynamicMultiMasterTest : public KuduTest {
     ASSERT_EQ(orig_num_masters_, resp.masters_size());
   }
 
-  // Remove the master specified by 'hp' and optional 'master_uuid' from the cluster.
-  // Unset 'hp' can be used to indicate to not supply RPC address in the RemoveMaster RPC request.
-  Status RemoveMasterFromCluster(const HostPort& hp, const string& master_uuid = "") {
-    auto remove_master = [&] (int leader_master_idx) {
-      RemoveMasterRequestPB req;
-      RemoveMasterResponsePB resp;
-      RpcController rpc;
-      if (hp != HostPort()) {
-        *req.mutable_rpc_addr() = HostPortToPB(hp);
-      }
-      if (!master_uuid.empty()) {
-        *req.mutable_master_uuid() = master_uuid;
-      }
-      rpc.RequireServerFeature(MasterFeatures::DYNAMIC_MULTI_MASTER);
-      Status s = cluster_->master_proxy(leader_master_idx)->RemoveMaster(req, &resp, &rpc);
-      boost::optional<MasterErrorPB::Code> err_code(resp.has_error(), resp.error().code());
-      return std::make_pair(s, err_code);
-    };
-
-    RETURN_NOT_OK(RunLeaderMasterRPC(remove_master));
-    return cluster_->RemoveMaster(hp);
-  }
-
   // Fetch a follower (non-leader) master index from the cluster.
   Status GetFollowerMasterIndex(int* follower_master_idx) {
     int leader_master_idx;
@@ -375,7 +352,7 @@ class DynamicMultiMasterTest : public KuduTest {
     }
 
     vector<string> cmd = {"master", "add", JoinStrings(addresses, ",")};
-    if (master != HostPort()) {
+    if (master.Initialized()) {
       cmd.emplace_back(master.ToString());
     }
     if (wait_secs != 0) {
@@ -390,6 +367,31 @@ class DynamicMultiMasterTest : public KuduTest {
     return cluster_->AddMaster(new_master_);
   }
 
+  // Removes the specified master from the cluster using the CLI tool.
+  // Unset 'master_to_remove' can be used to indicate to not supply master address.
+  // Returns generic RuntimeError() on failure with the actual error in the optional 'err'
+  // output parameter.
+  Status RemoveMasterFromClusterUsingCLITool(const HostPort& master_to_remove,
+                                             string* err = nullptr,
+                                             const string& master_uuid = "") {
+    auto hps = cluster_->master_rpc_addrs();
+    vector<string> addresses;
+    addresses.reserve(hps.size());
+    for (const auto& hp : hps) {
+      addresses.emplace_back(hp.ToString());
+    }
+
+    vector<string> args = {"master", "remove", JoinStrings(addresses, ",")};
+    if (master_to_remove.Initialized()) {
+      args.push_back(master_to_remove.ToString());
+    }
+    if (!master_uuid.empty()) {
+      args.push_back("--master_uuid=" + master_uuid);
+    }
+    RETURN_NOT_OK(tools::RunKuduTool(args, nullptr, err));
+    return cluster_->RemoveMaster(master_to_remove);
+  }
+
   // Verify one of the 'expected_roles' and 'expected_member_type' of the new master by
   // making RPC to it directly.
   void VerifyNewMasterDirectly(const set<consensus::RaftPeerPB::Role>& expected_roles,
@@ -853,7 +855,7 @@ TEST_P(ParameterizedRemoveMasterTest, TestRemoveMaster) {
 
   // Verify the master to be removed is part of the list of masters.
   ASSERT_NE(std::find(master_hps.begin(), master_hps.end(), master_to_remove), master_hps.end());
-  ASSERT_OK(RemoveMasterFromCluster(master_to_remove));
+  ASSERT_OK(RemoveMasterFromClusterUsingCLITool(master_to_remove, nullptr, master_to_remove_uuid));
 
   // Verify we have one master less and the desired master was removed.
   vector<HostPort> updated_master_hps;
@@ -868,9 +870,10 @@ TEST_P(ParameterizedRemoveMasterTest, TestRemoveMaster) {
   NO_FATALS(cv.CheckRowCount(kTableName, ClusterVerifier::EXACTLY, 0));
 
   // Removing the same master again should result in an error
-  Status s = RemoveMasterFromCluster(master_to_remove, master_to_remove_uuid);
-  ASSERT_TRUE(s.IsRemoteError()) << s.ToString();
-  ASSERT_STR_CONTAINS(s.ToString(), Substitute("Master $0 not found", master_to_remove.ToString()));
+  string err;
+  Status s = RemoveMasterFromClusterUsingCLITool(master_to_remove, &err, master_to_remove_uuid);
+  ASSERT_TRUE(s.IsRuntimeError()) << s.ToString();
+  ASSERT_STR_CONTAINS(err, Substitute("Master $0 not found", master_to_remove.ToString()));
 
   // Attempt transferring leadership to the removed master
   s = TransferMasterLeadershipAsync(cluster_.get(), master_to_remove_uuid);
@@ -964,20 +967,26 @@ TEST_F(DynamicMultiMasterTest, TestRemoveMasterFeatureFlagNotSpecified) {
   NO_FATALS(VerifyNumMastersAndGetAddresses(orig_num_masters_, &master_hps));
 
   // Try removing non-leader master.
-  int non_leader_master_idx = -1;
-  ASSERT_OK(GetFollowerMasterIndex(&non_leader_master_idx));
-  auto master_to_remove = cluster_->master(non_leader_master_idx)->bound_rpc_hostport();
-  Status s = RemoveMasterFromCluster(master_to_remove);
-  ASSERT_TRUE(s.IsRemoteError()) << s.ToString();
-  ASSERT_STR_MATCHES(s.ToString(), "unsupported feature flags");
+  {
+    int non_leader_master_idx = -1;
+    ASSERT_OK(GetFollowerMasterIndex(&non_leader_master_idx));
+    auto master_to_remove = cluster_->master(non_leader_master_idx)->bound_rpc_hostport();
+    string err;
+    Status s = RemoveMasterFromClusterUsingCLITool(master_to_remove, &err);
+    ASSERT_TRUE(s.IsRuntimeError()) << s.ToString();
+    ASSERT_STR_MATCHES(err, "Cluster does not support RemoveMaster");
+  }
 
   // Try removing leader master
-  int leader_master_idx = -1;
-  ASSERT_OK(cluster_->GetLeaderMasterIndex(&leader_master_idx));
-  master_to_remove = cluster_->master(leader_master_idx)->bound_rpc_hostport();
-  s = RemoveMasterFromCluster(master_to_remove);
-  ASSERT_TRUE(s.IsRemoteError());
-  ASSERT_STR_MATCHES(s.ToString(), "unsupported feature flags");
+  {
+    int leader_master_idx = -1;
+    ASSERT_OK(cluster_->GetLeaderMasterIndex(&leader_master_idx));
+    auto master_to_remove = cluster_->master(leader_master_idx)->bound_rpc_hostport();
+    string err;
+    Status s = RemoveMasterFromClusterUsingCLITool(master_to_remove, &err);
+    ASSERT_TRUE(s.IsRuntimeError()) << s.ToString();
+    ASSERT_STR_MATCHES(err, "Cluster does not support RemoveMaster");
+  }
 
   // Verify no change in number of masters.
   NO_FATALS(VerifyNumMastersAndGetAddresses(orig_num_masters_, &master_hps));
@@ -1045,6 +1054,8 @@ TEST_F(DynamicMultiMasterTest, TestRemoveMasterToNonLeader) {
   int non_leader_master_idx = !leader_master_idx;
   *req.mutable_rpc_addr() = HostPortToPB(cluster_->master(leader_master_idx)->bound_rpc_hostport());
   rpc.RequireServerFeature(MasterFeatures::DYNAMIC_MULTI_MASTER);
+  // Using the master proxy directly instead of using CLI as this test wants to test
+  // invoking RemoveMaster RPC to non-leader master.
   ASSERT_OK(cluster_->master_proxy(non_leader_master_idx)->RemoveMaster(req, &resp, &rpc));
   ASSERT_TRUE(resp.has_error());
   ASSERT_EQ(MasterErrorPB::NOT_THE_LEADER, resp.error().code());
@@ -1102,17 +1113,19 @@ TEST_F(DynamicMultiMasterTest, TestRemoveMasterMissingAndIncorrectHostname) {
 
   // Empty HostPort.
   {
-    Status actual = RemoveMasterFromCluster(HostPort(), string() /* unspecified master */);
-    ASSERT_TRUE(actual.IsRemoteError()) << actual.ToString();
-    ASSERT_STR_CONTAINS(actual.ToString(), "RPC address of the master to be removed not supplied");
+    string err;
+    Status actual = RemoveMasterFromClusterUsingCLITool(HostPort(), &err);
+    ASSERT_TRUE(actual.IsRuntimeError()) << actual.ToString();
+    ASSERT_STR_CONTAINS(err, "must provide positional argument master_address");
   }
 
   // Non-existent hostname.
   {
     HostPort dummy_hp = HostPort("non-existent-path.local", Master::kDefaultPort);
-    Status actual = RemoveMasterFromCluster(dummy_hp);
-    ASSERT_TRUE(actual.IsRemoteError()) << actual.ToString();
-    ASSERT_STR_CONTAINS(actual.ToString(), Substitute("Master $0 not found", dummy_hp.ToString()));
+    string err;
+    Status actual = RemoveMasterFromClusterUsingCLITool(dummy_hp, &err);
+    ASSERT_TRUE(actual.IsRuntimeError()) << actual.ToString();
+    ASSERT_STR_CONTAINS(err, Substitute("Master $0 not found", dummy_hp.ToString()));
   }
 
   // Verify no change in number of masters.
@@ -1133,9 +1146,10 @@ TEST_F(DynamicMultiMasterTest, TestRemoveMasterMismatchHostnameAndUuid) {
   auto random_uuid = std::to_string(rng.Next64());
   auto master_to_remove = cluster_->master(0)->bound_rpc_hostport();
   ASSERT_NE(random_uuid, cluster_->master(0)->uuid());
-  Status actual = RemoveMasterFromCluster(master_to_remove, random_uuid);
-  ASSERT_TRUE(actual.IsRemoteError()) << actual.ToString();
-  ASSERT_STR_CONTAINS(actual.ToString(),
+  string err;
+  Status actual = RemoveMasterFromClusterUsingCLITool(master_to_remove, &err, random_uuid);
+  ASSERT_TRUE(actual.IsRuntimeError()) << actual.ToString();
+  ASSERT_STR_CONTAINS(err,
                       Substitute("Mismatch in UUID of the master $0 to be removed. "
                                  "Expected: $1, supplied: $2.", master_to_remove.ToString(),
                                  cluster_->master(0)->uuid(), random_uuid));
@@ -1171,15 +1185,16 @@ TEST_P(ParameterizedRemoveLeaderMasterTest, TestRemoveLeaderMaster) {
   int leader_master_idx;
   ASSERT_OK(cluster_->GetLeaderMasterIndex(&leader_master_idx));
   const auto master_to_remove = cluster_->master(leader_master_idx)->bound_rpc_hostport();
-  Status s = RemoveMasterFromCluster(master_to_remove);
-  ASSERT_TRUE(s.IsRemoteError()) << s.ToString();
+  string err;
+  Status s = RemoveMasterFromClusterUsingCLITool(master_to_remove, &err);
+  ASSERT_TRUE(s.IsRuntimeError()) << s.ToString();
   if (orig_num_masters_ == 1) {
-    ASSERT_STR_CONTAINS(s.ToString(), Substitute("Can't remove master $0 in a single master "
-                                                 "configuration", master_to_remove.ToString()));
+    ASSERT_STR_CONTAINS(err, Substitute("Can't remove master $0 in a single master "
+                                        "configuration", master_to_remove.ToString()));
   } else {
     ASSERT_GT(orig_num_masters_, 1);
-    ASSERT_STR_CONTAINS(s.ToString(), Substitute("Can't remove the leader master $0",
-                                                 master_to_remove.ToString()));
+    ASSERT_STR_CONTAINS(err, Substitute("Can't remove the leader master $0",
+                                        master_to_remove.ToString()));
   }
 
   // Verify no change in number of masters.
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 2b7b316..14541fb 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -1107,7 +1107,8 @@ TEST_F(ToolTest, TestModeHelp) {
         "status.*Get the status",
         "timestamp.*Get the current timestamp",
         "list.*List masters in a Kudu cluster",
-        "add.*Add a master to the Raft configuration"
+        "add.*Add a master to the Raft configuration",
+        "remove.*Remove a master from the Raft configuration"
     };
     NO_FATALS(RunTestHelp(kCmd, kMasterModeRegexes));
     NO_FATALS(RunTestHelpRpcFlags(kCmd,
@@ -1130,6 +1131,8 @@ TEST_F(ToolTest, TestModeHelp) {
   {
     NO_FATALS(RunTestHelp("master add --help",
                           {"-wait_secs \\(Timeout in seconds to wait for the newly added master"}));
+    NO_FATALS(RunTestHelp("master remove --help",
+                          {"-master_uuid \\(Permanent UUID of the master"}));
   }
   {
     const vector<string> kPbcModeRegexes = {
@@ -1321,6 +1324,8 @@ TEST_F(ToolTest, TestActionMissingRequiredArg) {
   NO_FATALS(RunActionMissingRequiredArg("master list", "master_addresses"));
   NO_FATALS(RunActionMissingRequiredArg("master add", "master_addresses"));
   NO_FATALS(RunActionMissingRequiredArg("master add master.example.com", "master_address"));
+  NO_FATALS(RunActionMissingRequiredArg("master remove", "master_addresses"));
+  NO_FATALS(RunActionMissingRequiredArg("master remove master.example.com", "master_address"));
   NO_FATALS(RunActionMissingRequiredArg("cluster ksck --master_addresses=master.example.com",
                                         "master_addresses"));
   NO_FATALS(RunActionMissingRequiredArg("local_replica cmeta rewrite_raft_config fake_id",
diff --git a/src/kudu/tools/tool_action_common.cc b/src/kudu/tools/tool_action_common.cc
index 2ad498f..661ded3 100644
--- a/src/kudu/tools/tool_action_common.cc
+++ b/src/kudu/tools/tool_action_common.cc
@@ -984,6 +984,18 @@ Status LeaderMasterProxy::SyncRpc(
 
 template
 Status LeaderMasterProxy::SyncRpc(
+    const master::RemoveMasterRequestPB& req,
+    master::RemoveMasterResponsePB* resp,
+    string func_name,
+    const std::function<void(MasterServiceProxy*,
+                             const master::RemoveMasterRequestPB&,
+                             master::RemoveMasterResponsePB*,
+                             RpcController*,
+                             const ResponseCallback&)>& func,
+    std::vector<uint32_t> required_feature_flags);
+
+template
+Status LeaderMasterProxy::SyncRpc(
     const master::ReplaceTabletRequestPB& req,
     master::ReplaceTabletResponsePB* resp,
     string func_name,
diff --git a/src/kudu/tools/tool_action_master.cc b/src/kudu/tools/tool_action_master.cc
index 7216d38..e3da814 100644
--- a/src/kudu/tools/tool_action_master.cc
+++ b/src/kudu/tools/tool_action_master.cc
@@ -59,6 +59,8 @@ DECLARE_bool(force);
 DECLARE_int64(timeout_ms);
 DECLARE_string(columns);
 
+DEFINE_string(master_uuid, "", "Permanent UUID of the master. Only needed to disambiguate in case "
+                               "of multiple masters with same RPC address");
 DEFINE_int64(wait_secs, 8,
              "Timeout in seconds to wait for the newly added master to be promoted as VOTER.");
 
@@ -72,6 +74,8 @@ using kudu::master::Master;
 using kudu::master::MasterServiceProxy;
 using kudu::master::RefreshAuthzCacheRequestPB;
 using kudu::master::RefreshAuthzCacheResponsePB;
+using kudu::master::RemoveMasterRequestPB;
+using kudu::master::RemoveMasterResponsePB;
 using kudu::consensus::RaftPeerPB;
 using kudu::rpc::RpcController;
 using std::cout;
@@ -218,6 +222,38 @@ Status AddMasterChangeConfig(const RunnerContext& context) {
   return Status::OK();
 }
 
+Status RemoveMasterChangeConfig(const RunnerContext& context) {
+  const string& master_hp_str = FindOrDie(context.required_args, kMasterAddressArg);
+  HostPort hp;
+  RETURN_NOT_OK(hp.ParseString(master_hp_str, Master::kDefaultPort));
+
+  LeaderMasterProxy proxy;
+  RETURN_NOT_OK(proxy.Init(context));
+
+  RemoveMasterRequestPB req;
+  RemoveMasterResponsePB resp;
+  *req.mutable_rpc_addr() = HostPortToPB(hp);
+  if (!FLAGS_master_uuid.empty()) {
+    *req.mutable_master_uuid() = FLAGS_master_uuid;
+  }
+
+  RETURN_NOT_OK((proxy.SyncRpc<RemoveMasterRequestPB, RemoveMasterResponsePB>(
+                 req, &resp, "RemoveMaster", &MasterServiceProxy::RemoveMasterAsync,
+                 { master::MasterFeatures::DYNAMIC_MULTI_MASTER })));
+
+  if (resp.has_error()) {
+    return StatusFromPB(resp.error().status());
+  }
+
+  LOG(INFO) << Substitute("Successfully removed master $0 from the cluster. Please follow the next "
+                          "steps from the Kudu administration documentation on "
+                          "\"Removing Kudu Masters from a Multi-Master Deployment\" or "
+                          "\"Recovering from a dead Kudu Master in a Multi-Master Deployment\" "
+                          "as appropriate.",
+                          hp.ToString());
+  return Status::OK();
+}
+
 Status ListMasters(const RunnerContext& context) {
   LeaderMasterProxy proxy;
   RETURN_NOT_OK(proxy.Init(context));
@@ -534,6 +570,20 @@ unique_ptr<Mode> BuildMasterMode() {
         .Build();
     builder.AddAction(std::move(add_master));
   }
+  {
+    unique_ptr<Action> remove_master =
+        ActionBuilder("remove", &RemoveMasterChangeConfig)
+        .Description("Remove a master from the Raft configuration of the Kudu cluster. "
+                     "Please refer to the Kudu administration documentation on "
+                     "\"Removing Kudu Masters from a Multi-Master Deployment\" or "
+                     "\"Recovering from a dead Kudu Master in a Multi-Master Deployment\" "
+                     "as appropriate.")
+        .AddRequiredParameter({ kMasterAddressesArg, kMasterAddressesArgDesc })
+        .AddRequiredParameter({ kMasterAddressArg, kMasterAddressDesc })
+        .AddOptionalParameter("master_uuid")
+        .Build();
+    builder.AddAction(std::move(remove_master));
+  }
 
   return builder.Build();
 }


[kudu] 01/02: [tool] KUDU-2181 CLI to add master

Posted by ba...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bankim pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit df61b71f0093008f6f2cdc88b4380a575f42bf18
Author: Bankim Bhavsar <ba...@cloudera.com>
AuthorDate: Wed Sep 30 16:24:04 2020 -0700

    [tool] KUDU-2181 CLI to add master
    
    This change adds a CLI that invokes the AddMaster RPC to
    perform a Raft ChangeConfig.
    
    This CLI will be part of the workflow to migrate to multiple
    masters in a Kudu cluster.
    
    Change-Id: I507f301d1aba17327eb35728eed0d765e86ef4cc
    Reviewed-on: http://gerrit.cloudera.org:8080/16530
    Reviewed-by: Alexey Serbin <as...@cloudera.com>
    Tested-by: Bankim Bhavsar <ba...@cloudera.com>
    Reviewed-by: Andrew Wong <aw...@cloudera.com>
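
    For context, a hypothetical invocation of the new tool action might look like
    the following (host names are placeholders; the actual syntax is given by the
    required and optional parameters registered in tool_action_master.cc in the
    diff below):

        kudu master add master-1:7051,master-2:7051 master-3:7051 -wait_secs=30

    The first positional argument is the comma-separated list of the existing
    masters, the second is the RPC address of the master being added, and
    -wait_secs bounds how long the tool waits for the new master to be promoted
    to a VOTER before pointing the operator at the system catalog tablet copy
    steps in the documentation.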
---
 src/kudu/client/master_proxy_rpc.cc          |  17 ++--
 src/kudu/master/dynamic_multi_master-test.cc | 110 +++++++++++++++-----------
 src/kudu/master/master.proto                 |   3 +
 src/kudu/master/master_service.cc            |  16 +++-
 src/kudu/tools/kudu-tool-test.cc             |   7 ++
 src/kudu/tools/tool_action_common.cc         |  33 ++++++--
 src/kudu/tools/tool_action_common.h          |   3 +-
 src/kudu/tools/tool_action_master.cc         | 114 +++++++++++++++++++++++++++
 8 files changed, 241 insertions(+), 62 deletions(-)

diff --git a/src/kudu/client/master_proxy_rpc.cc b/src/kudu/client/master_proxy_rpc.cc
index 815482d..9d54f0a 100644
--- a/src/kudu/client/master_proxy_rpc.cc
+++ b/src/kudu/client/master_proxy_rpc.cc
@@ -46,6 +46,8 @@ using strings::Substitute;
 
 namespace kudu {
 
+using master::AddMasterRequestPB;
+using master::AddMasterResponsePB;
 using master::AlterTableRequestPB;
 using master::AlterTableResponsePB;
 using master::ChangeTServerStateRequestPB;
@@ -259,7 +261,7 @@ bool AsyncLeaderMasterRpc<ReqClass, RespClass>::RetryOrReconnectIfNecessary(
       return true;
     }
     if (err->unsupported_feature_flags_size() > 0) {
-      s = Status::NotSupported(Substitute("Cluster is does not support $0",
+      s = Status::NotSupported(Substitute("Cluster does not support $0",
                                           rpc_name_));
     }
   }
@@ -283,19 +285,20 @@ bool AsyncLeaderMasterRpc<ReqClass, RespClass>::RetryOrReconnectIfNecessary(
   return false;
 }
 
+template class AsyncLeaderMasterRpc<AddMasterRequestPB, AddMasterResponsePB>;
+template class AsyncLeaderMasterRpc<AlterTableRequestPB, AlterTableResponsePB>;
 template class AsyncLeaderMasterRpc<ChangeTServerStateRequestPB, ChangeTServerStateResponsePB>;
 template class AsyncLeaderMasterRpc<CreateTableRequestPB, CreateTableResponsePB>;
-template class AsyncLeaderMasterRpc<IsCreateTableDoneRequestPB, IsCreateTableDoneResponsePB>;
 template class AsyncLeaderMasterRpc<DeleteTableRequestPB, DeleteTableResponsePB>;
-template class AsyncLeaderMasterRpc<AlterTableRequestPB, AlterTableResponsePB>;
-template class AsyncLeaderMasterRpc<IsAlterTableDoneRequestPB, IsAlterTableDoneResponsePB>;
-template class AsyncLeaderMasterRpc<GetTableSchemaRequestPB, GetTableSchemaResponsePB>;
 template class AsyncLeaderMasterRpc<GetTableLocationsRequestPB, GetTableLocationsResponsePB>;
-template class AsyncLeaderMasterRpc<GetTabletLocationsRequestPB, GetTabletLocationsResponsePB>;
+template class AsyncLeaderMasterRpc<GetTableSchemaRequestPB, GetTableSchemaResponsePB>;
 template class AsyncLeaderMasterRpc<GetTableStatisticsRequestPB, GetTableStatisticsResponsePB>;
+template class AsyncLeaderMasterRpc<GetTabletLocationsRequestPB, GetTabletLocationsResponsePB>;
+template class AsyncLeaderMasterRpc<IsAlterTableDoneRequestPB, IsAlterTableDoneResponsePB>;
+template class AsyncLeaderMasterRpc<IsCreateTableDoneRequestPB, IsCreateTableDoneResponsePB>;
+template class AsyncLeaderMasterRpc<ListMastersRequestPB, ListMastersResponsePB>;
 template class AsyncLeaderMasterRpc<ListTablesRequestPB, ListTablesResponsePB>;
 template class AsyncLeaderMasterRpc<ListTabletServersRequestPB, ListTabletServersResponsePB>;
-template class AsyncLeaderMasterRpc<ListMastersRequestPB, ListMastersResponsePB>;
 template class AsyncLeaderMasterRpc<ReplaceTabletRequestPB, ReplaceTabletResponsePB>;
 
 } // namespace internal
diff --git a/src/kudu/master/dynamic_multi_master-test.cc b/src/kudu/master/dynamic_multi_master-test.cc
index 2ba6726..9789d98 100644
--- a/src/kudu/master/dynamic_multi_master-test.cc
+++ b/src/kudu/master/dynamic_multi_master-test.cc
@@ -59,7 +59,6 @@
 #include "kudu/util/net/sockaddr.h"
 #include "kudu/util/net/socket.h"
 #include "kudu/util/random.h"
-#include "kudu/util/slice.h"
 #include "kudu/util/status.h"
 #include "kudu/util/test_macros.h"
 #include "kudu/util/test_util.h"
@@ -320,26 +319,6 @@ class DynamicMultiMasterTest : public KuduTest {
     ASSERT_EQ(orig_num_masters_, resp.masters_size());
   }
 
-  // Adds the specified master to the cluster returning the appropriate error Status for negative
-  // test cases.
-  Status AddMasterToCluster(const HostPort& master) {
-    auto add_master = [&] (int leader_master_idx) {
-      AddMasterRequestPB req;
-      AddMasterResponsePB resp;
-      RpcController rpc;
-      if (master != HostPort()) {
-        *req.mutable_rpc_addr() = HostPortToPB(master);
-      }
-      rpc.RequireServerFeature(MasterFeatures::DYNAMIC_MULTI_MASTER);
-      Status s = cluster_->master_proxy(leader_master_idx)->AddMaster(req, &resp, &rpc);
-      boost::optional<MasterErrorPB::Code> err_code(resp.has_error(), resp.error().code());
-      return std::make_pair(s, err_code);
-    };
-
-    RETURN_NOT_OK(RunLeaderMasterRPC(add_master));
-    return cluster_->AddMaster(new_master_);
-  }
-
   // Remove the master specified by 'hp' and optional 'master_uuid' from the cluster.
   // Unset 'hp' can be used to indicate to not supply RPC address in the RemoveMaster RPC request.
   Status RemoveMasterFromCluster(const HostPort& hp, const string& master_uuid = "") {
@@ -381,6 +360,36 @@ class DynamicMultiMasterTest : public KuduTest {
     return Status::OK();
   }
 
+  // Adds the specified master to the cluster using the CLI tool.
+  // Unset 'master' can be used to indicate to not supply master address.
+  // Optional 'wait_secs' can be used to supply wait timeout to the master add CLI tool.
+  // Returns generic RuntimeError() on failure with the actual error in the optional 'err'
+  // output parameter.
+  Status AddMasterToClusterUsingCLITool(const HostPort& master, string* err = nullptr,
+                                        int wait_secs = 0) {
+    auto hps = cluster_->master_rpc_addrs();
+    vector<string> addresses;
+    addresses.reserve(hps.size());
+    for (const auto& hp : hps) {
+      addresses.emplace_back(hp.ToString());
+    }
+
+    vector<string> cmd = {"master", "add", JoinStrings(addresses, ",")};
+    if (master != HostPort()) {
+      cmd.emplace_back(master.ToString());
+    }
+    if (wait_secs != 0) {
+      cmd.emplace_back("-wait_secs=" + std::to_string(wait_secs));
+    }
+    RETURN_NOT_OK(tools::RunKuduTool(cmd, nullptr, err));
+    // master add CLI doesn't return an error if the master is already present.
+    // So don't try adding to the ExternalMiniCluster.
+    if (err != nullptr && err->find("Master already present") != string::npos)  {
+      return Status::OK();
+    }
+    return cluster_->AddMaster(new_master_);
+  }
+
   // Verify one of the 'expected_roles' and 'expected_member_type' of the new master by
   // making RPC to it directly.
   void VerifyNewMasterDirectly(const set<consensus::RaftPeerPB::Role>& expected_roles,
@@ -639,7 +648,7 @@ TEST_P(ParameterizedAddMasterTest, TestAddMasterCatchupFromWAL) {
   // Bring up the new master and add to the cluster.
   master_hps.emplace_back(reserved_hp_);
   NO_FATALS(StartNewMaster(master_hps));
-  ASSERT_OK(AddMasterToCluster(reserved_hp_));
+  ASSERT_OK(AddMasterToClusterUsingCLITool(reserved_hp_, nullptr, 4));
 
   // Newly added master will be caught up from WAL itself without requiring tablet copy
   // since the system catalog is fresh with a single table.
@@ -666,18 +675,18 @@ TEST_P(ParameterizedAddMasterTest, TestAddMasterCatchupFromWAL) {
   VerifyNewMasterDirectly({ consensus::RaftPeerPB::FOLLOWER, consensus::RaftPeerPB::LEADER },
                           consensus::RaftPeerPB::VOTER);
 
-  // Adding the same master again should return an error.
+  // Adding the same master again should print a message but not throw an error.
   {
-    Status s = AddMasterToCluster(reserved_hp_);
-    ASSERT_TRUE(s.IsRemoteError());
-    ASSERT_STR_CONTAINS(s.message().ToString(), "Master already present");
+    string err;
+    ASSERT_OK(AddMasterToClusterUsingCLITool(reserved_hp_, &err));
+    ASSERT_STR_CONTAINS(err, "Master already present");
   }
 
-  // Adding one of the former masters should return an error.
+  // Adding one of the former masters should print a message but not throw an error.
   {
-    Status s = AddMasterToCluster(master_hps[0]);
-    ASSERT_TRUE(s.IsRemoteError()) << s.ToString();
-    ASSERT_STR_CONTAINS(s.message().ToString(), "Master already present");
+    string err;
+    ASSERT_OK(AddMasterToClusterUsingCLITool(master_hps[0], &err));
+    ASSERT_STR_CONTAINS(err, "Master already present");
   }
 
   NO_FATALS(VerifyClusterAfterMasterAddition(master_hps));
@@ -695,7 +704,10 @@ TEST_P(ParameterizedAddMasterTest, TestAddMasterSysCatalogCopy) {
   // Bring up the new master and add to the cluster.
   master_hps.emplace_back(reserved_hp_);
   NO_FATALS(StartNewMaster(master_hps));
-  ASSERT_OK(AddMasterToCluster(reserved_hp_));
+  string err;
+  ASSERT_OK(AddMasterToClusterUsingCLITool(reserved_hp_, &err));
+  ASSERT_STR_MATCHES(err, Substitute("Please follow the next steps which includes system catalog "
+                                     "tablet copy", reserved_hp_.ToString()));
 
   // Newly added master will be added to the master Raft config but won't be caught up
   // from the WAL and hence remain as a NON_VOTER.
@@ -908,10 +920,10 @@ TEST_F(DynamicMultiMasterTest, TestAddMasterWithNoLastKnownAddr) {
   master_hps.emplace_back(reserved_hp_);
   NO_FATALS(StartNewMaster(master_hps));
 
-  Status actual = AddMasterToCluster(reserved_hp_);
-  ASSERT_TRUE(actual.IsRemoteError()) << actual.ToString();
-  ASSERT_STR_MATCHES(actual.ToString(),
-                     "Invalid config to set as pending: Peer:.* has no address");
+  string err;
+  Status actual = AddMasterToClusterUsingCLITool(reserved_hp_, &err);
+  ASSERT_TRUE(actual.IsRuntimeError()) << actual.ToString();
+  ASSERT_STR_MATCHES(err, "Invalid config to set as pending: Peer:.* has no address");
 
   // Verify no change in number of masters.
   NO_FATALS(VerifyNumMastersAndGetAddresses(orig_num_masters_, &master_hps));
@@ -932,9 +944,10 @@ TEST_F(DynamicMultiMasterTest, TestAddMasterFeatureFlagNotSpecified) {
   master_hps.emplace_back(reserved_hp_);
   NO_FATALS(StartNewMaster(master_hps, false /* master_supports_change_config */));
 
-  Status actual = AddMasterToCluster(reserved_hp_);
-  ASSERT_TRUE(actual.IsRemoteError()) << actual.ToString();
-  ASSERT_STR_MATCHES(actual.ToString(), "unsupported feature flags");
+  string err;
+  Status actual = AddMasterToClusterUsingCLITool(reserved_hp_, &err);
+  ASSERT_TRUE(actual.IsRuntimeError()) << actual.ToString();
+  ASSERT_STR_MATCHES(err, "Cluster does not support AddMaster");
 
   // Verify no change in number of masters.
   NO_FATALS(VerifyNumMastersAndGetAddresses(orig_num_masters_, &master_hps));
@@ -1056,15 +1069,22 @@ TEST_F(DynamicMultiMasterTest, TestAddMasterMissingAndIncorrectAddress) {
   NO_FATALS(StartNewMaster(master_hps));
 
   // Empty HostPort
-  Status actual = AddMasterToCluster(HostPort());
-  ASSERT_TRUE(actual.IsRemoteError()) << actual.ToString();
-  ASSERT_STR_CONTAINS(actual.ToString(), "RPC address of master to be added not supplied");
+  {
+    string err;
+    Status actual = AddMasterToClusterUsingCLITool(HostPort(), &err);
+    ASSERT_TRUE(actual.IsRuntimeError()) << actual.ToString();
+    ASSERT_STR_CONTAINS(err, "must provide positional argument master_address");
+  }
 
   // Non-routable incorrect hostname.
-  actual = AddMasterToCluster(HostPort("non-existent-path.local", Master::kDefaultPort));
-  ASSERT_TRUE(actual.IsRemoteError()) << actual.ToString();
-  ASSERT_STR_CONTAINS(actual.ToString(),
-                      "Network error: unable to resolve address for non-existent-path.local");
+  {
+    string err;
+    Status actual = AddMasterToClusterUsingCLITool(
+        HostPort("non-existent-path.local", Master::kDefaultPort), &err);
+    ASSERT_TRUE(actual.IsRuntimeError()) << actual.ToString();
+    ASSERT_STR_CONTAINS(err,
+                        "Network error: unable to resolve address for non-existent-path.local");
+  }
 
   // Verify no change in number of masters.
   NO_FATALS(VerifyNumMastersAndGetAddresses(orig_num_masters_, &master_hps));
diff --git a/src/kudu/master/master.proto b/src/kudu/master/master.proto
index 1d19ec8..2d64f41 100644
--- a/src/kudu/master/master.proto
+++ b/src/kudu/master/master.proto
@@ -84,6 +84,9 @@ message MasterErrorPB {
 
     // The caller is not authorized to perform the attempted operation.
     NOT_AUTHORIZED = 14;
+
+    // Master is already part of the Raft configuration.
+    MASTER_ALREADY_PRESENT = 15;
   }
 
   // The error code.
diff --git a/src/kudu/master/master_service.cc b/src/kudu/master/master_service.cc
index f40faa0..697cdf8 100644
--- a/src/kudu/master/master_service.cc
+++ b/src/kudu/master/master_service.cc
@@ -262,10 +262,20 @@ void MasterServiceImpl::AddMaster(const AddMasterRequestPB* req,
     return;
   }
 
-  Status s = server_->AddMaster(HostPortFromPB(req->rpc_addr()), rpc);
+  HostPort hp = HostPortFromPB(req->rpc_addr());
+  Status s = server_->AddMaster(hp, rpc);
   if (!s.ok()) {
-    LOG(ERROR) << Substitute("Failed adding master $0:$1. $2", req->rpc_addr().host(),
-                             req->rpc_addr().port(), s.ToString());
+    // Special handling for master already present error for retry scenarios.
+    // Responding back using RespondFailure() will clobber the error code
+    // and hence responding with success and setting the error code.
+    if (s.IsAlreadyPresent()) {
+      LOG(WARNING) << Substitute("Master $0 already present", hp.ToString());
+      StatusToPB(s, resp->mutable_error()->mutable_status());
+      resp->mutable_error()->set_code(MasterErrorPB::MASTER_ALREADY_PRESENT);
+      rpc->RespondSuccess();
+      return;
+    }
+    LOG(ERROR) << Substitute("Failed adding master $0. $1", hp.ToString(), s.ToString());
     rpc->RespondFailure(s);
     return;
   }
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 6a9bfb1..2b7b316 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -1107,6 +1107,7 @@ TEST_F(ToolTest, TestModeHelp) {
         "status.*Get the status",
         "timestamp.*Get the current timestamp",
         "list.*List masters in a Kudu cluster",
+        "add.*Add a master to the Raft configuration"
     };
     NO_FATALS(RunTestHelp(kCmd, kMasterModeRegexes));
     NO_FATALS(RunTestHelpRpcFlags(kCmd,
@@ -1127,6 +1128,10 @@ TEST_F(ToolTest, TestModeHelp) {
     NO_FATALS(RunTestHelpRpcFlags(kSubCmd, {"refresh"}));
   }
   {
+    NO_FATALS(RunTestHelp("master add --help",
+                          {"-wait_secs \\(Timeout in seconds to wait for the newly added master"}));
+  }
+  {
     const vector<string> kPbcModeRegexes = {
         "dump.*Dump a PBC",
         "edit.*Edit a PBC \\(protobuf container\\) file",
@@ -1314,6 +1319,8 @@ TEST_F(ToolTest, TestActionHelp) {
 
 TEST_F(ToolTest, TestActionMissingRequiredArg) {
   NO_FATALS(RunActionMissingRequiredArg("master list", "master_addresses"));
+  NO_FATALS(RunActionMissingRequiredArg("master add", "master_addresses"));
+  NO_FATALS(RunActionMissingRequiredArg("master add master.example.com", "master_address"));
   NO_FATALS(RunActionMissingRequiredArg("cluster ksck --master_addresses=master.example.com",
                                         "master_addresses"));
   NO_FATALS(RunActionMissingRequiredArg("local_replica cmeta rewrite_raft_config fake_id",
diff --git a/src/kudu/tools/tool_action_common.cc b/src/kudu/tools/tool_action_common.cc
index 2b45263..2ad498f 100644
--- a/src/kudu/tools/tool_action_common.cc
+++ b/src/kudu/tools/tool_action_common.cc
@@ -922,11 +922,13 @@ Status LeaderMasterProxy::SyncRpc(const Req& req,
                                   const std::function<void(master::MasterServiceProxy*,
                                                            const Req&, Resp*,
                                                            rpc::RpcController*,
-                                                           const ResponseCallback&)>& func) {
+                                                           const ResponseCallback&)>& func,
+                                  std::vector<uint32_t> required_feature_flags) {
   MonoTime deadline = MonoTime::Now() + MonoDelta::FromMilliseconds(FLAGS_timeout_ms);
   Synchronizer sync;
   AsyncLeaderMasterRpc<Req, Resp> rpc(deadline, client_.get(), BackoffType::EXPONENTIAL,
-      req, resp, func, std::move(func_name), sync.AsStatusCallback(), {});
+      req, resp, func, std::move(func_name), sync.AsStatusCallback(),
+      std::move(required_feature_flags));
   rpc.SendRpc();
   return sync.Wait();
 }
@@ -934,6 +936,18 @@ Status LeaderMasterProxy::SyncRpc(const Req& req,
 // Explicit specializations for callers outside this compilation unit.
 template
 Status LeaderMasterProxy::SyncRpc(
+    const master::AddMasterRequestPB& req,
+    master::AddMasterResponsePB* resp,
+    string func_name,
+    const std::function<void(MasterServiceProxy*,
+                             const master::AddMasterRequestPB&,
+                             master::AddMasterResponsePB*,
+                             RpcController*,
+                             const ResponseCallback&)>& func,
+    std::vector<uint32_t> required_feature_flags);
+
+template
+Status LeaderMasterProxy::SyncRpc(
     const master::ChangeTServerStateRequestPB& req,
     master::ChangeTServerStateResponsePB* resp,
     string func_name,
@@ -941,7 +955,9 @@ Status LeaderMasterProxy::SyncRpc(
                              const master::ChangeTServerStateRequestPB&,
                              master::ChangeTServerStateResponsePB*,
                              RpcController*,
-                             const ResponseCallback&)>& func);
+                             const ResponseCallback&)>& func,
+    std::vector<uint32_t> required_feature_flags);
+
 template
 Status LeaderMasterProxy::SyncRpc(
     const master::ListTabletServersRequestPB& req,
@@ -951,7 +967,9 @@ Status LeaderMasterProxy::SyncRpc(
                              const master::ListTabletServersRequestPB&,
                              master::ListTabletServersResponsePB*,
                              RpcController*,
-                             const ResponseCallback&)>& func);
+                             const ResponseCallback&)>& func,
+    std::vector<uint32_t> required_feature_flags);
+
 template
 Status LeaderMasterProxy::SyncRpc(
     const master::ListMastersRequestPB& req,
@@ -961,7 +979,9 @@ Status LeaderMasterProxy::SyncRpc(
                              const master::ListMastersRequestPB&,
                              master::ListMastersResponsePB*,
                              RpcController*,
-                             const ResponseCallback&)>& func);
+                             const ResponseCallback&)>& func,
+    std::vector<uint32_t> required_feature_flags);
+
 template
 Status LeaderMasterProxy::SyncRpc(
     const master::ReplaceTabletRequestPB& req,
@@ -971,7 +991,8 @@ Status LeaderMasterProxy::SyncRpc(
                              const master::ReplaceTabletRequestPB&,
                              master::ReplaceTabletResponsePB*,
                              RpcController*,
-                             const ResponseCallback&)>& func);
+                             const ResponseCallback&)>& func,
+    std::vector<uint32_t> required_feature_flags);
 
 } // namespace tools
 } // namespace kudu
diff --git a/src/kudu/tools/tool_action_common.h b/src/kudu/tools/tool_action_common.h
index 9bf4d88..c97ea4e 100644
--- a/src/kudu/tools/tool_action_common.h
+++ b/src/kudu/tools/tool_action_common.h
@@ -288,7 +288,8 @@ class LeaderMasterProxy {
                  const std::function<void(master::MasterServiceProxy*,
                                           const Req&, Resp*,
                                           rpc::RpcController*,
-                                          const rpc::ResponseCallback&)>& func)
+                                          const rpc::ResponseCallback&)>& func,
+                 std::vector<uint32_t> required_feature_flags = {})
       WARN_UNUSED_RESULT;
 
  private:
diff --git a/src/kudu/tools/tool_action_master.cc b/src/kudu/tools/tool_action_master.cc
index b0fdeb9..7216d38 100644
--- a/src/kudu/tools/tool_action_master.cc
+++ b/src/kudu/tools/tool_action_master.cc
@@ -16,6 +16,7 @@
 // under the License.
 
 #include <algorithm>
+#include <cstdint>
 #include <functional>
 #include <iostream>
 #include <iterator>
@@ -50,6 +51,7 @@
 #include "kudu/tools/tool_action_common.h"
 #include "kudu/util/init.h"
 #include "kudu/util/monotime.h"
+#include "kudu/util/net/net_util.h"
 #include "kudu/util/status.h"
 #include "kudu/util/string_case.h"
 
@@ -57,6 +59,11 @@ DECLARE_bool(force);
 DECLARE_int64(timeout_ms);
 DECLARE_string(columns);
 
+DEFINE_int64(wait_secs, 8,
+             "Timeout in seconds to wait for the newly added master to be promoted as VOTER.");
+
+using kudu::master::AddMasterRequestPB;
+using kudu::master::AddMasterResponsePB;
 using kudu::master::ConnectToMasterRequestPB;
 using kudu::master::ConnectToMasterResponsePB;
 using kudu::master::ListMastersRequestPB;
@@ -116,6 +123,101 @@ Status MasterTimestamp(const RunnerContext& context) {
   return PrintServerTimestamp(address, Master::kDefaultPort);
 }
 
+Status AddMasterChangeConfig(const RunnerContext& context) {
+  const string& new_master_address = FindOrDie(context.required_args, kMasterAddressArg);
+
+  LeaderMasterProxy proxy;
+  RETURN_NOT_OK(proxy.Init(context));
+
+  HostPort hp;
+  RETURN_NOT_OK(hp.ParseString(new_master_address, Master::kDefaultPort));
+  {
+    AddMasterRequestPB req;
+    AddMasterResponsePB resp;
+    *req.mutable_rpc_addr() = HostPortToPB(hp);
+
+    Status s = proxy.SyncRpc<AddMasterRequestPB, AddMasterResponsePB>(
+        req, &resp, "AddMaster", &MasterServiceProxy::AddMasterAsync,
+        {master::MasterFeatures::DYNAMIC_MULTI_MASTER});
+    // It's possible this is a retry request in which case instead of returning
+    // the master is already present in the Raft config error we make further checks
+    // whether the master has been promoted to a VOTER.
+    bool master_already_present =
+        resp.has_error() && resp.error().code() == master::MasterErrorPB::MASTER_ALREADY_PRESENT;
+    if (!s.ok() && !master_already_present) {
+      return s;
+    }
+    if (master_already_present) {
+      LOG(INFO) << "Master already present. Checking for promotion to VOTER...";
+    }
+  }
+
+  // If the system catalog of the new master can be caught up from the WAL then the new master will
+  // be promoted to a VOTER and become a FOLLOWER. However this can take some time, so we'll
+  // try for a few seconds. It's perfectly normal for the new master to be not caught up from
+  // the WAL in which case subsequent steps of system catalog tablet copy need to be carried out
+  // as outlined in the documentation for adding a new master to Kudu cluster.
+  MonoTime deadline = MonoTime::Now() + MonoDelta::FromSeconds(FLAGS_wait_secs);
+  RaftPeerPB::Role master_role = RaftPeerPB::UNKNOWN_ROLE;
+  RaftPeerPB::MemberType master_type = RaftPeerPB::UNKNOWN_MEMBER_TYPE;
+  do {
+    ListMastersRequestPB req;
+    ListMastersResponsePB resp;
+    RETURN_NOT_OK((proxy.SyncRpc<ListMastersRequestPB, ListMastersResponsePB>(
+                   req, &resp, "ListMasters", &MasterServiceProxy::ListMastersAsync)));
+
+    if (resp.has_error()) {
+      return StatusFromPB(resp.error().status());
+    }
+
+    int i = 0;
+    bool new_master_found = false;
+    for (; i < resp.masters_size(); i++) {
+      const auto& master = resp.masters(i);
+      if (master.has_error()) {
+        LOG(WARNING) << "Failed to retrieve info for master: "
+                     << StatusFromPB(master.error()).ToString();
+        continue;
+      }
+      for (const auto& master_hp : master.registration().rpc_addresses()) {
+        if (hp == HostPortFromPB(master_hp)) {
+          // Found the newly added master
+          new_master_found = true;
+          break;
+        }
+      }
+      if (new_master_found) {
+        break;
+      }
+    }
+    if (!new_master_found) {
+      return Status::NotFound(Substitute("New master $0 not found. Retry adding the master",
+                                         hp.ToString()));
+    }
+    CHECK_LT(i, resp.masters_size());
+    const auto& master = resp.masters(i);
+    master_role = master.role();
+    master_type = master.member_type();
+    if (master_type == RaftPeerPB::VOTER &&
+        (master_role == RaftPeerPB::FOLLOWER || master_role == RaftPeerPB::LEADER)) {
+      LOG(INFO) << Substitute("Successfully added master $0 to the cluster. Please follow the "
+                              "next steps which includes updating master addresses, updating "
+                              "client configuration etc. from the Kudu administration "
+                              "documentation on \"Migrating to Multiple Kudu Masters\".",
+                              hp.ToString());
+      return Status::OK();
+    }
+    SleepFor(MonoDelta::FromMilliseconds(100));
+  } while (MonoTime::Now() < deadline);
+
+  LOG(INFO) << Substitute("New master $0 part of the Raft configuration; role: $1, member_type: "
+                          "$2. Please follow the next steps which includes system catalog tablet "
+                          "copy, updating master addresses etc. from the Kudu administration "
+                          "documentation on \"Migrating to Multiple Kudu Masters\".",
+                          hp.ToString(), master_role, master_type);
+  return Status::OK();
+}
+
 Status ListMasters(const RunnerContext& context) {
   LeaderMasterProxy proxy;
   RETURN_NOT_OK(proxy.Init(context));
@@ -420,6 +522,18 @@ unique_ptr<Mode> BuildMasterMode() {
         .Build();
     builder.AddAction(std::move(list_masters));
   }
+  {
+    unique_ptr<Action> add_master =
+        ActionBuilder("add", &AddMasterChangeConfig)
+        .Description("Add a master to the Raft configuration of the Kudu cluster. "
+                     "Please refer to the Kudu administration documentation on "
+                     "\"Migrating to Multiple Kudu Masters\" for the complete steps.")
+        .AddRequiredParameter({ kMasterAddressesArg, kMasterAddressesArgDesc })
+        .AddRequiredParameter({ kMasterAddressArg, kMasterAddressDesc })
+        .AddOptionalParameter("wait_secs")
+        .Build();
+    builder.AddAction(std::move(add_master));
+  }
 
   return builder.Build();
 }