You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by to...@apache.org on 2016/08/29 21:28:09 UTC

[2/3] kudu git commit: KUDU-687: use client in ksck for master operations

KUDU-687: use client in ksck for master operations

This patch modifies ksck to use the client for all master operations,
restricting direct access only for tserver operations. In doing so, ksck can
now work with multi-master clusters, retrying operations when the leader
master dies.

Interesting things of note:
- I went back and forth on what the semantics of KsckMaster::Connect() ought
  to be. At first I thought we should ping every master, but in the end I
  settled on building the client, which just verifies the existence of a
  leader master. My justification: today's ksck isn't really concerned with
  master health; the master merely provides information that is consumed
  during the real checks, namely those of table and tablet integrity.
- I relented on commit cf009d4 and restored a MiniCluster method to find the
  leader master. It's got the same signature as the ExternalMiniCluster
  variant so that Mike's common cluster interface (a work in progress) can
  expose it.

Change-Id: Icbd6b28ea1b2c88e02cd451095e4dec94b0ebfc3
Reviewed-on: http://gerrit.cloudera.org:8080/4147
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <to...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/b5aa4a76
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/b5aa4a76
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/b5aa4a76

Branch: refs/heads/master
Commit: b5aa4a76c172f4cc2160c6cef7795977218b0d4c
Parents: 6a92db4
Author: Adar Dembo <ad...@cloudera.com>
Authored: Sat Aug 27 15:51:41 2016 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Mon Aug 29 21:18:28 2016 +0000

----------------------------------------------------------------------
 src/kudu/client/schema.h                       |   2 +
 src/kudu/integration-tests/cluster_verifier.cc |   4 +-
 src/kudu/integration-tests/mini_cluster.cc     |  45 ++++++-
 src/kudu/integration-tests/mini_cluster.h      |  23 ++--
 src/kudu/tools/CMakeLists.txt                  |   2 +-
 src/kudu/tools/ksck-test.cc                    |   4 +-
 src/kudu/tools/ksck.h                          |  10 +-
 src/kudu/tools/ksck_remote-test.cc             |  55 +++++---
 src/kudu/tools/ksck_remote.cc                  | 136 +++++++-------------
 src/kudu/tools/ksck_remote.h                   |  31 ++---
 src/kudu/tools/tool_action_cluster.cc          |  19 ++-
 11 files changed, 164 insertions(+), 167 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/b5aa4a76/src/kudu/client/schema.h
----------------------------------------------------------------------
diff --git a/src/kudu/client/schema.h b/src/kudu/client/schema.h
index 099185a..2f85c56 100644
--- a/src/kudu/client/schema.h
+++ b/src/kudu/client/schema.h
@@ -32,6 +32,7 @@ class Schema;
 class TestWorkload;
 
 namespace tools {
+class RemoteKsckMaster;
 class TsAdminClient;
 }
 
@@ -488,6 +489,7 @@ class KUDU_EXPORT KuduSchema {
   friend class internal::LookupRpc;
   friend class internal::MetaCacheEntry;
   friend class internal::WriteRpc;
+  friend class kudu::tools::RemoteKsckMaster;
   friend class kudu::tools::TsAdminClient;
 
   friend KuduSchema KuduSchemaFromSchema(const Schema& schema);

http://git-wip-us.apache.org/repos/asf/kudu/blob/b5aa4a76/src/kudu/integration-tests/cluster_verifier.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/cluster_verifier.cc b/src/kudu/integration-tests/cluster_verifier.cc
index 69c4195..c701779 100644
--- a/src/kudu/integration-tests/cluster_verifier.cc
+++ b/src/kudu/integration-tests/cluster_verifier.cc
@@ -77,10 +77,10 @@ void ClusterVerifier::CheckCluster() {
 }
 
 Status ClusterVerifier::DoKsck() {
-  Sockaddr addr = cluster_->leader_master()->bound_rpc_addr();
+  HostPort hp = cluster_->leader_master()->bound_rpc_hostport();
 
   std::shared_ptr<KsckMaster> master;
-  RETURN_NOT_OK(RemoteKsckMaster::Build(addr, &master));
+  RETURN_NOT_OK(RemoteKsckMaster::Build({ hp.ToString() }, &master));
   std::shared_ptr<KsckCluster> cluster(new KsckCluster(master));
   std::shared_ptr<Ksck> ksck(new Ksck(cluster));
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/b5aa4a76/src/kudu/integration-tests/mini_cluster.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/mini_cluster.cc b/src/kudu/integration-tests/mini_cluster.cc
index 626ed2c..4cf05e6 100644
--- a/src/kudu/integration-tests/mini_cluster.cc
+++ b/src/kudu/integration-tests/mini_cluster.cc
@@ -194,34 +194,34 @@ void MiniCluster::ShutdownMasters() {
   mini_masters_.clear();
 }
 
-MiniMaster* MiniCluster::mini_master(int idx) {
+MiniMaster* MiniCluster::mini_master(int idx) const {
   CHECK_GE(idx, 0) << "Master idx must be >= 0";
   CHECK_LT(idx, mini_masters_.size()) << "Master idx must be < num masters started";
   return mini_masters_[idx].get();
 }
 
-MiniTabletServer* MiniCluster::mini_tablet_server(int idx) {
+MiniTabletServer* MiniCluster::mini_tablet_server(int idx) const {
   CHECK_GE(idx, 0) << "TabletServer idx must be >= 0";
   CHECK_LT(idx, mini_tablet_servers_.size()) << "TabletServer idx must be < 'num_ts_started_'";
   return mini_tablet_servers_[idx].get();
 }
 
-string MiniCluster::GetMasterFsRoot(int idx) {
+string MiniCluster::GetMasterFsRoot(int idx) const {
   return JoinPathSegments(fs_root_, Substitute("master-$0-root", idx));
 }
 
-string MiniCluster::GetTabletServerFsRoot(int idx) {
+string MiniCluster::GetTabletServerFsRoot(int idx) const {
   return JoinPathSegments(fs_root_, Substitute("ts-$0-root", idx));
 }
 
-Status MiniCluster::WaitForTabletServerCount(int count) {
+Status MiniCluster::WaitForTabletServerCount(int count) const {
   vector<shared_ptr<master::TSDescriptor>> descs;
   return WaitForTabletServerCount(count, MatchMode::MATCH_TSERVERS, &descs);
 }
 
 Status MiniCluster::WaitForTabletServerCount(int count,
                                              MatchMode mode,
-                                             vector<shared_ptr<TSDescriptor>>* descs) {
+                                             vector<shared_ptr<TSDescriptor>>* descs) const {
   unordered_set<int> masters_to_search;
   for (int i = 0; i < num_masters(); i++) {
     if (!mini_master(i)->master()->IsShutdown()) {
@@ -278,7 +278,7 @@ Status MiniCluster::WaitForTabletServerCount(int count,
 }
 
 Status MiniCluster::CreateClient(KuduClientBuilder* builder,
-                                 client::sp::shared_ptr<KuduClient>* client) {
+                                 client::sp::shared_ptr<KuduClient>* client) const {
   KuduClientBuilder default_builder;
   if (builder == nullptr) {
     builder = &default_builder;
@@ -291,4 +291,35 @@ Status MiniCluster::CreateClient(KuduClientBuilder* builder,
   return builder->Build(client);
 }
 
+Status MiniCluster::GetLeaderMasterIndex(int* idx) const {
+  const MonoTime kDeadline = MonoTime::Now() + MonoDelta::FromSeconds(5);
+
+  int leader_idx = -1;
+  while (MonoTime::Now() < kDeadline) {
+    for (int i = 0; i < num_masters(); i++) {
+      if (mini_master(i)->master()->IsShutdown()) {
+        continue;
+      }
+      master::CatalogManager* catalog =
+          mini_master(i)->master()->catalog_manager();
+      master::CatalogManager::ScopedLeaderSharedLock l(catalog);
+      if (l.first_failed_status().ok()) {
+        leader_idx = i;
+        break;
+      }
+    }
+    if (leader_idx == -1) {
+      SleepFor(MonoDelta::FromMilliseconds(100));
+    } else {
+      break;
+    }
+  }
+  if (leader_idx == -1) {
+    return Status::NotFound("Leader master was not found within deadline");
+  }
+
+  *idx = leader_idx;
+  return Status::OK();
+}
+
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/b5aa4a76/src/kudu/integration-tests/mini_cluster.h
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/mini_cluster.h b/src/kudu/integration-tests/mini_cluster.h
index 8e64b3f..b54b17a 100644
--- a/src/kudu/integration-tests/mini_cluster.h
+++ b/src/kudu/integration-tests/mini_cluster.h
@@ -104,26 +104,26 @@ class MiniCluster {
   // If this cluster is configured for a single non-distributed
   // master, return the single master. Exits with a CHECK failure if
   // there are multiple masters.
-  master::MiniMaster* mini_master() {
+  master::MiniMaster* mini_master() const {
     CHECK_EQ(mini_masters_.size(), 1);
     return mini_master(0);
   }
 
   // Returns the Master at index 'idx' for this MiniCluster.
-  master::MiniMaster* mini_master(int idx);
+  master::MiniMaster* mini_master(int idx) const;
 
   // Return number of mini masters.
   int num_masters() const { return mini_masters_.size(); }
 
   // Returns the TabletServer at index 'idx' of this MiniCluster.
   // 'idx' must be between 0 and 'num_tablet_servers' -1.
-  tserver::MiniTabletServer* mini_tablet_server(int idx);
+  tserver::MiniTabletServer* mini_tablet_server(int idx) const;
 
   int num_tablet_servers() const { return mini_tablet_servers_.size(); }
 
-  std::string GetMasterFsRoot(int indx);
+  std::string GetMasterFsRoot(int indx) const;
 
-  std::string GetTabletServerFsRoot(int idx);
+  std::string GetTabletServerFsRoot(int idx) const;
 
   // Wait until the number of registered tablet servers reaches the given
   // count on all masters. Returns Status::TimedOut if the desired count is not
@@ -140,9 +140,9 @@ class MiniCluster {
     // Do not perform any matching on the retrieved tservers.
     DO_NOT_MATCH_TSERVERS,
   };
-  Status WaitForTabletServerCount(int count);
+  Status WaitForTabletServerCount(int count) const;
   Status WaitForTabletServerCount(int count, MatchMode mode,
-                                  std::vector<std::shared_ptr<master::TSDescriptor>>* descs);
+                                  std::vector<std::shared_ptr<master::TSDescriptor>>* descs) const;
 
   // Create a client configured to talk to this cluster. Builder may contain
   // override options for the client. The master address will be overridden to
@@ -151,7 +151,14 @@ class MiniCluster {
   //
   // REQUIRES: the cluster must have already been Start()ed.
   Status CreateClient(client::KuduClientBuilder* builder,
-                      client::sp::shared_ptr<client::KuduClient>* client);
+                      client::sp::shared_ptr<client::KuduClient>* client) const;
+
+  // Determine the leader master of the cluster. Sets 'idx' to the leader
+  // master's index (for calls to mini_master()).
+  //
+  // Note: if a leader election occurs after this method is executed, the
+  // last result may not be valid.
+  Status GetLeaderMasterIndex(int* idx) const;
 
  private:
   enum {

http://git-wip-us.apache.org/repos/asf/kudu/blob/b5aa4a76/src/kudu/tools/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/tools/CMakeLists.txt b/src/kudu/tools/CMakeLists.txt
index fe95bad..05a72e3 100644
--- a/src/kudu/tools/CMakeLists.txt
+++ b/src/kudu/tools/CMakeLists.txt
@@ -110,7 +110,7 @@ set(KUDU_TEST_LINK_LIBS
   integration-tests
   ${KUDU_MIN_TEST_LIBS})
 ADD_KUDU_TEST(ksck-test)
-ADD_KUDU_TEST(ksck_remote-test)
+ADD_KUDU_TEST(ksck_remote-test RESOURCE_LOCK "master-rpc-ports")
 ADD_KUDU_TEST(kudu-admin-test)
 ADD_KUDU_TEST_DEPENDENCIES(kudu-admin-test
   kudu-admin)

http://git-wip-us.apache.org/repos/asf/kudu/blob/b5aa4a76/src/kudu/tools/ksck-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck-test.cc b/src/kudu/tools/ksck-test.cc
index aae4656..cc2d4c7 100644
--- a/src/kudu/tools/ksck-test.cc
+++ b/src/kudu/tools/ksck-test.cc
@@ -85,7 +85,7 @@ class MockKsckMaster : public KsckMaster {
       : fetch_info_status_(Status::OK()) {
   }
 
-  virtual Status Connect() const OVERRIDE {
+  virtual Status Connect() OVERRIDE {
     return fetch_info_status_;
   }
 
@@ -218,7 +218,7 @@ class KsckTest : public KuduTest {
                            bool is_leader,
                            bool is_running) {
     shared_ptr<KsckTabletReplica> replica(new KsckTabletReplica(assignment_plan_.back(),
-                                                                is_leader, !is_leader));
+                                                                is_leader));
     shared_ptr<MockKsckTabletServer> ts = static_pointer_cast<MockKsckTabletServer>(
             master_->tablet_servers_.at(assignment_plan_.back()));
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/b5aa4a76/src/kudu/tools/ksck.h
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck.h b/src/kudu/tools/ksck.h
index c512831..6a61b08 100644
--- a/src/kudu/tools/ksck.h
+++ b/src/kudu/tools/ksck.h
@@ -69,9 +69,8 @@ struct ChecksumOptions {
 // Representation of a tablet replica on a tablet server.
 class KsckTabletReplica {
  public:
-  KsckTabletReplica(const std::string ts_uuid, const bool is_leader, const bool is_follower)
+  KsckTabletReplica(const std::string ts_uuid, const bool is_leader)
       : is_leader_(is_leader),
-        is_follower_(is_follower),
         is_running_(false),
         ts_uuid_(ts_uuid) {
   }
@@ -80,17 +79,12 @@ class KsckTabletReplica {
     return is_leader_;
   }
 
-  const bool& is_follower() const {
-    return is_follower_;
-  }
-
   const std::string& ts_uuid() const {
     return ts_uuid_;
   }
 
  private:
   const bool is_leader_;
-  const bool is_follower_;
   bool is_running_;
   const std::string ts_uuid_;
   DISALLOW_COPY_AND_ASSIGN(KsckTabletReplica);
@@ -259,7 +253,7 @@ class KsckMaster {
   virtual ~KsckMaster() { }
 
   // Connects to the configured Master.
-  virtual Status Connect() const = 0;
+  virtual Status Connect() = 0;
 
   // Gets the list of Tablet Servers from the Master and stores it in the passed
   // map, which is keyed on server permanent_uuid.

http://git-wip-us.apache.org/repos/asf/kudu/blob/b5aa4a76/src/kudu/tools/ksck_remote-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck_remote-test.cc b/src/kudu/tools/ksck_remote-test.cc
index 7c8c303..b8857ce 100644
--- a/src/kudu/tools/ksck_remote-test.cc
+++ b/src/kudu/tools/ksck_remote-test.cc
@@ -24,11 +24,11 @@
 #include "kudu/tools/data_gen_util.h"
 #include "kudu/tools/ksck_remote.h"
 #include "kudu/util/monotime.h"
+#include "kudu/util/promise.h"
 #include "kudu/util/random.h"
 #include "kudu/util/test_util.h"
 
 DECLARE_int32(heartbeat_interval_ms);
-DECLARE_int32(tablets_batch_size_max);
 
 namespace kudu {
 namespace tools {
@@ -72,21 +72,20 @@ class RemoteKsckTest : public KuduTest {
     // Speed up testing, saves about 700ms per TEST_F.
     FLAGS_heartbeat_interval_ms = 10;
 
-    // Fetch the tablets in smaller batches to regression test a bug
-    // previously seen in the batching code.
-    FLAGS_tablets_batch_size_max = 5;
-
     MiniClusterOptions opts;
+
+    // Hard-coded ports for the masters. This is safe, as these tests run under
+    // a resource lock (see CMakeLists.txt in this directory).
+    // TODO we should have a generic method to obtain n free ports.
+    opts.master_rpc_ports = { 11010, 11011, 11012 };
+
+    opts.num_masters = opts.master_rpc_ports.size();
     opts.num_tablet_servers = 3;
     mini_cluster_.reset(new MiniCluster(env_.get(), opts));
     ASSERT_OK(mini_cluster_->Start());
 
-    master_rpc_addr_ = mini_cluster_->mini_master()->bound_rpc_addr();
-
     // Connect to the cluster.
-    ASSERT_OK(client::KuduClientBuilder()
-                     .add_master_server_addr(master_rpc_addr_.ToString())
-                     .Build(&client_));
+    ASSERT_OK(mini_cluster_->CreateClient(nullptr, &client_));
 
     // Create one table.
     gscoped_ptr<KuduTableCreator> table_creator(client_->NewTableCreator());
@@ -97,11 +96,18 @@ class RemoteKsckTest : public KuduTest {
                      .split_rows(GenerateSplitRows())
                      .Create());
     // Make sure we can open the table.
-    ASSERT_OK(client_->OpenTable(kTableName, &client_table_));
+    shared_ptr<KuduTable> client_table;
+    ASSERT_OK(client_->OpenTable(kTableName, &client_table));
 
-    ASSERT_OK(RemoteKsckMaster::Build(master_rpc_addr_, &master_));
-    cluster_.reset(new KsckCluster(master_));
-    ksck_.reset(new Ksck(cluster_));
+    vector<string> master_addresses;
+    for (int i = 0; i < mini_cluster_->num_masters(); i++) {
+        master_addresses.push_back(
+            mini_cluster_->mini_master(i)->bound_rpc_addr_str());
+    }
+    std::shared_ptr<KsckMaster> master;
+    ASSERT_OK(RemoteKsckMaster::Build(master_addresses, &master));
+    std::shared_ptr<KsckCluster> cluster(new KsckCluster(master));
+    ksck_.reset(new Ksck(cluster));
   }
 
   virtual void TearDown() OVERRIDE {
@@ -180,6 +186,7 @@ class RemoteKsckTest : public KuduTest {
     return Status::OK();
   }
 
+  std::shared_ptr<MiniCluster> mini_cluster_;
   std::shared_ptr<Ksck> ksck_;
   shared_ptr<client::KuduClient> client_;
 
@@ -187,12 +194,7 @@ class RemoteKsckTest : public KuduTest {
   std::stringstream err_stream_;
 
  private:
-  Sockaddr master_rpc_addr_;
-  std::shared_ptr<MiniCluster> mini_cluster_;
   client::KuduSchema schema_;
-  shared_ptr<client::KuduTable> client_table_;
-  std::shared_ptr<KsckMaster> master_;
-  std::shared_ptr<KsckCluster> cluster_;
   Random random_;
 };
 
@@ -318,5 +320,20 @@ TEST_F(RemoteKsckTest, DISABLED_TestChecksumSnapshotCurrentTimestamp) {
   writer_thread->Join();
 }
 
+TEST_F(RemoteKsckTest, TestLeaderMasterDown) {
+  // Make sure ksck's client is created with the current leader master.
+  ASSERT_OK(ksck_->CheckMasterRunning());
+
+  // Shut down the leader master.
+  int leader_idx;
+  ASSERT_OK(mini_cluster_->GetLeaderMasterIndex(&leader_idx));
+  mini_cluster_->mini_master(leader_idx)->Shutdown();
+
+  // Try to ksck. The underlying client will need to find the new leader master
+  // in order for the test to pass.
+  ASSERT_OK(ksck_->FetchTableAndTabletInfo());
+  ASSERT_OK(ksck_->FetchInfoFromTabletServers());
+}
+
 } // namespace tools
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/b5aa4a76/src/kudu/tools/ksck_remote.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck_remote.cc b/src/kudu/tools/ksck_remote.cc
index 641271c..c2cc85c 100644
--- a/src/kudu/tools/ksck_remote.cc
+++ b/src/kudu/tools/ksck_remote.cc
@@ -17,9 +17,11 @@
 
 #include "kudu/tools/ksck_remote.h"
 
+#include "kudu/client/client.h"
 #include "kudu/common/schema.h"
 #include "kudu/common/wire_protocol.h"
 #include "kudu/gutil/map-util.h"
+#include "kudu/gutil/stl_util.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/gutil/strings/util.h"
 #include "kudu/util/net/net_util.h"
@@ -27,13 +29,19 @@
 
 DEFINE_bool(checksum_cache_blocks, false, "Should the checksum scanners cache the read blocks");
 DEFINE_int64(timeout_ms, 1000 * 60, "RPC timeout in milliseconds");
-DEFINE_int32(tablets_batch_size_max, 100, "How many tablets to get from the Master per RPC");
 
 namespace kudu {
 namespace tools {
 
 static const std::string kMessengerName = "ksck";
 
+using client::KuduClient;
+using client::KuduClientBuilder;
+using client::KuduReplica;
+using client::KuduScanToken;
+using client::KuduScanTokenBuilder;
+using client::KuduTable;
+using client::KuduTabletServer;
 using rpc::Messenger;
 using rpc::MessengerBuilder;
 using rpc::RpcController;
@@ -156,9 +164,9 @@ class ChecksumStepper {
       return; // Deletes 'this'.
     }
     if (resp_.has_resource_metrics() || resp_.has_rows_checksummed()) {
-      auto bytes = resp_.resource_metrics().cfile_cache_miss_bytes() +
+      int64_t bytes = resp_.resource_metrics().cfile_cache_miss_bytes() +
           resp_.resource_metrics().cfile_cache_hit_bytes();
-      callbacks_->Progress(resp_.rows_checksummed(), bytes);;
+      callbacks_->Progress(resp_.rows_checksummed(), bytes);
     }
     DCHECK(resp_.has_checksum());
     checksum_ = resp_.checksum();
@@ -251,35 +259,32 @@ void RemoteKsckTabletServer::RunTabletChecksumScanAsync(
   ignore_result(stepper.release()); // Deletes self on callback.
 }
 
-Status RemoteKsckMaster::Connect() const {
-  master::PingRequestPB req;
-  master::PingResponsePB resp;
-  RpcController rpc;
-  rpc.set_timeout(GetDefaultTimeout());
-  return proxy_->Ping(req, &resp, &rpc);
+Status RemoteKsckMaster::Connect() {
+  client::sp::shared_ptr<KuduClient> client;
+  KuduClientBuilder builder;
+  builder.master_server_addrs(master_addresses_);
+  return builder.Build(&client_);
 }
 
-Status RemoteKsckMaster::Build(const Sockaddr& address, shared_ptr<KsckMaster>* master) {
+Status RemoteKsckMaster::Build(const vector<string>& master_addresses,
+                               shared_ptr<KsckMaster>* master) {
   shared_ptr<Messenger> messenger;
   MessengerBuilder builder(kMessengerName);
   RETURN_NOT_OK(builder.Build(&messenger));
-  master->reset(new RemoteKsckMaster(address, messenger));
+  master->reset(new RemoteKsckMaster(master_addresses, messenger));
   return Status::OK();
 }
 
 Status RemoteKsckMaster::RetrieveTabletServers(TSMap* tablet_servers) {
-  master::ListTabletServersRequestPB req;
-  master::ListTabletServersResponsePB resp;
-  RpcController rpc;
+  vector<KuduTabletServer*> servers;
+  ElementDeleter deleter(&servers);
+  RETURN_NOT_OK(client_->ListTabletServers(&servers));
 
-  rpc.set_timeout(GetDefaultTimeout());
-  RETURN_NOT_OK(proxy_->ListTabletServers(req, &resp, &rpc));
   tablet_servers->clear();
-  for (const master::ListTabletServersResponsePB_Entry& e : resp.servers()) {
-    HostPortPB addr = e.registration().rpc_addresses(0);
+  for (const auto* s : servers) {
     shared_ptr<RemoteKsckTabletServer> ts(
-        new RemoteKsckTabletServer(e.instance_id().permanent_uuid(),
-                                   HostPort(addr.host(), addr.port()),
+        new RemoteKsckTabletServer(s->uuid(),
+                                   HostPort(s->hostname(), s->port()),
                                    messenger_));
     RETURN_NOT_OK(ts->Init());
     InsertOrDie(tablet_servers, ts->uuid(), ts);
@@ -288,21 +293,17 @@ Status RemoteKsckMaster::RetrieveTabletServers(TSMap* tablet_servers) {
 }
 
 Status RemoteKsckMaster::RetrieveTablesList(vector<shared_ptr<KsckTable>>* tables) {
-  master::ListTablesRequestPB req;
-  master::ListTablesResponsePB resp;
-  RpcController rpc;
-
-  rpc.set_timeout(GetDefaultTimeout());
-  RETURN_NOT_OK(proxy_->ListTables(req, &resp, &rpc));
-  if (resp.has_error()) {
-    return StatusFromPB(resp.error().status());
-  }
+  vector<string> table_names;
+  RETURN_NOT_OK(client_->ListTables(&table_names));
+
   vector<shared_ptr<KsckTable>> tables_temp;
-  for (const master::ListTablesResponsePB_TableInfo& info : resp.tables()) {
-    Schema schema;
-    int num_replicas;
-    RETURN_NOT_OK(GetTableInfo(info.name(), &schema, &num_replicas));
-    shared_ptr<KsckTable> table(new KsckTable(info.name(), schema, num_replicas));
+  for (const auto& n : table_names) {
+    client::sp::shared_ptr<KuduTable> t;
+    RETURN_NOT_OK(client_->OpenTable(n, &t));
+
+    shared_ptr<KsckTable> table(new KsckTable(n,
+                                              *t->schema().schema_,
+                                              t->num_replicas()));
     tables_temp.push_back(table);
   }
   tables->assign(tables_temp.begin(), tables_temp.end());
@@ -311,71 +312,28 @@ Status RemoteKsckMaster::RetrieveTablesList(vector<shared_ptr<KsckTable>>* table
 
 Status RemoteKsckMaster::RetrieveTabletsList(const shared_ptr<KsckTable>& table) {
   vector<shared_ptr<KsckTablet>> tablets;
-  bool more_tablets = true;
-  string next_key;
-  int retries = 0;
-  while (more_tablets) {
-    Status s = GetTabletsBatch(table, &next_key, tablets, &more_tablets);
-    if (s.IsServiceUnavailable() && retries++ < 25) {
-      SleepFor(MonoDelta::FromMilliseconds(100 * retries));
-    } else if (!s.ok()) {
-      return s;
-    }
-  }
-
-  table->set_tablets(tablets);
-  return Status::OK();
-}
 
-Status RemoteKsckMaster::GetTabletsBatch(const shared_ptr<KsckTable>& table,
-                                         string* next_partition_key,
-                                         vector<shared_ptr<KsckTablet>>& tablets,
-                                         bool* more_tablets) {
-  master::GetTableLocationsRequestPB req;
-  master::GetTableLocationsResponsePB resp;
-  RpcController rpc;
-
-  req.mutable_table()->set_table_name(table->name());
-  req.set_max_returned_locations(FLAGS_tablets_batch_size_max);
-  req.set_partition_key_start(*next_partition_key);
-
-  rpc.set_timeout(GetDefaultTimeout());
-  RETURN_NOT_OK(proxy_->GetTableLocations(req, &resp, &rpc));
-  for (const master::TabletLocationsPB& locations : resp.tablet_locations()) {
-    if (locations.partition().partition_key_start() < *next_partition_key) {
-      // We've already seen this partition.
-      continue;
-    }
+  client::sp::shared_ptr<KuduTable> client_table;
+  RETURN_NOT_OK(client_->OpenTable(table->name(), &client_table));
 
-    *next_partition_key = ImmediateSuccessor(locations.partition().partition_key_start());
+  vector<KuduScanToken*> tokens;
+  ElementDeleter deleter(&tokens);
 
-    shared_ptr<KsckTablet> tablet(new KsckTablet(table.get(), locations.tablet_id()));
+  KuduScanTokenBuilder builder(client_table.get());
+  RETURN_NOT_OK(builder.Build(&tokens));
+  for (const auto* t : tokens) {
+    shared_ptr<KsckTablet> tablet(
+        new KsckTablet(table.get(), t->tablet().id()));
     vector<shared_ptr<KsckTabletReplica>> replicas;
-    for (const master::TabletLocationsPB_ReplicaPB& replica : locations.replicas()) {
-      bool is_leader = replica.role() == consensus::RaftPeerPB::LEADER;
-      bool is_follower = replica.role() == consensus::RaftPeerPB::FOLLOWER;
+    for (const auto* r : t->tablet().replicas()) {
       replicas.push_back(shared_ptr<KsckTabletReplica>(
-          new KsckTabletReplica(replica.ts_info().permanent_uuid(), is_leader, is_follower)));
+          new KsckTabletReplica(r->ts().uuid(), r->is_leader())));
     }
     tablet->set_replicas(replicas);
     tablets.push_back(tablet);
   }
-  *more_tablets = resp.tablet_locations().size() == FLAGS_tablets_batch_size_max;
-  return Status::OK();
-}
 
-Status RemoteKsckMaster::GetTableInfo(const string& table_name, Schema* schema, int* num_replicas) {
-  master::GetTableSchemaRequestPB req;
-  master::GetTableSchemaResponsePB resp;
-  RpcController rpc;
-
-  req.mutable_table()->set_table_name(table_name);
-
-  rpc.set_timeout(GetDefaultTimeout());
-  RETURN_NOT_OK(proxy_->GetTableSchema(req, &resp, &rpc));
-
-  RETURN_NOT_OK(SchemaFromPB(resp.schema(), schema));
-  *num_replicas = resp.num_replicas();
+  table->set_tablets(tablets);
   return Status::OK();
 }
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/b5aa4a76/src/kudu/tools/ksck_remote.h
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck_remote.h b/src/kudu/tools/ksck_remote.h
index 8068bb0..ee049d6 100644
--- a/src/kudu/tools/ksck_remote.h
+++ b/src/kudu/tools/ksck_remote.h
@@ -22,8 +22,7 @@
 #include <string>
 #include <vector>
 
-#include "kudu/master/master.h"
-#include "kudu/master/master.proxy.h"
+#include "kudu/client/client.h"
 #include "kudu/rpc/messenger.h"
 #include "kudu/server/server_base.h"
 #include "kudu/server/server_base.proxy.h"
@@ -76,11 +75,12 @@ class RemoteKsckTabletServer : public KsckTabletServer {
 class RemoteKsckMaster : public KsckMaster {
  public:
 
-  static Status Build(const Sockaddr& address, std::shared_ptr<KsckMaster>* master);
+  static Status Build(const std::vector<std::string>& master_addresses,
+                      std::shared_ptr<KsckMaster>* master);
 
   virtual ~RemoteKsckMaster() { }
 
-  virtual Status Connect() const OVERRIDE;
+  virtual Status Connect() OVERRIDE;
 
   virtual Status RetrieveTabletServers(TSMap* tablet_servers) OVERRIDE;
 
@@ -90,25 +90,16 @@ class RemoteKsckMaster : public KsckMaster {
 
  private:
 
-  explicit RemoteKsckMaster(const Sockaddr& address,
-                            const std::shared_ptr<rpc::Messenger>& messenger)
-      : messenger_(messenger),
-        proxy_(new master::MasterServiceProxy(messenger, address)) {
+  RemoteKsckMaster(const std::vector<std::string>& master_addresses,
+                   const std::shared_ptr<rpc::Messenger>& messenger)
+      : master_addresses_(master_addresses),
+        messenger_(messenger) {
   }
 
-  Status GetTableInfo(const std::string& table_name, Schema* schema, int* num_replicas);
-
-  // Used to get a batch of tablets from the master, passing a pointer to the
-  // seen last key that will be used as the new start key. The
-  // last_partition_key is updated to point at the new last key that came in
-  // the batch.
-  Status GetTabletsBatch(const std::shared_ptr<KsckTable>& table,
-                         std::string* last_partition_key,
-                         std::vector<std::shared_ptr<KsckTablet> >& tablets,
-                         bool* more_tablets);
+  const std::vector<std::string> master_addresses_;
+  const std::shared_ptr<rpc::Messenger> messenger_;
 
-  std::shared_ptr<rpc::Messenger> messenger_;
-  std::shared_ptr<master::MasterServiceProxy> proxy_;
+  client::sp::shared_ptr<client::KuduClient> client_;
 };
 
 } // namespace tools

http://git-wip-us.apache.org/repos/asf/kudu/blob/b5aa4a76/src/kudu/tools/tool_action_cluster.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_action_cluster.cc b/src/kudu/tools/tool_action_cluster.cc
index 93ed5db..d519b93 100644
--- a/src/kudu/tools/tool_action_cluster.cc
+++ b/src/kudu/tools/tool_action_cluster.cc
@@ -26,10 +26,8 @@
 
 #include "kudu/gutil/map-util.h"
 #include "kudu/gutil/strings/split.h"
-#include "kudu/master/master.h"
 #include "kudu/tools/ksck.h"
 #include "kudu/tools/ksck_remote.h"
-#include "kudu/util/net/net_util.h"
 #include "kudu/util/status.h"
 
 #define PUSH_PREPEND_NOT_OK(s, statuses, msg) do { \
@@ -65,15 +63,11 @@ namespace tools {
 namespace {
 
 Status RunKsck(const RunnerContext& context) {
-  vector<Sockaddr> master_addrs;
-  string master_address = FindOrDie(context.required_args, "master_address");
-  RETURN_NOT_OK_PREPEND(ParseAddressList(master_address,
-                                         master::Master::kDefaultPort,
-                                         &master_addrs),
-                        "unable to parse master address");
-
+  string master_addresses_str = FindOrDie(context.required_args,
+                                          "master_addresses");
+  vector<string> master_addresses = strings::Split(master_addresses_str, ",");
   shared_ptr<KsckMaster> master;
-  RETURN_NOT_OK_PREPEND(RemoteKsckMaster::Build(master_addrs[0], &master),
+  RETURN_NOT_OK_PREPEND(RemoteKsckMaster::Build(master_addresses, &master),
                         "unable to build KsckMaster");
 
   shared_ptr<KsckCluster> cluster(new KsckCluster(master));
@@ -132,7 +126,10 @@ unique_ptr<Mode> BuildClusterMode() {
       "actively receiving inserts or updates.";
   unique_ptr<Action> ksck = ActionBuilder(
       { "ksck", desc }, &RunKsck)
-    .AddRequiredParameter({ "master_address", "Kudu Master RPC address of form hostname:port" })
+    .AddRequiredParameter({
+        "master_addresses",
+        "Comma-separated list of Kudu Master addressess where each address is "
+        "of form hostname:port" })
     .AddOptionalParameter("checksum_scan")
     .AddOptionalParameter("checksum_snapshot")
     .AddOptionalParameter("color")