You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by mp...@apache.org on 2017/12/02 05:29:43 UTC

[1/3] kudu git commit: [client] expose non-voter replicas for kudu CLI tool

Repository: kudu
Updated Branches:
  refs/heads/branch-1.6.x d9eb7d4cb -> 8a570e772


[client] expose non-voter replicas for kudu CLI tool

Added private interface to expose non-voter replica in the kudu CLI
tools.  Also, use that functionality to list table's replicas.

Updated output of ksck to include information on non-voters replicas:
  * renamed column 'Voters' into 'Replicas'
  * a non-voter is displayed with '~' extra suffix, e.g. in the example
    below two non-voter replicas, B and C, are in the config along with
    leader replica A:

 Config source |   Replicas   | Current term | Config index | Committed?
---------------+--------------+--------------+--------------+-----------
 master        | A*  B~  C~   |              |              | Yes
 A             | A*  B~  C~   | 4            | 5            | Yes
 B             | A*  B~  C~   | 4            | 5            | Yes
 C             | A*  B~  C~   | 4            | 5            | Yes

I also updated the format of output to make it more readable.  An
example of new output for 'kudu table list --list_tablets':

loadgen_auto_1e5f57a73dd34fa7bde7465219bd1866
  T 69e22ffecb5b459db8a7fbf2f751a76f
    L 5acf108a9d364178b1aa672e52214ab8 127.0.0.1:9872
    N f2e726948d3143e184097cd953dd2230 127.0.0.1:9878
    N 3ae7fdf107cf424dbf9c5e33c33eafd0 127.0.0.1:9876

  T 662bae40e4ec46588e90670bec16b2b5
    L 5acf108a9d364178b1aa672e52214ab8 127.0.0.1:9872

  T 5ba95da9b2f9455db9d690bdd35ae5cb
    L df8296cf97df42eaacd78e794c028c79 127.0.0.1:9874

  T 528ae88cce1e4415a0d075dead5c983c
    L 20e66417713446039c22987fec13b800 127.0.0.1:9870

  T 157ea108357e469e8fee001aec521d7e
    L 5acf108a9d364178b1aa672e52214ab8 127.0.0.1:9872

  T 9c6a4d493fff4bda9bfe8f158cb08e69
    L 5acf108a9d364178b1aa672e52214ab8 127.0.0.1:9872

  T 36d03adab6ad494ea4e769ef92ef5577
    L df8296cf97df42eaacd78e794c028c79 127.0.0.1:9874

  T 7ce6d42984734614bd627d872e99c833
    L 20e66417713446039c22987fec13b800 127.0.0.1:9870

Change-Id: I19317fdf5a2d5c8bb5f37b27bb83067a4df4ea20
Reviewed-on: http://gerrit.cloudera.org:8080/8586
Reviewed-by: Mike Percy <mp...@apache.org>
Tested-by: Kudu Jenkins
Reviewed-on: http://gerrit.cloudera.org:8080/8733
Reviewed-by: Alexey Serbin <as...@cloudera.com>
Tested-by: Mike Percy <mp...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/7c442fba
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/7c442fba
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/7c442fba

Branch: refs/heads/branch-1.6.x
Commit: 7c442fbad68b7af9516ec1a9e2975b5efa1bc573
Parents: d9eb7d4
Author: Alexey Serbin <as...@cloudera.com>
Authored: Thu Nov 16 18:03:51 2017 -0800
Committer: Mike Percy <mp...@apache.org>
Committed: Sat Dec 2 05:28:59 2017 +0000

----------------------------------------------------------------------
 src/kudu/client/CMakeLists.txt                  |   1 +
 src/kudu/client/client.cc                       |  10 +-
 src/kudu/client/client.h                        |   4 +
 src/kudu/client/client_builder-internal.cc      |   6 +-
 src/kudu/client/client_builder-internal.h       |   2 +
 src/kudu/client/meta_cache.cc                   |  41 ++++--
 src/kudu/client/meta_cache.h                    |   6 +-
 src/kudu/client/replica-internal.cc             |   3 +-
 src/kudu/client/replica-internal.h              |   3 +-
 src/kudu/client/replica_controller-internal.cc  |  40 ++++++
 src/kudu/client/replica_controller-internal.h   |  57 ++++++++
 src/kudu/client/scan_token-internal.cc          |   3 +-
 .../raft_consensus_nonvoter-itest.cc            |  86 ++++++++++++
 src/kudu/tools/ksck-test.cc                     |  30 ++--
 src/kudu/tools/ksck.cc                          | 140 ++++++++++++-------
 src/kudu/tools/ksck.h                           |  51 ++++---
 src/kudu/tools/ksck_remote.cc                   |   9 +-
 src/kudu/tools/kudu-admin-test.cc               |   2 +-
 src/kudu/tools/tool_action_table.cc             |  83 ++++++-----
 19 files changed, 428 insertions(+), 149 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/client/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/client/CMakeLists.txt b/src/kudu/client/CMakeLists.txt
index a78d300..92a1c7e 100644
--- a/src/kudu/client/CMakeLists.txt
+++ b/src/kudu/client/CMakeLists.txt
@@ -44,6 +44,7 @@ set(CLIENT_SRCS
   scan_token-internal.cc
   scanner-internal.cc
   replica-internal.cc
+  replica_controller-internal.cc
   resource_metrics.cc
   schema.cc
   session-internal.cc

http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/client/client.cc
----------------------------------------------------------------------
diff --git a/src/kudu/client/client.cc b/src/kudu/client/client.cc
index aed362d..93431b7 100644
--- a/src/kudu/client/client.cc
+++ b/src/kudu/client/client.cc
@@ -324,7 +324,7 @@ Status KuduClientBuilder::Build(shared_ptr<KuduClient>* client) {
     RETURN_NOT_OK(ImportAuthnCredsToMessenger(data_->authn_creds_, messenger.get()));
   }
 
-  shared_ptr<KuduClient> c(new KuduClient());
+  shared_ptr<KuduClient> c(new KuduClient);
   c->data_->messenger_ = std::move(messenger);
   c->data_->master_server_addrs_ = data_->master_server_addrs_;
   c->data_->default_admin_operation_timeout_ = data_->default_admin_operation_timeout_;
@@ -336,8 +336,8 @@ Status KuduClientBuilder::Build(shared_ptr<KuduClient>* client) {
   RETURN_NOT_OK_PREPEND(c->data_->ConnectToCluster(c.get(), deadline),
                         "Could not connect to the cluster");
 
-  c->data_->meta_cache_.reset(new MetaCache(c.get()));
-  c->data_->dns_resolver_.reset(new DnsResolver());
+  c->data_->meta_cache_.reset(new MetaCache(c.get(), data_->replica_visibility_));
+  c->data_->dns_resolver_.reset(new DnsResolver);
 
   // Init local host names used for locality decisions.
   RETURN_NOT_OK_PREPEND(c->data_->InitLocalHostNames(),
@@ -547,9 +547,11 @@ Status KuduClient::GetTablet(const string& tablet_id, KuduTablet** tablet) {
     unique_ptr<KuduTabletServer> ts(new KuduTabletServer);
     ts->data_ = new KuduTabletServer::Data(ts_info.permanent_uuid(), hp);
 
+    // TODO(aserbin): try to use member_type instead of role for metacache.
     bool is_leader = r.role() == consensus::RaftPeerPB::LEADER;
+    bool is_voter = is_leader || r.role() == consensus::RaftPeerPB::FOLLOWER;
     unique_ptr<KuduReplica> replica(new KuduReplica);
-    replica->data_ = new KuduReplica::Data(is_leader, std::move(ts));
+    replica->data_ = new KuduReplica::Data(is_leader, is_voter, std::move(ts));
 
     replicas.push_back(replica.release());
   }

http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/client/client.h
----------------------------------------------------------------------
diff --git a/src/kudu/client/client.h b/src/kudu/client/client.h
index 0729fcc..b8dd7ec 100644
--- a/src/kudu/client/client.h
+++ b/src/kudu/client/client.h
@@ -87,6 +87,7 @@ class LookupRpc;
 class MetaCache;
 class RemoteTablet;
 class RemoteTabletServer;
+class ReplicaController;
 class WriteRpc;
 } // namespace internal
 
@@ -260,6 +261,8 @@ class KUDU_EXPORT KuduClientBuilder {
  private:
   class KUDU_NO_EXPORT Data;
 
+  friend class internal::ReplicaController;
+
   // Owned.
   Data* data_;
 
@@ -607,6 +610,7 @@ class KUDU_EXPORT KuduReplica {
  private:
   friend class KuduClient;
   friend class KuduScanTokenBuilder;
+  friend class internal::ReplicaController;
 
   class KUDU_NO_EXPORT Data;
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/client/client_builder-internal.cc
----------------------------------------------------------------------
diff --git a/src/kudu/client/client_builder-internal.cc b/src/kudu/client/client_builder-internal.cc
index cb56393..7d0d496 100644
--- a/src/kudu/client/client_builder-internal.cc
+++ b/src/kudu/client/client_builder-internal.cc
@@ -16,14 +16,16 @@
 // under the License.
 
 #include "kudu/client/client_builder-internal.h"
+#include "kudu/client/replica_controller-internal.h"
 
 namespace kudu {
 
 namespace client {
 
 KuduClientBuilder::Data::Data()
-  : default_admin_operation_timeout_(MonoDelta::FromSeconds(30)),
-    default_rpc_timeout_(MonoDelta::FromSeconds(10)) {
+    : default_admin_operation_timeout_(MonoDelta::FromSeconds(30)),
+      default_rpc_timeout_(MonoDelta::FromSeconds(10)),
+      replica_visibility_(internal::ReplicaController::Visibility::VOTERS) {
 }
 
 KuduClientBuilder::Data::~Data() {

http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/client/client_builder-internal.h
----------------------------------------------------------------------
diff --git a/src/kudu/client/client_builder-internal.h b/src/kudu/client/client_builder-internal.h
index 9b9c02b..07d977a 100644
--- a/src/kudu/client/client_builder-internal.h
+++ b/src/kudu/client/client_builder-internal.h
@@ -21,6 +21,7 @@
 #include <vector>
 
 #include "kudu/client/client.h"
+#include "kudu/client/replica_controller-internal.h"
 #include "kudu/gutil/macros.h"
 #include "kudu/util/monotime.h"
 
@@ -37,6 +38,7 @@ class KuduClientBuilder::Data {
   MonoDelta default_admin_operation_timeout_;
   MonoDelta default_rpc_timeout_;
   std::string authn_creds_;
+  internal::ReplicaController::Visibility replica_visibility_;
 
   DISALLOW_COPY_AND_ASSIGN(Data);
 };

http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/client/meta_cache.cc
----------------------------------------------------------------------
diff --git a/src/kudu/client/meta_cache.cc b/src/kudu/client/meta_cache.cc
index 08b222c..9c9520b 100644
--- a/src/kudu/client/meta_cache.cc
+++ b/src/kudu/client/meta_cache.cc
@@ -70,6 +70,7 @@ class Messenger;
 }
 
 using consensus::RaftPeerPB;
+using master::ANY_REPLICA;
 using master::GetTableLocationsRequestPB;
 using master::GetTableLocationsResponsePB;
 using master::MasterServiceProxy;
@@ -502,9 +503,11 @@ void MetaCacheServerPicker::InitProxyCb(const ServerPickedCallback& callback,
 
 ////////////////////////////////////////////////////////////
 
-MetaCache::MetaCache(KuduClient* client)
-  : client_(client),
-    master_lookup_sem_(50) {
+MetaCache::MetaCache(KuduClient* client,
+                     ReplicaController::Visibility replica_visibility)
+    : client_(client),
+      master_lookup_sem_(50),
+      replica_visibility_(replica_visibility) {
 }
 
 MetaCache::~MetaCache() {
@@ -529,7 +532,7 @@ void MetaCache::UpdateTabletServer(const TSInfoPB& pb) {
 // Keeps a reference on the owning metacache while alive.
 class LookupRpc : public Rpc {
  public:
-  LookupRpc(const scoped_refptr<MetaCache>& meta_cache,
+  LookupRpc(scoped_refptr<MetaCache> meta_cache,
             StatusCallback user_cb,
             const KuduTable* table,
             string partition_key,
@@ -537,7 +540,8 @@ class LookupRpc : public Rpc {
             const MonoTime& deadline,
             shared_ptr<Messenger> messenger,
             int max_returned_locations,
-            bool is_exact_lookup);
+            bool is_exact_lookup,
+            ReplicaController::Visibility replica_visibility);
   virtual ~LookupRpc();
   virtual void SendRpc() OVERRIDE;
   virtual string ToString() const OVERRIDE;
@@ -593,31 +597,38 @@ class LookupRpc : public Rpc {
   bool has_permit_;
 
   // The max number of tablets to fetch per round trip from the master
-  int max_returned_locations_;
+  const int max_returned_locations_;
 
   // If true, this lookup is for an exact tablet match with the requested
   // partition key. If false, the next tablet after the partition key should be
   // returned if the partition key falls in a non-covered partition range.
-  bool is_exact_lookup_;
+  const bool is_exact_lookup_;
+
+  // Controlling which replicas to look up. If set to Visibility::ALL,
+  // non-voter tablet replicas, if any, appear in the lookup result in addition
+  // to 'regular' voter replicas.
+  const ReplicaController::Visibility replica_visibility_;
 };
 
-LookupRpc::LookupRpc(const scoped_refptr<MetaCache>& meta_cache,
+LookupRpc::LookupRpc(scoped_refptr<MetaCache> meta_cache,
                      StatusCallback user_cb, const KuduTable* table,
                      string partition_key,
                      scoped_refptr<RemoteTablet>* remote_tablet,
                      const MonoTime& deadline,
                      shared_ptr<Messenger> messenger,
                      int max_returned_locations,
-                     bool is_exact_lookup)
+                     bool is_exact_lookup,
+                     ReplicaController::Visibility replica_visibility)
     : Rpc(deadline, std::move(messenger)),
-      meta_cache_(meta_cache),
+      meta_cache_(std::move(meta_cache)),
       user_cb_(std::move(user_cb)),
       table_(table),
       partition_key_(std::move(partition_key)),
       remote_tablet_(remote_tablet),
       has_permit_(false),
       max_returned_locations_(max_returned_locations),
-      is_exact_lookup_(is_exact_lookup) {
+      is_exact_lookup_(is_exact_lookup),
+      replica_visibility_(replica_visibility) {
   DCHECK(deadline.Initialized());
 }
 
@@ -668,7 +679,9 @@ void LookupRpc::SendRpc() {
   req_.mutable_table()->set_table_id(table_->id());
   req_.set_partition_key_start(partition_key_);
   req_.set_max_returned_locations(max_returned_locations_);
-  req_.set_replica_type_filter(master::ANY_REPLICA);
+  if (replica_visibility_ == ReplicaController::Visibility::ALL) {
+    req_.set_replica_type_filter(master::ANY_REPLICA);
+  }
 
   // The end partition key is left unset intentionally so that we'll prefetch
   // some additional tablets.
@@ -1022,7 +1035,7 @@ void MetaCache::LookupTabletByKey(const KuduTable* table,
                                  deadline,
                                  client_->data_->messenger_,
                                  kFetchTabletsPerPointLookup,
-                                 true);
+                                 true, replica_visibility_);
   rpc->SendRpc();
 }
 
@@ -1040,7 +1053,7 @@ void MetaCache::LookupTabletByKeyOrNext(const KuduTable* table,
                                  deadline,
                                  client_->data_->messenger_,
                                  max_returned_locations,
-                                 false);
+                                 false, replica_visibility_);
   rpc->SendRpc();
 }
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/client/meta_cache.h
----------------------------------------------------------------------
diff --git a/src/kudu/client/meta_cache.h b/src/kudu/client/meta_cache.h
index bd5e72e..ec003b7 100644
--- a/src/kudu/client/meta_cache.h
+++ b/src/kudu/client/meta_cache.h
@@ -30,6 +30,7 @@
 #include <glog/logging.h>
 #include <gtest/gtest_prod.h>
 
+#include "kudu/client/replica_controller-internal.h"
 #include "kudu/common/partition.h"
 #include "kudu/consensus/metadata.pb.h"
 #include "kudu/gutil/macros.h"
@@ -369,7 +370,7 @@ class MetaCacheEntry {
 class MetaCache : public RefCountedThreadSafe<MetaCache> {
  public:
   // The passed 'client' object must remain valid as long as MetaCache is alive.
-  explicit MetaCache(KuduClient* client);
+  MetaCache(KuduClient* client, ReplicaController::Visibility replica_visibility);
   ~MetaCache();
 
   // Look up which tablet hosts the given partition key for a table. When it is
@@ -471,6 +472,9 @@ class MetaCache : public RefCountedThreadSafe<MetaCache> {
   // permits have been acquired.
   Semaphore master_lookup_sem_;
 
+  // Policy on tablet replica visibility: what type of replicas to expose.
+  const ReplicaController::Visibility replica_visibility_;
+
   DISALLOW_COPY_AND_ASSIGN(MetaCache);
 };
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/client/replica-internal.cc
----------------------------------------------------------------------
diff --git a/src/kudu/client/replica-internal.cc b/src/kudu/client/replica-internal.cc
index 156d8f4..8ea3540 100644
--- a/src/kudu/client/replica-internal.cc
+++ b/src/kudu/client/replica-internal.cc
@@ -27,8 +27,9 @@ namespace client {
 
 using std::unique_ptr;
 
-KuduReplica::Data::Data(bool is_leader, unique_ptr<KuduTabletServer> ts)
+KuduReplica::Data::Data(bool is_leader, bool is_voter, unique_ptr<KuduTabletServer> ts)
     : is_leader_(is_leader),
+      is_voter_(is_voter),
       ts_(std::move(ts)) {
 }
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/client/replica-internal.h
----------------------------------------------------------------------
diff --git a/src/kudu/client/replica-internal.h b/src/kudu/client/replica-internal.h
index 5603dea..7ba079f 100644
--- a/src/kudu/client/replica-internal.h
+++ b/src/kudu/client/replica-internal.h
@@ -26,10 +26,11 @@ namespace client {
 
 class KuduReplica::Data {
  public:
-  Data(bool is_leader, std::unique_ptr<KuduTabletServer> ts);
+  Data(bool is_leader, bool is_voter, std::unique_ptr<KuduTabletServer> ts);
   ~Data() = default;
 
   const bool is_leader_;
+  const bool is_voter_;
   const std::unique_ptr<KuduTabletServer> ts_;
 
  private:

http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/client/replica_controller-internal.cc
----------------------------------------------------------------------
diff --git a/src/kudu/client/replica_controller-internal.cc b/src/kudu/client/replica_controller-internal.cc
new file mode 100644
index 0000000..dca77ae
--- /dev/null
+++ b/src/kudu/client/replica_controller-internal.cc
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/client/replica_controller-internal.h"
+
+#include "kudu/client/client.h"
+#include "kudu/client/client_builder-internal.h"
+#include "kudu/client/replica-internal.h"
+
+namespace kudu {
+namespace client {
+namespace internal {
+
+ReplicaController::ReplicaController() {}
+
+void ReplicaController::SetVisibility(KuduClientBuilder* builder, Visibility visibility) {
+  builder->data_->replica_visibility_ = visibility;
+}
+
+bool ReplicaController::is_voter(const KuduReplica& replica) {
+  return replica.data_->is_voter_;
+}
+
+} // namespace internal
+} // namespace client
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/client/replica_controller-internal.h
----------------------------------------------------------------------
diff --git a/src/kudu/client/replica_controller-internal.h b/src/kudu/client/replica_controller-internal.h
new file mode 100644
index 0000000..8358957
--- /dev/null
+++ b/src/kudu/client/replica_controller-internal.h
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#pragma once
+
+#include "kudu/gutil/macros.h"
+
+namespace kudu {
+namespace client {
+
+class KuduClientBuilder;
+class KuduReplica;
+
+namespace internal {
+
+// This is a class whose sole responsibility is to access tablet replica's
+// visibility properties. Those are stored in private members of a few classes
+// from the Kudu client API. It's not yet clear whether we want to allow access
+// to those properties via the API, so it's safer to not expose them yet.
+//
+// Having this single class reduces cruft in friend class declarations in the
+// client.h file.
+class ReplicaController {
+ public:
+  // Control over tablet replica visibility: expose all or only voter replicas.
+  enum class Visibility {
+    ALL,    // Expose all replicas: both of voter and non-voter type.
+    VOTERS, // Expose only replicas of voter type.
+  };
+
+  // Set the specified replica visibility option for the given builder.
+  static void SetVisibility(KuduClientBuilder* builder, Visibility visibility);
+
+  static bool is_voter(const KuduReplica& replica);
+
+ private:
+  ReplicaController();
+
+  DISALLOW_COPY_AND_ASSIGN(ReplicaController);
+};
+
+} // namespace internal
+} // namespace client
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/client/scan_token-internal.cc
----------------------------------------------------------------------
diff --git a/src/kudu/client/scan_token-internal.cc b/src/kudu/client/scan_token-internal.cc
index d8b2d16..de36be6 100644
--- a/src/kudu/client/scan_token-internal.cc
+++ b/src/kudu/client/scan_token-internal.cc
@@ -307,8 +307,9 @@ Status KuduScanTokenBuilder::Data::Build(vector<KuduScanToken*>* tokens) {
       client_ts->data_ = new KuduTabletServer::Data(r.ts->permanent_uuid(),
                                                     host_ports[0]);
       bool is_leader = r.role == consensus::RaftPeerPB::LEADER;
+      bool is_voter = is_leader || r.role == consensus::RaftPeerPB::FOLLOWER;
       unique_ptr<KuduReplica> client_replica(new KuduReplica);
-      client_replica->data_ = new KuduReplica::Data(is_leader,
+      client_replica->data_ = new KuduReplica::Data(is_leader, is_voter,
                                                     std::move(client_ts));
       client_replicas.push_back(client_replica.release());
     }

http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc b/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc
index 241ab4d..f3a0d9e 100644
--- a/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc
+++ b/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc
@@ -15,7 +15,9 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <cstddef>
 #include <cstdint>
+#include <numeric>
 #include <ostream>
 #include <string>
 #include <unordered_map>
@@ -27,11 +29,13 @@
 #include <gtest/gtest.h>
 
 #include "kudu/client/client.h"
+#include "kudu/client/replica_controller-internal.h"
 #include "kudu/client/shared_ptr.h"
 #include "kudu/consensus/metadata.pb.h"
 #include "kudu/consensus/quorum_util.h"
 #include "kudu/gutil/gscoped_ptr.h"
 #include "kudu/gutil/map-util.h"
+#include "kudu/gutil/stl_util.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/gutil/strings/util.h"
 #include "kudu/integration-tests/cluster_itest_util.h"
@@ -45,6 +49,7 @@
 #include "kudu/tserver/tablet_server-test-base.h"
 #include "kudu/util/metrics.h"
 #include "kudu/util/monotime.h"
+#include "kudu/util/net/sockaddr.h"
 #include "kudu/util/pb_util.h"
 #include "kudu/util/scoped_cleanup.h"
 #include "kudu/util/status.h"
@@ -58,6 +63,13 @@ DECLARE_double(leader_failure_max_missed_heartbeat_periods);
 METRIC_DECLARE_gauge_int32(tablet_copy_open_client_sessions);
 METRIC_DECLARE_gauge_int32(tablet_copy_open_source_sessions);
 
+using kudu::client::sp::shared_ptr;
+using kudu::client::KuduClient;
+using kudu::client::KuduClientBuilder;
+using kudu::client::KuduScanToken;
+using kudu::client::KuduScanTokenBuilder;
+using kudu::client::KuduTable;
+using kudu::client::internal::ReplicaController;
 using kudu::cluster::ExternalDaemon;
 using kudu::cluster::ExternalMaster;
 using kudu::cluster::ExternalTabletServer;
@@ -338,6 +350,80 @@ TEST_F(RaftConsensusNonVoterITest, GetTableAndTabletLocations) {
   }
 }
 
+// Verify that non-voters replicas are not exposed to a regular Kudu client.
+// However, it's possible to see the replicas.
+TEST_F(RaftConsensusNonVoterITest, ReplicaMatchPolicy) {
+  const MonoDelta kTimeout = MonoDelta::FromSeconds(60);
+  const int kOriginalReplicasNum = 3;
+  FLAGS_num_tablet_servers = kOriginalReplicasNum + 1;
+  FLAGS_num_replicas = kOriginalReplicasNum;
+  NO_FATALS(BuildAndStart());
+  ASSERT_EQ(FLAGS_num_tablet_servers, tablet_servers_.size());
+  ASSERT_EQ(kOriginalReplicasNum, tablet_replicas_.size());
+
+  const string& tablet_id = tablet_id_;
+  TabletServerMap replica_servers;
+  for (const auto& e : tablet_replicas_) {
+    if (e.first == tablet_id) {
+      replica_servers.emplace(e.second->uuid(), e.second);
+    }
+  }
+  ASSERT_EQ(FLAGS_num_replicas, replica_servers.size());
+
+  TServerDetails* new_replica = nullptr;
+  for (const auto& ts : tablet_servers_) {
+    if (replica_servers.find(ts.first) == replica_servers.end()) {
+      new_replica = ts.second;
+      break;
+    }
+  }
+  ASSERT_NE(nullptr, new_replica);
+
+  ASSERT_OK(AddReplica(tablet_id, new_replica, RaftPeerPB::NON_VOTER, kTimeout));
+  // Wait the newly added replica to start.
+  ASSERT_OK(WaitForNumTabletsOnTS(
+      new_replica, 1, kTimeout, nullptr, tablet::RUNNING));
+
+  auto count_replicas = [this](KuduTable* table, size_t* count) {
+    vector<KuduScanToken*> tokens;
+    ElementDeleter deleter(&tokens);
+    KuduScanTokenBuilder builder(table);
+    RETURN_NOT_OK(builder.Build(&tokens));
+    size_t replicas_count = std::accumulate(
+        tokens.begin(), tokens.end(), 0,
+        [](size_t sum, const KuduScanToken* token) {
+          return sum + token->tablet().replicas().size();
+        });
+    *count = replicas_count;
+    return Status::OK();
+  };
+
+  // The case of regular client: the non-voter replica should not be seen.
+  {
+    size_t count = 0;
+    ASSERT_OK(count_replicas(table_.get(), &count));
+    EXPECT_EQ(kOriginalReplicasNum, count);
+  }
+
+  // The case of special client used for internal tools, etc.: non-voter
+  // replicas should be visible.
+  {
+    KuduClientBuilder builder;
+    ReplicaController::SetVisibility(&builder, ReplicaController::Visibility::ALL);
+    shared_ptr<KuduClient> client;
+    ASSERT_OK(builder
+              .add_master_server_addr(cluster_->master()->bound_rpc_addr().ToString())
+              .Build(&client));
+    ASSERT_NE(nullptr, client.get());
+    shared_ptr<KuduTable> t;
+    ASSERT_OK(client->OpenTable(table_->name(), &t));
+
+    size_t count = 0;
+    ASSERT_OK(count_replicas(t.get(), &count));
+    EXPECT_EQ(kOriginalReplicasNum + 1, count);
+  }
+}
+
 // Ensure that adding a NON_VOTER replica is properly handled by the system:
 //
 //   * Updating Raft configuration for tablet by adding a NON_VOTER replica

http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/tools/ksck-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck-test.cc b/src/kudu/tools/ksck-test.cc
index 7031ec6..a7c81cd 100644
--- a/src/kudu/tools/ksck-test.cc
+++ b/src/kudu/tools/ksck-test.cc
@@ -213,15 +213,17 @@ class KsckTest : public KuduTest {
 
   void CreateAndFillTablet(shared_ptr<KsckTablet>& tablet, int num_replicas,
                            bool has_leader, bool is_running) {
-    vector<shared_ptr<KsckTabletReplica>> replicas;
-    if (has_leader) {
-      CreateReplicaAndAdd(&replicas, tablet->id(), true, is_running);
-      num_replicas--;
-    }
-    for (int i = 0; i < num_replicas; i++) {
-      CreateReplicaAndAdd(&replicas, tablet->id(), false, is_running);
+    {
+      vector<shared_ptr<KsckTabletReplica>> replicas;
+      if (has_leader) {
+        CreateReplicaAndAdd(&replicas, tablet->id(), true, is_running);
+        num_replicas--;
+      }
+      for (int i = 0; i < num_replicas; i++) {
+        CreateReplicaAndAdd(&replicas, tablet->id(), false, is_running);
+      }
+      tablet->set_replicas(std::move(replicas));
     }
-    tablet->set_replicas(replicas);
 
     // Set up the consensus state on each tablet server.
     consensus::ConsensusStatePB cstate;
@@ -247,8 +249,8 @@ class KsckTest : public KuduTest {
                            const string& tablet_id,
                            bool is_leader,
                            bool is_running) {
-    shared_ptr<KsckTabletReplica> replica(new KsckTabletReplica(assignment_plan_.back(),
-                                                                is_leader));
+    shared_ptr<KsckTabletReplica> replica(
+        new KsckTabletReplica(assignment_plan_.back(), is_leader, true));
     shared_ptr<MockKsckTabletServer> ts = static_pointer_cast<MockKsckTabletServer>(
             master_->tablet_servers_.at(assignment_plan_.back()));
 
@@ -415,7 +417,7 @@ TEST_F(KsckTest, TestConsensusConflictExtraPeer) {
   ASSERT_EQ("Corruption: 1 out of 1 table(s) are bad", s.ToString());
   ASSERT_STR_CONTAINS(err_stream_.str(),
       "The consensus matrix is:\n"
-      " Config source |      Voters      | Current term | Config index | Committed?\n"
+      " Config source |     Replicas     | Current term | Config index | Committed?\n"
       "---------------+------------------+--------------+--------------+------------\n"
       " master        | A*  B   C        |              |              | Yes\n"
       " A             | A*  B   C   D    | 0            |              | Yes\n"
@@ -441,7 +443,7 @@ TEST_F(KsckTest, TestConsensusConflictMissingPeer) {
   ASSERT_EQ("Corruption: 1 out of 1 table(s) are bad", s.ToString());
   ASSERT_STR_CONTAINS(err_stream_.str(),
       "The consensus matrix is:\n"
-      " Config source |    Voters    | Current term | Config index | Committed?\n"
+      " Config source |   Replicas   | Current term | Config index | Committed?\n"
       "---------------+--------------+--------------+--------------+------------\n"
       " master        | A*  B   C    |              |              | Yes\n"
       " A             | A*  B        | 0            |              | Yes\n"
@@ -467,7 +469,7 @@ TEST_F(KsckTest, TestConsensusConflictDifferentLeader) {
   ASSERT_EQ("Corruption: 1 out of 1 table(s) are bad", s.ToString());
   ASSERT_STR_CONTAINS(err_stream_.str(),
       "The consensus matrix is:\n"
-      " Config source |    Voters    | Current term | Config index | Committed?\n"
+      " Config source |   Replicas   | Current term | Config index | Committed?\n"
       "---------------+--------------+--------------+--------------+------------\n"
       " master        | A*  B   C    |              |              | Yes\n"
       " A             | A   B*  C    | 0            |              | Yes\n"
@@ -579,7 +581,7 @@ TEST_F(KsckTest, TestMasterNotReportingTabletServerWithConsensusConflict) {
   ASSERT_STR_CONTAINS(err_stream_.str(), "Table test has 3 under-replicated tablet(s)");
   ASSERT_STR_CONTAINS(err_stream_.str(),
       "The consensus matrix is:\n"
-      " Config source |         Voters         | Current term | Config index | Committed?\n"
+      " Config source |        Replicas        | Current term | Config index | Committed?\n"
       "---------------+------------------------+--------------+--------------+------------\n"
       " master        | A*  B   C              |              |              | Yes\n"
       " A             | [config not available] |              |              | \n"

http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/tools/ksck.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck.cc b/src/kudu/tools/ksck.cc
index cf36516..0385b7f 100644
--- a/src/kudu/tools/ksck.cc
+++ b/src/kudu/tools/ksck.cc
@@ -24,6 +24,7 @@
 #include <iterator>
 #include <map>
 #include <mutex>
+#include <numeric>
 #include <type_traits>
 
 #include <boost/optional.hpp> // IWYU pragma: keep
@@ -66,24 +67,23 @@ DEFINE_bool(consensus, true,
 DEFINE_bool(verbose, false,
             "Output detailed information even if no inconsistency is found.");
 
-namespace kudu {
-namespace tools {
-
 using std::cout;
 using std::endl;
 using std::left;
 using std::map;
 using std::ostream;
-using std::right;
+using std::ostringstream;
 using std::setw;
 using std::shared_ptr;
 using std::string;
-using std::ostringstream;
 using std::to_string;
 using std::unordered_map;
 using std::vector;
 using strings::Substitute;
 
+namespace kudu {
+namespace tools {
+
 namespace {
 // Return true if 'str' matches any of the patterns in 'patterns', or if
 // 'patterns' is empty.
@@ -369,7 +369,7 @@ class ChecksumResultReporter : public RefCountedThreadSafe<ChecksumResultReporte
 };
 
 // Queue of tablet replicas for an individual tablet server.
-typedef shared_ptr<BlockingQueue<std::pair<Schema, std::string> > > SharedTabletQueue;
+typedef shared_ptr<BlockingQueue<std::pair<Schema, std::string>>> SharedTabletQueue;
 
 // A set of callbacks which records the result of a tablet replica's checksum,
 // and then checks if the tablet server has any more tablets to checksum. If so,
@@ -654,22 +654,27 @@ struct ReplicaInfo {
   boost::optional<KsckConsensusState> consensus_state;
 };
 
-// Formats the peers known and unknown to 'config' using labels from 'peer_uuid_mapping'.
-string format_peers(const map<string, char>& peer_uuid_mapping, const KsckConsensusState& config) {
-  ostringstream voters;
-  int peer_width = 4;
-  for (const auto &entry : peer_uuid_mapping) {
-    if (!ContainsKey(config.peer_uuids, entry.first)) {
-      voters << setw(peer_width) << left << "";
+// Format replicas known and unknown to 'config' using labels from 'uuid_mapping'.
+string format_replicas(const map<string, char>& uuid_mapping, const KsckConsensusState& config) {
+  constexpr int kPeerWidth = 4;
+  ostringstream result;
+  for (const auto &entry : uuid_mapping) {
+    if (!ContainsKey(config.voter_uuids, entry.first) &&
+        !ContainsKey(config.non_voter_uuids, entry.first)) {
+      result << setw(kPeerWidth) << left << "";
       continue;
     }
     if (config.leader_uuid && config.leader_uuid == entry.first) {
-      voters << setw(peer_width) << left << Substitute("$0*", entry.second);
+      result << setw(kPeerWidth) << left << Substitute("$0*", entry.second);
     } else {
-      voters << setw(peer_width) << left << Substitute("$0", entry.second);
+      if (ContainsKey(config.non_voter_uuids, entry.first)) {
+        result << setw(kPeerWidth) << left << Substitute("$0~", entry.second);
+      } else {
+        result << setw(kPeerWidth) << left << Substitute("$0", entry.second);
+      }
     }
   }
-  return voters.str();
+  return result.str();
 }
 
 } // anonymous namespace
@@ -685,14 +690,21 @@ Ksck::CheckResult Ksck::VerifyTablet(const shared_ptr<KsckTablet>& tablet, int t
   if (leader_it != tablet->replicas().cend()) {
     leader_uuid = (*leader_it)->ts_uuid();
   }
-  vector<string> peer_uuids_from_master;
+  vector<string> voter_uuids_from_master;
+  vector<string> non_voter_uuids_from_master;
   for (const auto& replica : tablet->replicas()) {
-    peer_uuids_from_master.push_back(replica->ts_uuid());
+    if (replica->is_voter()) {
+      voter_uuids_from_master.push_back(replica->ts_uuid());
+    } else {
+      non_voter_uuids_from_master.push_back(replica->ts_uuid());
+    }
   }
   KsckConsensusState master_config(KsckConsensusConfigType::MASTER,
-                                   boost::none, boost::none,
-                                   leader_uuid, peer_uuids_from_master);
-
+                                   boost::none,
+                                   boost::none,
+                                   leader_uuid,
+                                   voter_uuids_from_master,
+                                   non_voter_uuids_from_master);
   vector<ReplicaInfo> replica_infos;
   for (const shared_ptr<KsckTabletReplica>& replica : tablet->replicas()) {
     replica_infos.emplace_back();
@@ -722,9 +734,14 @@ Ksck::CheckResult Ksck::VerifyTablet(const shared_ptr<KsckTablet>& tablet, int t
                              cstate.pending_config() :
                              cstate.committed_config();
         const auto& peers = config.peers();
-        vector<string> peer_uuids_from_replica;
+        vector<string> voter_uuids_from_replica;
+        vector<string> non_voter_uuids_from_replica;
         for (const auto& pb : peers) {
-          peer_uuids_from_replica.push_back(pb.permanent_uuid());
+          if (pb.member_type() == consensus::RaftPeerPB::NON_VOTER) {
+            non_voter_uuids_from_replica.push_back(pb.permanent_uuid());
+          } else {
+            voter_uuids_from_replica.push_back(pb.permanent_uuid());
+          }
         }
         boost::optional<int64_t> opid_index;
         if (config.has_opid_index()) {
@@ -734,27 +751,28 @@ Ksck::CheckResult Ksck::VerifyTablet(const shared_ptr<KsckTablet>& tablet, int t
         if (!cstate.leader_uuid().empty()) {
           repl_leader_uuid = cstate.leader_uuid();
         }
-        KsckConsensusConfigType config_type = cstate.has_pending_config() ?
-                                              KsckConsensusConfigType::PENDING :
-                                              KsckConsensusConfigType::COMMITTED;
+        KsckConsensusConfigType config_type =
+            cstate.has_pending_config() ? KsckConsensusConfigType::PENDING
+                                        : KsckConsensusConfigType::COMMITTED;
         repl_info->consensus_state = KsckConsensusState(config_type,
-                                                         cstate.current_term(),
-                                                         opid_index,
-                                                         repl_leader_uuid,
-                                                         peer_uuids_from_replica);
+                                                        cstate.current_term(),
+                                                        opid_index,
+                                                        repl_leader_uuid,
+                                                        voter_uuids_from_replica,
+                                                        non_voter_uuids_from_replica);
       }
     }
   }
 
   // Summarize the states.
   int leaders_count = 0;
-  int running_count = 0;
+  int running_voters_count = 0;
   for (const auto& r : replica_infos) {
     if (r.replica->is_leader()) {
       leaders_count++;
     }
-    if (r.state == tablet::RUNNING) {
-      running_count++;
+    if (r.state == tablet::RUNNING && r.replica->is_voter()) {
+      running_voters_count++;
     }
   }
 
@@ -776,19 +794,22 @@ Ksck::CheckResult Ksck::VerifyTablet(const shared_ptr<KsckTablet>& tablet, int t
 
   // Determine the overall health state of the tablet.
   CheckResult result = CheckResult::OK;
-  int num_voters = replica_infos.size();
+  int num_voters = std::accumulate(replica_infos.begin(), replica_infos.end(),
+                                   0, [](int sum, const ReplicaInfo& info) {
+      return sum + (info.replica->is_voter() ? 1 : 0);
+    });
   int majority_size = consensus::MajoritySize(num_voters);
-  if (running_count < majority_size) {
+  if (running_voters_count < majority_size) {
     Out() << Substitute("$0 is $1: $2 replica(s) not RUNNING",
                         tablet_str,
                         Color(AnsiCode::RED, "unavailable"),
-                        num_voters - running_count) << endl;
+                        num_voters - running_voters_count) << endl;
     result = CheckResult::UNAVAILABLE;
-  } else if (running_count < num_voters) {
+  } else if (running_voters_count < num_voters) {
     Out() << Substitute("$0 is $1: $2 replica(s) not RUNNING",
                         tablet_str,
                         Color(AnsiCode::YELLOW, "under-replicated"),
-                        num_voters - running_count) << endl;
+                        num_voters - running_voters_count) << endl;
     result = CheckResult::UNDER_REPLICATED;
   } else if (check_replica_count_ && num_voters < table_num_replicas) {
     Out() << Substitute("$0 is $1: configuration has $2 replicas vs desired $3",
@@ -814,23 +835,24 @@ Ksck::CheckResult Ksck::VerifyTablet(const shared_ptr<KsckTablet>& tablet, int t
   if (result != CheckResult::OK || FLAGS_verbose) {
     for (const ReplicaInfo& r : replica_infos) {
       string ts_str = r.ts ? r.ts->ToString() : r.replica->ts_uuid();
-      const char* leader_str = r.replica->is_leader() ? " [LEADER]" : "";
+      const char* spec_str = r.replica->is_leader()
+          ? " [LEADER]" : (!r.replica->is_voter() ? " [NONVOTER]" : "");
 
       Out() << "  " << ts_str << ": ";
       if (!r.ts || !r.ts->is_healthy()) {
-        Out() << Color(AnsiCode::YELLOW, "TS unavailable") << leader_str << endl;
+        Out() << Color(AnsiCode::YELLOW, "TS unavailable") << spec_str << endl;
         continue;
       }
       if (r.state == tablet::RUNNING) {
-        Out() << Color(AnsiCode::GREEN, "RUNNING") << leader_str << endl;
+        Out() << Color(AnsiCode::GREEN, "RUNNING") << spec_str << endl;
         continue;
       }
       if (r.status_pb == boost::none) {
-        Out() << Color(AnsiCode::YELLOW, "missing") << leader_str << endl;
+        Out() << Color(AnsiCode::YELLOW, "missing") << spec_str << endl;
         continue;
       }
 
-      Out() << Color(AnsiCode::YELLOW, "bad state") << leader_str << endl;
+      Out() << Color(AnsiCode::YELLOW, "bad state") << spec_str << endl;
       Out() << Substitute(
           "    State:       $0\n"
           "    Data state:  $1\n"
@@ -852,25 +874,35 @@ Ksck::CheckResult Ksck::VerifyTablet(const shared_ptr<KsckTablet>& tablet, int t
             << endl;
     }
     int i = 0;
-    map<string, char> peer_uuid_mapping;
+    map<string, char> replica_uuid_mapping;
     // TODO(wdb): use a scheme that gives > 26 unique labels.
     const string labels = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
-    for (const auto& peer : master_config.peer_uuids) {
-      if (InsertIfNotPresent(&peer_uuid_mapping, peer, labels[i])) {
+    for (const auto& uuid : master_config.voter_uuids) {
+      if (InsertIfNotPresent(&replica_uuid_mapping, uuid, labels[i])) {
+        i = (i + 1) % labels.size();
+      }
+    }
+    for (const auto& uuid : master_config.non_voter_uuids) {
+      if (InsertIfNotPresent(&replica_uuid_mapping, uuid, labels[i])) {
         i = (i + 1) % labels.size();
       }
     }
     for (const ReplicaInfo& rs : replica_infos) {
       if (!rs.consensus_state) continue;
-      for (const auto& peer : rs.consensus_state->peer_uuids) {
-        if (InsertIfNotPresent(&peer_uuid_mapping, peer, labels[i])) {
+      for (const auto& uuid : rs.consensus_state->voter_uuids) {
+        if (InsertIfNotPresent(&replica_uuid_mapping, uuid, labels[i])) {
+          i = (i + 1) % labels.size();
+        }
+      }
+      for (const auto& uuid : rs.consensus_state->non_voter_uuids) {
+        if (InsertIfNotPresent(&replica_uuid_mapping, uuid, labels[i])) {
           i = (i + 1) % labels.size();
         }
       }
     }
 
     Out() << "  All the peers reported by the master and tablet servers are:" << endl;
-    for (const auto& entry : peer_uuid_mapping) {
+    for (const auto& entry : replica_uuid_mapping) {
       Out() << "  " << entry.second << " = " << entry.first << endl;
     }
     Out() << endl;
@@ -881,23 +913,23 @@ Ksck::CheckResult Ksck::VerifyTablet(const shared_ptr<KsckTablet>& tablet, int t
 
     // Seed the columns with the master info.
     vector<string> sources{"master"};
-    vector<string> voters{format_peers(peer_uuid_mapping, master_config)};
+    vector<string> replicas{format_replicas(replica_uuid_mapping, master_config)};
     vector<string> terms{""};
     vector<string> indexes{""};
     vector<string> committed{"Yes"};
 
     // Fill out the columns with info from the replicas.
     for (const auto& replica_info : replica_infos) {
-      char label = FindOrDie(peer_uuid_mapping, replica_info.replica->ts_uuid());
+      char label = FindOrDie(replica_uuid_mapping, replica_info.replica->ts_uuid());
       sources.emplace_back(1, label);
       if (!replica_info.consensus_state) {
-        voters.emplace_back("[config not available]");
+        replicas.emplace_back("[config not available]");
         terms.emplace_back("");
         indexes.emplace_back("");
         committed.emplace_back("");
         continue;
       }
-      voters.push_back(format_peers(peer_uuid_mapping, replica_info.consensus_state.get()));
+      replicas.push_back(format_replicas(replica_uuid_mapping, replica_info.consensus_state.get()));
       terms.push_back(replica_info.consensus_state->term ?
                       std::to_string(replica_info.consensus_state->term.get()) : "");
       indexes.push_back(replica_info.consensus_state->opid_index ?
@@ -906,7 +938,7 @@ Ksck::CheckResult Ksck::VerifyTablet(const shared_ptr<KsckTablet>& tablet, int t
           replica_info.consensus_state->type == KsckConsensusConfigType::PENDING ? "No" : "Yes");
     }
     table.AddColumn("Config source", std::move(sources));
-    table.AddColumn("Voters", std::move(voters));
+    table.AddColumn("Replicas", std::move(replicas));
     table.AddColumn("Current term", std::move(terms));
     table.AddColumn("Config index", std::move(indexes));
     table.AddColumn("Committed?", std::move(committed));

http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/tools/ksck.h
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck.h b/src/kudu/tools/ksck.h
index c0cc81a..da68edc 100644
--- a/src/kudu/tools/ksck.h
+++ b/src/kudu/tools/ksck.h
@@ -79,22 +79,29 @@ struct ChecksumOptions {
 // Representation of a tablet replica on a tablet server.
 class KsckTabletReplica {
  public:
-  KsckTabletReplica(std::string ts_uuid, bool is_leader)
-      : is_leader_(is_leader),
-        ts_uuid_(std::move(ts_uuid)) {
+  KsckTabletReplica(std::string ts_uuid, bool is_leader, bool is_voter)
+      : ts_uuid_(std::move(ts_uuid)),
+        is_leader_(is_leader),
+        is_voter_(is_voter) {
   }
 
-  const bool& is_leader() const {
+  const std::string& ts_uuid() const {
+    return ts_uuid_;
+  }
+
+  bool is_leader() const {
     return is_leader_;
   }
 
-  const std::string& ts_uuid() const {
-    return ts_uuid_;
+  bool is_voter() const {
+    return is_voter_;
   }
 
  private:
-  const bool is_leader_;
   const std::string ts_uuid_;
+  const bool is_leader_;
+  const bool is_voter_;
+
   DISALLOW_COPY_AND_ASSIGN(KsckTabletReplica);
 };
 
@@ -114,12 +121,14 @@ struct KsckConsensusState {
                      boost::optional<int64_t> term,
                      boost::optional<int64_t> opid_index,
                      boost::optional<std::string> leader_uuid,
-                     std::vector<std::string> peers)
+                     const std::vector<std::string>& voters,
+                     const std::vector<std::string>& non_voters)
     : type(type),
       term(std::move(term)),
       opid_index(std::move(opid_index)),
       leader_uuid(std::move(leader_uuid)),
-      peer_uuids(peers.cbegin(), peers.cend()) {
+      voter_uuids(voters.cbegin(), voters.cend()),
+      non_voter_uuids(non_voters.cbegin(), non_voters.cend()) {
   }
 
   // Two KsckConsensusState structs match if they have the same
@@ -127,7 +136,10 @@ struct KsckConsensusState {
   // - at least one of them is of type MASTER
   // - they are configs of the same type and they have the same term
   bool Matches(const KsckConsensusState &other) const {
-    bool same_leader_and_peers = leader_uuid == other.leader_uuid && peer_uuids == other.peer_uuids;
+    bool same_leader_and_peers =
+        leader_uuid == other.leader_uuid &&
+        voter_uuids == other.voter_uuids &&
+        non_voter_uuids == other.non_voter_uuids;
     if (type == KsckConsensusConfigType::MASTER || other.type == KsckConsensusConfigType::MASTER) {
       return same_leader_and_peers;
     }
@@ -138,7 +150,8 @@ struct KsckConsensusState {
   boost::optional<int64_t> term;
   boost::optional<int64_t> opid_index;
   boost::optional<std::string> leader_uuid;
-  std::set<std::string> peer_uuids;
+  std::set<std::string> voter_uuids;
+  std::set<std::string> non_voter_uuids;
 };
 
 // Representation of a tablet belonging to a table. The tablet is composed of replicas.
@@ -154,12 +167,12 @@ class KsckTablet {
     return id_;
   }
 
-  const std::vector<std::shared_ptr<KsckTabletReplica> >& replicas() const {
+  const std::vector<std::shared_ptr<KsckTabletReplica>>& replicas() const {
     return replicas_;
   }
 
-  void set_replicas(std::vector<std::shared_ptr<KsckTabletReplica> >& replicas) {
-    replicas_.assign(replicas.begin(), replicas.end());
+  void set_replicas(std::vector<std::shared_ptr<KsckTabletReplica>> replicas) {
+    replicas_.swap(replicas);
   }
 
   KsckTable* table() {
@@ -195,7 +208,7 @@ class KsckTable {
     tablets_ = std::move(tablets);
   }
 
-  std::vector<std::shared_ptr<KsckTablet> >& tablets() {
+  std::vector<std::shared_ptr<KsckTablet>>& tablets() {
     return tablets_;
   }
 
@@ -315,7 +328,7 @@ class KsckTabletServer {
 class KsckMaster {
  public:
   // Map of KsckTabletServer objects keyed by tablet server permanent_uuid.
-  typedef std::unordered_map<std::string, std::shared_ptr<KsckTabletServer> > TSMap;
+  typedef std::unordered_map<std::string, std::shared_ptr<KsckTabletServer>> TSMap;
 
   KsckMaster() { }
   virtual ~KsckMaster() { }
@@ -330,7 +343,7 @@ class KsckMaster {
 
   // Gets the list of tables from the Master and stores it in the passed vector.
   // tables is only modified if this method returns OK.
-  virtual Status RetrieveTablesList(std::vector<std::shared_ptr<KsckTable> >* tables) = 0;
+  virtual Status RetrieveTablesList(std::vector<std::shared_ptr<KsckTable>>* tables) = 0;
 
   // Gets the list of tablets for the specified table and stores the list in it.
   // The table's tablet list is only modified if this method returns OK.
@@ -359,7 +372,7 @@ class KsckCluster {
     return tablet_servers_;
   }
 
-  const std::vector<std::shared_ptr<KsckTable> >& tables() {
+  const std::vector<std::shared_ptr<KsckTable>>& tables() {
     return tables_;
   }
 
@@ -377,7 +390,7 @@ class KsckCluster {
 
   const std::shared_ptr<KsckMaster> master_;
   KsckMaster::TSMap tablet_servers_;
-  std::vector<std::shared_ptr<KsckTable> > tables_;
+  std::vector<std::shared_ptr<KsckTable>> tables_;
   DISALLOW_COPY_AND_ASSIGN(KsckCluster);
 };
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/tools/ksck_remote.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck_remote.cc b/src/kudu/tools/ksck_remote.cc
index e9bc060..27929ef 100644
--- a/src/kudu/tools/ksck_remote.cc
+++ b/src/kudu/tools/ksck_remote.cc
@@ -28,6 +28,7 @@
 #include <glog/logging.h>
 
 #include "kudu/client/client.h"
+#include "kudu/client/replica_controller-internal.h"
 #include "kudu/client/schema.h"
 #include "kudu/common/common.pb.h"
 #include "kudu/common/schema.h"
@@ -68,6 +69,7 @@ using client::KuduScanToken;
 using client::KuduScanTokenBuilder;
 using client::KuduTable;
 using client::KuduTabletServer;
+using client::internal::ReplicaController;
 using rpc::Messenger;
 using rpc::MessengerBuilder;
 using rpc::RpcController;
@@ -311,10 +313,11 @@ void RemoteKsckTabletServer::RunTabletChecksumScanAsync(
 }
 
 Status RemoteKsckMaster::Connect() {
-  client::sp::shared_ptr<KuduClient> client;
   KuduClientBuilder builder;
   builder.default_rpc_timeout(GetDefaultTimeout());
   builder.master_server_addrs(master_addresses_);
+  ReplicaController::SetVisibility(&builder, ReplicaController::Visibility::ALL);
+  client::sp::shared_ptr<KuduClient> client;
   return builder.Build(&client_);
 }
 
@@ -380,9 +383,9 @@ Status RemoteKsckMaster::RetrieveTabletsList(const shared_ptr<KsckTable>& table)
     vector<shared_ptr<KsckTabletReplica>> replicas;
     for (const auto* r : t->tablet().replicas()) {
       replicas.push_back(std::make_shared<KsckTabletReplica>(
-          r->ts().uuid(), r->is_leader()));
+          r->ts().uuid(), r->is_leader(), ReplicaController::is_voter(*r)));
     }
-    tablet->set_replicas(replicas);
+    tablet->set_replicas(std::move(replicas));
     tablets.push_back(tablet);
   }
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/tools/kudu-admin-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/kudu-admin-test.cc b/src/kudu/tools/kudu-admin-test.cc
index 7fb96ed..fe91dd9 100644
--- a/src/kudu/tools/kudu-admin-test.cc
+++ b/src/kudu/tools/kudu-admin-test.cc
@@ -1271,7 +1271,7 @@ TEST_F(AdminCliTest, TestListTablesDetail) {
   vector<string> stdout_lines = Split(stdout, "\n", strings::SkipEmpty());
 
   // Verify multiple tables along with their tablets and replica-uuids.
-  ASSERT_EQ(4, stdout_lines.size());
+  ASSERT_EQ(10, stdout_lines.size());
   ASSERT_STR_CONTAINS(stdout, kTableId);
   ASSERT_STR_CONTAINS(stdout, kAnotherTableId);
   ASSERT_STR_CONTAINS(stdout, tablet_ids.front());

http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/tools/tool_action_table.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_action_table.cc b/src/kudu/tools/tool_action_table.cc
index e2df70b..d878f36 100644
--- a/src/kudu/tools/tool_action_table.cc
+++ b/src/kudu/tools/tool_action_table.cc
@@ -27,10 +27,12 @@
 #include <gflags/gflags.h>
 
 #include "kudu/client/client.h"
+#include "kudu/client/replica_controller-internal.h"
 #include "kudu/client/shared_ptr.h"
 #include "kudu/gutil/map-util.h"
 #include "kudu/gutil/stl_util.h"
 #include "kudu/gutil/strings/split.h"
+#include "kudu/gutil/strings/substitute.h"
 #include "kudu/tools/tool_action_common.h"
 #include "kudu/util/status.h"
 
@@ -45,11 +47,56 @@ using client::KuduClientBuilder;
 using client::KuduScanToken;
 using client::KuduScanTokenBuilder;
 using client::KuduTable;
+using client::internal::ReplicaController;
 using std::cout;
 using std::endl;
 using std::string;
 using std::unique_ptr;
 using std::vector;
+using strings::Split;
+
+// This class only exists so that ListTables() can easily be friended by
+// KuduReplica, KuduReplica::Data, and KuduClientBuilder.
+class TableLister {
+ public:
+  static Status ListTablets(const vector<string>& master_addresses) {
+    KuduClientBuilder builder;
+    ReplicaController::SetVisibility(&builder, ReplicaController::Visibility::ALL);
+    client::sp::shared_ptr<KuduClient> client;
+    RETURN_NOT_OK(builder
+                  .master_server_addrs(master_addresses)
+                  .Build(&client));
+    vector<string> table_names;
+    RETURN_NOT_OK(client->ListTables(&table_names));
+
+    for (const auto& tname : table_names) {
+      cout << tname << endl;
+      if (!FLAGS_list_tablets) {
+        continue;
+      }
+      client::sp::shared_ptr<KuduTable> client_table;
+      RETURN_NOT_OK(client->OpenTable(tname, &client_table));
+      vector<KuduScanToken*> tokens;
+      ElementDeleter deleter(&tokens);
+      KuduScanTokenBuilder builder(client_table.get());
+      RETURN_NOT_OK(builder.Build(&tokens));
+
+      for (const auto* token : tokens) {
+        cout << "  T " << token->tablet().id() << endl;
+        for (const auto* replica : token->tablet().replicas()) {
+          const bool is_voter = ReplicaController::is_voter(*replica);
+          const bool is_leader = replica->is_leader();
+          cout << strings::Substitute("    $0 $1 $2:$3",
+              is_leader ? "L" : (is_voter ? "V" : "N"), replica->ts().uuid(),
+              replica->ts().hostname(), replica->ts().port()) << endl;
+        }
+        cout << endl;
+      }
+      cout << endl;
+    }
+    return Status::OK();
+  }
+};
 
 namespace {
 
@@ -58,7 +105,7 @@ const char* const kTableNameArg = "table_name";
 Status DeleteTable(const RunnerContext& context) {
   const string& master_addresses_str = FindOrDie(context.required_args,
                                                  kMasterAddressesArg);
-  vector<string> master_addresses = strings::Split(master_addresses_str, ",");
+  vector<string> master_addresses = Split(master_addresses_str, ",");
   const string& table_name = FindOrDie(context.required_args, kTableNameArg);
 
   client::sp::shared_ptr<KuduClient> client;
@@ -71,39 +118,7 @@ Status DeleteTable(const RunnerContext& context) {
 Status ListTables(const RunnerContext& context) {
   const string& master_addresses_str = FindOrDie(context.required_args,
                                                  kMasterAddressesArg);
-  vector<string> master_addresses = strings::Split(master_addresses_str, ",");
-
-  client::sp::shared_ptr<KuduClient> client;
-  RETURN_NOT_OK(KuduClientBuilder()
-                .master_server_addrs(master_addresses)
-                .Build(&client));
-  vector<string> table_names;
-  RETURN_NOT_OK(client->ListTables(&table_names));
-
-  for (const auto& tname : table_names) {
-    cout << tname << endl;
-    if (!FLAGS_list_tablets) {
-      continue;
-    }
-    client::sp::shared_ptr<KuduTable> client_table;
-    RETURN_NOT_OK(client->OpenTable(tname, &client_table));
-    vector<KuduScanToken*> tokens;
-    ElementDeleter deleter(&tokens);
-    KuduScanTokenBuilder builder(client_table.get());
-    RETURN_NOT_OK(builder.Build(&tokens));
-
-    for (const auto* token : tokens) {
-      cout << "T " << token->tablet().id() << "\t";
-      for (const auto* replica : token->tablet().replicas()) {
-        cout << "P" << (replica->is_leader() ? "(L) " : "    ")
-             << replica->ts().uuid() << "(" << replica->ts().hostname()
-             << ":" << replica->ts().port() << ")" << "    ";
-      }
-      cout << endl;
-    }
-    cout << endl;
-  }
-  return Status::OK();
+  return TableLister::ListTablets(Split(master_addresses_str, ","));
 }
 
 } // anonymous namespace


[2/3] kudu git commit: Disable flaky KUDU-1097 tablet move test

Posted by mp...@apache.org.
Disable flaky KUDU-1097 tablet move test

This test is flaky because the leader may get marked with replace, which
is not currently reliably supported when KUDU-1097 is enabled.

Change-Id: I118accafd7da99c35beb5a4f4a9f164984f17baf
Reviewed-on: http://gerrit.cloudera.org:8080/8720
Reviewed-by: Alexey Serbin <as...@cloudera.com>
Tested-by: Mike Percy <mp...@apache.org>
Reviewed-on: http://gerrit.cloudera.org:8080/8734


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/d0d6994d
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/d0d6994d
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/d0d6994d

Branch: refs/heads/branch-1.6.x
Commit: d0d6994d4e212e4058af7ce1207b3991e98b8ea6
Parents: 7c442fb
Author: Mike Percy <mp...@apache.org>
Authored: Fri Dec 1 16:25:07 2017 -0800
Committer: Mike Percy <mp...@apache.org>
Committed: Sat Dec 2 05:29:13 2017 +0000

----------------------------------------------------------------------
 src/kudu/tools/kudu-admin-test.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/d0d6994d/src/kudu/tools/kudu-admin-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/kudu-admin-test.cc b/src/kudu/tools/kudu-admin-test.cc
index fe91dd9..5b7afaf 100644
--- a/src/kudu/tools/kudu-admin-test.cc
+++ b/src/kudu/tools/kudu-admin-test.cc
@@ -315,7 +315,7 @@ TEST_F(AdminCliTest, TestMoveTablet_pre_KUDU_1097) {
   DoTestMoveTablet(kDisableKudu1097);
 }
 
-TEST_F(AdminCliTest, TestMoveTablet_KUDU_1097) {
+TEST_F(AdminCliTest, DISABLED_TestMoveTablet_KUDU_1097) {
   DoTestMoveTablet(kEnableKudu1097);
 }
 


[3/3] kudu git commit: KUDU-1097. Only add/evict when processing leader tablet reports

Posted by mp...@apache.org.
KUDU-1097. Only add/evict when processing leader tablet reports

The leader replica is the only replica that sends a health report. The
master needs the health report to make an eviction decision. Thus, we
should only consider eviction or addition of a new replica if the tablet
report was received from the leader.

There is no standalone test for this but this patch is required for the
following patch, "Never evict a leader", to pass due to the DCHECK
introduced in that patch.

Change-Id: If7e318e042cd27ff4544bc5aae76db4ab9e51da1
Reviewed-on: http://gerrit.cloudera.org:8080/8682
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin <as...@cloudera.com>
Reviewed-on: http://gerrit.cloudera.org:8080/8735
Tested-by: Mike Percy <mp...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/8a570e77
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/8a570e77
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/8a570e77

Branch: refs/heads/branch-1.6.x
Commit: 8a570e7720d6947f678b41df018413db80821a14
Parents: d0d6994
Author: Mike Percy <mp...@apache.org>
Authored: Wed Nov 29 04:24:13 2017 -0800
Committer: Mike Percy <mp...@apache.org>
Committed: Sat Dec 2 05:29:22 2017 +0000

----------------------------------------------------------------------
 src/kudu/master/catalog_manager.cc | 44 ++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/8a570e77/src/kudu/master/catalog_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index beb4302..8144d74 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -3488,32 +3488,36 @@ Status CatalogManager::ProcessTabletReport(
       // 7e. Make tablet configuration change depending on the mode the server
       // is running with. The choice between two alternative modes is controlled
       // by the 'raft_prepare_replacement_before_eviction' run-time flag.
-      if (FLAGS_raft_prepare_replacement_before_eviction) {
-        // An alternative scheme of managing tablet replicas: the catalog
-        // manager processes the health-related info on replicas from the tablet
-        // report and initiates appropriate modifications for the tablet Raft
-        // configuration: evict an already-replaced failed voter replica or add
-        // a new non-voter replica marked for promotion as a replacement.
+      if (!FLAGS_raft_prepare_replacement_before_eviction) {
+        if (consensus_state_updated &&
+            FLAGS_master_add_server_when_underreplicated &&
+            CountVoters(cstate.committed_config()) < replication_factor) {
+          // Add a server to the config if it is under-replicated.
+          //
+          // This is an idempotent operation due to a CAS enforced on the
+          // committed config's opid_index.
+          rpcs.emplace_back(new AsyncAddReplicaTask(
+              master_, tablet, cstate, RaftPeerPB::VOTER, &rng_));
+        }
+
+      // When --raft_prepare_replacement_before_eviction is enabled, we
+      // consider whether to add or evict replicas based on the health report
+      // included in the leader's tablet report. Since only the leader tracks
+      // health, we ignore reports from non-leaders in this case.
+      } else if (!cstate.leader_uuid().empty() &&
+                 ts_desc->permanent_uuid() == cstate.leader_uuid()) {
         const RaftConfigPB& config = cstate.committed_config();
         string to_evict;
-        if (IsUnderReplicated(config, replication_factor)) {
-          rpcs.emplace_back(new AsyncAddReplicaTask(
-              master_, tablet, cstate, RaftPeerPB::NON_VOTER, &rng_));
-        } else if (PREDICT_TRUE(FLAGS_catalog_manager_evict_excess_replicas) &&
-                   CanEvictReplica(config, replication_factor, &to_evict)) {
+        if (PREDICT_TRUE(FLAGS_catalog_manager_evict_excess_replicas) &&
+                         CanEvictReplica(config, replication_factor, &to_evict)) {
           DCHECK(!to_evict.empty());
           rpcs.emplace_back(new AsyncEvictReplicaTask(
               master_, tablet, cstate, std::move(to_evict)));
+        } else if (FLAGS_master_add_server_when_underreplicated &&
+                   IsUnderReplicated(config, replication_factor)) {
+          rpcs.emplace_back(new AsyncAddReplicaTask(
+              master_, tablet, cstate, RaftPeerPB::NON_VOTER, &rng_));
         }
-      } else if (consensus_state_updated &&
-                 FLAGS_master_add_server_when_underreplicated &&
-                 CountVoters(cstate.committed_config()) < replication_factor) {
-        // Add a server to the config if it is under-replicated.
-        //
-        // This is an idempotent operation due to a CAS enforced on the
-        // committed config's opid_index.
-        rpcs.emplace_back(new AsyncAddReplicaTask(
-            master_, tablet, cstate, RaftPeerPB::VOTER, &rng_));
       }
     }