You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by mp...@apache.org on 2017/12/02 05:29:43 UTC
[1/3] kudu git commit: [client] expose non-voter replicas for kudu
CLI tool
Repository: kudu
Updated Branches:
refs/heads/branch-1.6.x d9eb7d4cb -> 8a570e772
[client] expose non-voter replicas for kudu CLI tool
Added private interface to expose non-voter replica in the kudu CLI
tools. Also, use that functionality to list table's replicas.
Updated output of ksck to include information on non-voters replicas:
* renamed column 'Voters' into 'Replicas'
* a non-voter is displayed with '~' extra suffix, e.g. in the example
below two non-voter replicas, B and C, are in the config along with
leader replica A:
Config source | Replicas | Current term | Config index | Committed?
---------------+--------------+--------------+--------------+-----------
master | A* B~ C~ | | | Yes
A | A* B~ C~ | 4 | 5 | Yes
B | A* B~ C~ | 4 | 5 | Yes
C | A* B~ C~ | 4 | 5 | Yes
I also updated the format of output to make it more readable. An
example of new output for 'kudu table list --list_tablets':
loadgen_auto_1e5f57a73dd34fa7bde7465219bd1866
T 69e22ffecb5b459db8a7fbf2f751a76f
L 5acf108a9d364178b1aa672e52214ab8 127.0.0.1:9872
N f2e726948d3143e184097cd953dd2230 127.0.0.1:9878
N 3ae7fdf107cf424dbf9c5e33c33eafd0 127.0.0.1:9876
T 662bae40e4ec46588e90670bec16b2b5
L 5acf108a9d364178b1aa672e52214ab8 127.0.0.1:9872
T 5ba95da9b2f9455db9d690bdd35ae5cb
L df8296cf97df42eaacd78e794c028c79 127.0.0.1:9874
T 528ae88cce1e4415a0d075dead5c983c
L 20e66417713446039c22987fec13b800 127.0.0.1:9870
T 157ea108357e469e8fee001aec521d7e
L 5acf108a9d364178b1aa672e52214ab8 127.0.0.1:9872
T 9c6a4d493fff4bda9bfe8f158cb08e69
L 5acf108a9d364178b1aa672e52214ab8 127.0.0.1:9872
T 36d03adab6ad494ea4e769ef92ef5577
L df8296cf97df42eaacd78e794c028c79 127.0.0.1:9874
T 7ce6d42984734614bd627d872e99c833
L 20e66417713446039c22987fec13b800 127.0.0.1:9870
Change-Id: I19317fdf5a2d5c8bb5f37b27bb83067a4df4ea20
Reviewed-on: http://gerrit.cloudera.org:8080/8586
Reviewed-by: Mike Percy <mp...@apache.org>
Tested-by: Kudu Jenkins
Reviewed-on: http://gerrit.cloudera.org:8080/8733
Reviewed-by: Alexey Serbin <as...@cloudera.com>
Tested-by: Mike Percy <mp...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/7c442fba
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/7c442fba
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/7c442fba
Branch: refs/heads/branch-1.6.x
Commit: 7c442fbad68b7af9516ec1a9e2975b5efa1bc573
Parents: d9eb7d4
Author: Alexey Serbin <as...@cloudera.com>
Authored: Thu Nov 16 18:03:51 2017 -0800
Committer: Mike Percy <mp...@apache.org>
Committed: Sat Dec 2 05:28:59 2017 +0000
----------------------------------------------------------------------
src/kudu/client/CMakeLists.txt | 1 +
src/kudu/client/client.cc | 10 +-
src/kudu/client/client.h | 4 +
src/kudu/client/client_builder-internal.cc | 6 +-
src/kudu/client/client_builder-internal.h | 2 +
src/kudu/client/meta_cache.cc | 41 ++++--
src/kudu/client/meta_cache.h | 6 +-
src/kudu/client/replica-internal.cc | 3 +-
src/kudu/client/replica-internal.h | 3 +-
src/kudu/client/replica_controller-internal.cc | 40 ++++++
src/kudu/client/replica_controller-internal.h | 57 ++++++++
src/kudu/client/scan_token-internal.cc | 3 +-
.../raft_consensus_nonvoter-itest.cc | 86 ++++++++++++
src/kudu/tools/ksck-test.cc | 30 ++--
src/kudu/tools/ksck.cc | 140 ++++++++++++-------
src/kudu/tools/ksck.h | 51 ++++---
src/kudu/tools/ksck_remote.cc | 9 +-
src/kudu/tools/kudu-admin-test.cc | 2 +-
src/kudu/tools/tool_action_table.cc | 83 ++++++-----
19 files changed, 428 insertions(+), 149 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/client/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/client/CMakeLists.txt b/src/kudu/client/CMakeLists.txt
index a78d300..92a1c7e 100644
--- a/src/kudu/client/CMakeLists.txt
+++ b/src/kudu/client/CMakeLists.txt
@@ -44,6 +44,7 @@ set(CLIENT_SRCS
scan_token-internal.cc
scanner-internal.cc
replica-internal.cc
+ replica_controller-internal.cc
resource_metrics.cc
schema.cc
session-internal.cc
http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/client/client.cc
----------------------------------------------------------------------
diff --git a/src/kudu/client/client.cc b/src/kudu/client/client.cc
index aed362d..93431b7 100644
--- a/src/kudu/client/client.cc
+++ b/src/kudu/client/client.cc
@@ -324,7 +324,7 @@ Status KuduClientBuilder::Build(shared_ptr<KuduClient>* client) {
RETURN_NOT_OK(ImportAuthnCredsToMessenger(data_->authn_creds_, messenger.get()));
}
- shared_ptr<KuduClient> c(new KuduClient());
+ shared_ptr<KuduClient> c(new KuduClient);
c->data_->messenger_ = std::move(messenger);
c->data_->master_server_addrs_ = data_->master_server_addrs_;
c->data_->default_admin_operation_timeout_ = data_->default_admin_operation_timeout_;
@@ -336,8 +336,8 @@ Status KuduClientBuilder::Build(shared_ptr<KuduClient>* client) {
RETURN_NOT_OK_PREPEND(c->data_->ConnectToCluster(c.get(), deadline),
"Could not connect to the cluster");
- c->data_->meta_cache_.reset(new MetaCache(c.get()));
- c->data_->dns_resolver_.reset(new DnsResolver());
+ c->data_->meta_cache_.reset(new MetaCache(c.get(), data_->replica_visibility_));
+ c->data_->dns_resolver_.reset(new DnsResolver);
// Init local host names used for locality decisions.
RETURN_NOT_OK_PREPEND(c->data_->InitLocalHostNames(),
@@ -547,9 +547,11 @@ Status KuduClient::GetTablet(const string& tablet_id, KuduTablet** tablet) {
unique_ptr<KuduTabletServer> ts(new KuduTabletServer);
ts->data_ = new KuduTabletServer::Data(ts_info.permanent_uuid(), hp);
+ // TODO(aserbin): try to use member_type instead of role for metacache.
bool is_leader = r.role() == consensus::RaftPeerPB::LEADER;
+ bool is_voter = is_leader || r.role() == consensus::RaftPeerPB::FOLLOWER;
unique_ptr<KuduReplica> replica(new KuduReplica);
- replica->data_ = new KuduReplica::Data(is_leader, std::move(ts));
+ replica->data_ = new KuduReplica::Data(is_leader, is_voter, std::move(ts));
replicas.push_back(replica.release());
}
http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/client/client.h
----------------------------------------------------------------------
diff --git a/src/kudu/client/client.h b/src/kudu/client/client.h
index 0729fcc..b8dd7ec 100644
--- a/src/kudu/client/client.h
+++ b/src/kudu/client/client.h
@@ -87,6 +87,7 @@ class LookupRpc;
class MetaCache;
class RemoteTablet;
class RemoteTabletServer;
+class ReplicaController;
class WriteRpc;
} // namespace internal
@@ -260,6 +261,8 @@ class KUDU_EXPORT KuduClientBuilder {
private:
class KUDU_NO_EXPORT Data;
+ friend class internal::ReplicaController;
+
// Owned.
Data* data_;
@@ -607,6 +610,7 @@ class KUDU_EXPORT KuduReplica {
private:
friend class KuduClient;
friend class KuduScanTokenBuilder;
+ friend class internal::ReplicaController;
class KUDU_NO_EXPORT Data;
http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/client/client_builder-internal.cc
----------------------------------------------------------------------
diff --git a/src/kudu/client/client_builder-internal.cc b/src/kudu/client/client_builder-internal.cc
index cb56393..7d0d496 100644
--- a/src/kudu/client/client_builder-internal.cc
+++ b/src/kudu/client/client_builder-internal.cc
@@ -16,14 +16,16 @@
// under the License.
#include "kudu/client/client_builder-internal.h"
+#include "kudu/client/replica_controller-internal.h"
namespace kudu {
namespace client {
KuduClientBuilder::Data::Data()
- : default_admin_operation_timeout_(MonoDelta::FromSeconds(30)),
- default_rpc_timeout_(MonoDelta::FromSeconds(10)) {
+ : default_admin_operation_timeout_(MonoDelta::FromSeconds(30)),
+ default_rpc_timeout_(MonoDelta::FromSeconds(10)),
+ replica_visibility_(internal::ReplicaController::Visibility::VOTERS) {
}
KuduClientBuilder::Data::~Data() {
http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/client/client_builder-internal.h
----------------------------------------------------------------------
diff --git a/src/kudu/client/client_builder-internal.h b/src/kudu/client/client_builder-internal.h
index 9b9c02b..07d977a 100644
--- a/src/kudu/client/client_builder-internal.h
+++ b/src/kudu/client/client_builder-internal.h
@@ -21,6 +21,7 @@
#include <vector>
#include "kudu/client/client.h"
+#include "kudu/client/replica_controller-internal.h"
#include "kudu/gutil/macros.h"
#include "kudu/util/monotime.h"
@@ -37,6 +38,7 @@ class KuduClientBuilder::Data {
MonoDelta default_admin_operation_timeout_;
MonoDelta default_rpc_timeout_;
std::string authn_creds_;
+ internal::ReplicaController::Visibility replica_visibility_;
DISALLOW_COPY_AND_ASSIGN(Data);
};
http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/client/meta_cache.cc
----------------------------------------------------------------------
diff --git a/src/kudu/client/meta_cache.cc b/src/kudu/client/meta_cache.cc
index 08b222c..9c9520b 100644
--- a/src/kudu/client/meta_cache.cc
+++ b/src/kudu/client/meta_cache.cc
@@ -70,6 +70,7 @@ class Messenger;
}
using consensus::RaftPeerPB;
+using master::ANY_REPLICA;
using master::GetTableLocationsRequestPB;
using master::GetTableLocationsResponsePB;
using master::MasterServiceProxy;
@@ -502,9 +503,11 @@ void MetaCacheServerPicker::InitProxyCb(const ServerPickedCallback& callback,
////////////////////////////////////////////////////////////
-MetaCache::MetaCache(KuduClient* client)
- : client_(client),
- master_lookup_sem_(50) {
+MetaCache::MetaCache(KuduClient* client,
+ ReplicaController::Visibility replica_visibility)
+ : client_(client),
+ master_lookup_sem_(50),
+ replica_visibility_(replica_visibility) {
}
MetaCache::~MetaCache() {
@@ -529,7 +532,7 @@ void MetaCache::UpdateTabletServer(const TSInfoPB& pb) {
// Keeps a reference on the owning metacache while alive.
class LookupRpc : public Rpc {
public:
- LookupRpc(const scoped_refptr<MetaCache>& meta_cache,
+ LookupRpc(scoped_refptr<MetaCache> meta_cache,
StatusCallback user_cb,
const KuduTable* table,
string partition_key,
@@ -537,7 +540,8 @@ class LookupRpc : public Rpc {
const MonoTime& deadline,
shared_ptr<Messenger> messenger,
int max_returned_locations,
- bool is_exact_lookup);
+ bool is_exact_lookup,
+ ReplicaController::Visibility replica_visibility);
virtual ~LookupRpc();
virtual void SendRpc() OVERRIDE;
virtual string ToString() const OVERRIDE;
@@ -593,31 +597,38 @@ class LookupRpc : public Rpc {
bool has_permit_;
// The max number of tablets to fetch per round trip from the master
- int max_returned_locations_;
+ const int max_returned_locations_;
// If true, this lookup is for an exact tablet match with the requested
// partition key. If false, the next tablet after the partition key should be
// returned if the partition key falls in a non-covered partition range.
- bool is_exact_lookup_;
+ const bool is_exact_lookup_;
+
+ // Controlling which replicas to look up. If set to Visibility::ALL,
+ // non-voter tablet replicas, if any, appear in the lookup result in addition
+ // to 'regular' voter replicas.
+ const ReplicaController::Visibility replica_visibility_;
};
-LookupRpc::LookupRpc(const scoped_refptr<MetaCache>& meta_cache,
+LookupRpc::LookupRpc(scoped_refptr<MetaCache> meta_cache,
StatusCallback user_cb, const KuduTable* table,
string partition_key,
scoped_refptr<RemoteTablet>* remote_tablet,
const MonoTime& deadline,
shared_ptr<Messenger> messenger,
int max_returned_locations,
- bool is_exact_lookup)
+ bool is_exact_lookup,
+ ReplicaController::Visibility replica_visibility)
: Rpc(deadline, std::move(messenger)),
- meta_cache_(meta_cache),
+ meta_cache_(std::move(meta_cache)),
user_cb_(std::move(user_cb)),
table_(table),
partition_key_(std::move(partition_key)),
remote_tablet_(remote_tablet),
has_permit_(false),
max_returned_locations_(max_returned_locations),
- is_exact_lookup_(is_exact_lookup) {
+ is_exact_lookup_(is_exact_lookup),
+ replica_visibility_(replica_visibility) {
DCHECK(deadline.Initialized());
}
@@ -668,7 +679,9 @@ void LookupRpc::SendRpc() {
req_.mutable_table()->set_table_id(table_->id());
req_.set_partition_key_start(partition_key_);
req_.set_max_returned_locations(max_returned_locations_);
- req_.set_replica_type_filter(master::ANY_REPLICA);
+ if (replica_visibility_ == ReplicaController::Visibility::ALL) {
+ req_.set_replica_type_filter(master::ANY_REPLICA);
+ }
// The end partition key is left unset intentionally so that we'll prefetch
// some additional tablets.
@@ -1022,7 +1035,7 @@ void MetaCache::LookupTabletByKey(const KuduTable* table,
deadline,
client_->data_->messenger_,
kFetchTabletsPerPointLookup,
- true);
+ true, replica_visibility_);
rpc->SendRpc();
}
@@ -1040,7 +1053,7 @@ void MetaCache::LookupTabletByKeyOrNext(const KuduTable* table,
deadline,
client_->data_->messenger_,
max_returned_locations,
- false);
+ false, replica_visibility_);
rpc->SendRpc();
}
http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/client/meta_cache.h
----------------------------------------------------------------------
diff --git a/src/kudu/client/meta_cache.h b/src/kudu/client/meta_cache.h
index bd5e72e..ec003b7 100644
--- a/src/kudu/client/meta_cache.h
+++ b/src/kudu/client/meta_cache.h
@@ -30,6 +30,7 @@
#include <glog/logging.h>
#include <gtest/gtest_prod.h>
+#include "kudu/client/replica_controller-internal.h"
#include "kudu/common/partition.h"
#include "kudu/consensus/metadata.pb.h"
#include "kudu/gutil/macros.h"
@@ -369,7 +370,7 @@ class MetaCacheEntry {
class MetaCache : public RefCountedThreadSafe<MetaCache> {
public:
// The passed 'client' object must remain valid as long as MetaCache is alive.
- explicit MetaCache(KuduClient* client);
+ MetaCache(KuduClient* client, ReplicaController::Visibility replica_visibility);
~MetaCache();
// Look up which tablet hosts the given partition key for a table. When it is
@@ -471,6 +472,9 @@ class MetaCache : public RefCountedThreadSafe<MetaCache> {
// permits have been acquired.
Semaphore master_lookup_sem_;
+ // Policy on tablet replica visibility: what type of replicas to expose.
+ const ReplicaController::Visibility replica_visibility_;
+
DISALLOW_COPY_AND_ASSIGN(MetaCache);
};
http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/client/replica-internal.cc
----------------------------------------------------------------------
diff --git a/src/kudu/client/replica-internal.cc b/src/kudu/client/replica-internal.cc
index 156d8f4..8ea3540 100644
--- a/src/kudu/client/replica-internal.cc
+++ b/src/kudu/client/replica-internal.cc
@@ -27,8 +27,9 @@ namespace client {
using std::unique_ptr;
-KuduReplica::Data::Data(bool is_leader, unique_ptr<KuduTabletServer> ts)
+KuduReplica::Data::Data(bool is_leader, bool is_voter, unique_ptr<KuduTabletServer> ts)
: is_leader_(is_leader),
+ is_voter_(is_voter),
ts_(std::move(ts)) {
}
http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/client/replica-internal.h
----------------------------------------------------------------------
diff --git a/src/kudu/client/replica-internal.h b/src/kudu/client/replica-internal.h
index 5603dea..7ba079f 100644
--- a/src/kudu/client/replica-internal.h
+++ b/src/kudu/client/replica-internal.h
@@ -26,10 +26,11 @@ namespace client {
class KuduReplica::Data {
public:
- Data(bool is_leader, std::unique_ptr<KuduTabletServer> ts);
+ Data(bool is_leader, bool is_voter, std::unique_ptr<KuduTabletServer> ts);
~Data() = default;
const bool is_leader_;
+ const bool is_voter_;
const std::unique_ptr<KuduTabletServer> ts_;
private:
http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/client/replica_controller-internal.cc
----------------------------------------------------------------------
diff --git a/src/kudu/client/replica_controller-internal.cc b/src/kudu/client/replica_controller-internal.cc
new file mode 100644
index 0000000..dca77ae
--- /dev/null
+++ b/src/kudu/client/replica_controller-internal.cc
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/client/replica_controller-internal.h"
+
+#include "kudu/client/client.h"
+#include "kudu/client/client_builder-internal.h"
+#include "kudu/client/replica-internal.h"
+
+namespace kudu {
+namespace client {
+namespace internal {
+
+ReplicaController::ReplicaController() {}
+
+void ReplicaController::SetVisibility(KuduClientBuilder* builder, Visibility visibility) {
+ builder->data_->replica_visibility_ = visibility;
+}
+
+bool ReplicaController::is_voter(const KuduReplica& replica) {
+ return replica.data_->is_voter_;
+}
+
+} // namespace internal
+} // namespace client
+} // namespace kudu
http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/client/replica_controller-internal.h
----------------------------------------------------------------------
diff --git a/src/kudu/client/replica_controller-internal.h b/src/kudu/client/replica_controller-internal.h
new file mode 100644
index 0000000..8358957
--- /dev/null
+++ b/src/kudu/client/replica_controller-internal.h
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#pragma once
+
+#include "kudu/gutil/macros.h"
+
+namespace kudu {
+namespace client {
+
+class KuduClientBuilder;
+class KuduReplica;
+
+namespace internal {
+
+// This is a class whose sole responsibility is to access tablet replica's
+// visibility properties. Those are stored in private members of a few classes
+// from the Kudu client API. It's not yet clear whether we want to allow access
+// to those properties via the API, so it's safer to not expose them yet.
+//
+// Having this single class reduces cruft in friend class declarations in the
+// client.h file.
+class ReplicaController {
+ public:
+ // Control over tablet replica visibility: expose all or only voter replicas.
+ enum class Visibility {
+ ALL, // Expose all replicas: both of voter and non-voter type.
+ VOTERS, // Expose only replicas of voter type.
+ };
+
+ // Set the specified replica visibility option for the given builder.
+ static void SetVisibility(KuduClientBuilder* builder, Visibility visibility);
+
+ static bool is_voter(const KuduReplica& replica);
+
+ private:
+ ReplicaController();
+
+ DISALLOW_COPY_AND_ASSIGN(ReplicaController);
+};
+
+} // namespace internal
+} // namespace client
+} // namespace kudu
http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/client/scan_token-internal.cc
----------------------------------------------------------------------
diff --git a/src/kudu/client/scan_token-internal.cc b/src/kudu/client/scan_token-internal.cc
index d8b2d16..de36be6 100644
--- a/src/kudu/client/scan_token-internal.cc
+++ b/src/kudu/client/scan_token-internal.cc
@@ -307,8 +307,9 @@ Status KuduScanTokenBuilder::Data::Build(vector<KuduScanToken*>* tokens) {
client_ts->data_ = new KuduTabletServer::Data(r.ts->permanent_uuid(),
host_ports[0]);
bool is_leader = r.role == consensus::RaftPeerPB::LEADER;
+ bool is_voter = is_leader || r.role == consensus::RaftPeerPB::FOLLOWER;
unique_ptr<KuduReplica> client_replica(new KuduReplica);
- client_replica->data_ = new KuduReplica::Data(is_leader,
+ client_replica->data_ = new KuduReplica::Data(is_leader, is_voter,
std::move(client_ts));
client_replicas.push_back(client_replica.release());
}
http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc b/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc
index 241ab4d..f3a0d9e 100644
--- a/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc
+++ b/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc
@@ -15,7 +15,9 @@
// specific language governing permissions and limitations
// under the License.
+#include <cstddef>
#include <cstdint>
+#include <numeric>
#include <ostream>
#include <string>
#include <unordered_map>
@@ -27,11 +29,13 @@
#include <gtest/gtest.h>
#include "kudu/client/client.h"
+#include "kudu/client/replica_controller-internal.h"
#include "kudu/client/shared_ptr.h"
#include "kudu/consensus/metadata.pb.h"
#include "kudu/consensus/quorum_util.h"
#include "kudu/gutil/gscoped_ptr.h"
#include "kudu/gutil/map-util.h"
+#include "kudu/gutil/stl_util.h"
#include "kudu/gutil/strings/substitute.h"
#include "kudu/gutil/strings/util.h"
#include "kudu/integration-tests/cluster_itest_util.h"
@@ -45,6 +49,7 @@
#include "kudu/tserver/tablet_server-test-base.h"
#include "kudu/util/metrics.h"
#include "kudu/util/monotime.h"
+#include "kudu/util/net/sockaddr.h"
#include "kudu/util/pb_util.h"
#include "kudu/util/scoped_cleanup.h"
#include "kudu/util/status.h"
@@ -58,6 +63,13 @@ DECLARE_double(leader_failure_max_missed_heartbeat_periods);
METRIC_DECLARE_gauge_int32(tablet_copy_open_client_sessions);
METRIC_DECLARE_gauge_int32(tablet_copy_open_source_sessions);
+using kudu::client::sp::shared_ptr;
+using kudu::client::KuduClient;
+using kudu::client::KuduClientBuilder;
+using kudu::client::KuduScanToken;
+using kudu::client::KuduScanTokenBuilder;
+using kudu::client::KuduTable;
+using kudu::client::internal::ReplicaController;
using kudu::cluster::ExternalDaemon;
using kudu::cluster::ExternalMaster;
using kudu::cluster::ExternalTabletServer;
@@ -338,6 +350,80 @@ TEST_F(RaftConsensusNonVoterITest, GetTableAndTabletLocations) {
}
}
+// Verify that non-voters replicas are not exposed to a regular Kudu client.
+// However, it's possible to see the replicas.
+TEST_F(RaftConsensusNonVoterITest, ReplicaMatchPolicy) {
+ const MonoDelta kTimeout = MonoDelta::FromSeconds(60);
+ const int kOriginalReplicasNum = 3;
+ FLAGS_num_tablet_servers = kOriginalReplicasNum + 1;
+ FLAGS_num_replicas = kOriginalReplicasNum;
+ NO_FATALS(BuildAndStart());
+ ASSERT_EQ(FLAGS_num_tablet_servers, tablet_servers_.size());
+ ASSERT_EQ(kOriginalReplicasNum, tablet_replicas_.size());
+
+ const string& tablet_id = tablet_id_;
+ TabletServerMap replica_servers;
+ for (const auto& e : tablet_replicas_) {
+ if (e.first == tablet_id) {
+ replica_servers.emplace(e.second->uuid(), e.second);
+ }
+ }
+ ASSERT_EQ(FLAGS_num_replicas, replica_servers.size());
+
+ TServerDetails* new_replica = nullptr;
+ for (const auto& ts : tablet_servers_) {
+ if (replica_servers.find(ts.first) == replica_servers.end()) {
+ new_replica = ts.second;
+ break;
+ }
+ }
+ ASSERT_NE(nullptr, new_replica);
+
+ ASSERT_OK(AddReplica(tablet_id, new_replica, RaftPeerPB::NON_VOTER, kTimeout));
+ // Wait the newly added replica to start.
+ ASSERT_OK(WaitForNumTabletsOnTS(
+ new_replica, 1, kTimeout, nullptr, tablet::RUNNING));
+
+ auto count_replicas = [this](KuduTable* table, size_t* count) {
+ vector<KuduScanToken*> tokens;
+ ElementDeleter deleter(&tokens);
+ KuduScanTokenBuilder builder(table);
+ RETURN_NOT_OK(builder.Build(&tokens));
+ size_t replicas_count = std::accumulate(
+ tokens.begin(), tokens.end(), 0,
+ [](size_t sum, const KuduScanToken* token) {
+ return sum + token->tablet().replicas().size();
+ });
+ *count = replicas_count;
+ return Status::OK();
+ };
+
+ // The case of regular client: the non-voter replica should not be seen.
+ {
+ size_t count = 0;
+ ASSERT_OK(count_replicas(table_.get(), &count));
+ EXPECT_EQ(kOriginalReplicasNum, count);
+ }
+
+ // The case of special client used for internal tools, etc.: non-voter
+ // replicas should be visible.
+ {
+ KuduClientBuilder builder;
+ ReplicaController::SetVisibility(&builder, ReplicaController::Visibility::ALL);
+ shared_ptr<KuduClient> client;
+ ASSERT_OK(builder
+ .add_master_server_addr(cluster_->master()->bound_rpc_addr().ToString())
+ .Build(&client));
+ ASSERT_NE(nullptr, client.get());
+ shared_ptr<KuduTable> t;
+ ASSERT_OK(client->OpenTable(table_->name(), &t));
+
+ size_t count = 0;
+ ASSERT_OK(count_replicas(t.get(), &count));
+ EXPECT_EQ(kOriginalReplicasNum + 1, count);
+ }
+}
+
// Ensure that adding a NON_VOTER replica is properly handled by the system:
//
// * Updating Raft configuration for tablet by adding a NON_VOTER replica
http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/tools/ksck-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck-test.cc b/src/kudu/tools/ksck-test.cc
index 7031ec6..a7c81cd 100644
--- a/src/kudu/tools/ksck-test.cc
+++ b/src/kudu/tools/ksck-test.cc
@@ -213,15 +213,17 @@ class KsckTest : public KuduTest {
void CreateAndFillTablet(shared_ptr<KsckTablet>& tablet, int num_replicas,
bool has_leader, bool is_running) {
- vector<shared_ptr<KsckTabletReplica>> replicas;
- if (has_leader) {
- CreateReplicaAndAdd(&replicas, tablet->id(), true, is_running);
- num_replicas--;
- }
- for (int i = 0; i < num_replicas; i++) {
- CreateReplicaAndAdd(&replicas, tablet->id(), false, is_running);
+ {
+ vector<shared_ptr<KsckTabletReplica>> replicas;
+ if (has_leader) {
+ CreateReplicaAndAdd(&replicas, tablet->id(), true, is_running);
+ num_replicas--;
+ }
+ for (int i = 0; i < num_replicas; i++) {
+ CreateReplicaAndAdd(&replicas, tablet->id(), false, is_running);
+ }
+ tablet->set_replicas(std::move(replicas));
}
- tablet->set_replicas(replicas);
// Set up the consensus state on each tablet server.
consensus::ConsensusStatePB cstate;
@@ -247,8 +249,8 @@ class KsckTest : public KuduTest {
const string& tablet_id,
bool is_leader,
bool is_running) {
- shared_ptr<KsckTabletReplica> replica(new KsckTabletReplica(assignment_plan_.back(),
- is_leader));
+ shared_ptr<KsckTabletReplica> replica(
+ new KsckTabletReplica(assignment_plan_.back(), is_leader, true));
shared_ptr<MockKsckTabletServer> ts = static_pointer_cast<MockKsckTabletServer>(
master_->tablet_servers_.at(assignment_plan_.back()));
@@ -415,7 +417,7 @@ TEST_F(KsckTest, TestConsensusConflictExtraPeer) {
ASSERT_EQ("Corruption: 1 out of 1 table(s) are bad", s.ToString());
ASSERT_STR_CONTAINS(err_stream_.str(),
"The consensus matrix is:\n"
- " Config source | Voters | Current term | Config index | Committed?\n"
+ " Config source | Replicas | Current term | Config index | Committed?\n"
"---------------+------------------+--------------+--------------+------------\n"
" master | A* B C | | | Yes\n"
" A | A* B C D | 0 | | Yes\n"
@@ -441,7 +443,7 @@ TEST_F(KsckTest, TestConsensusConflictMissingPeer) {
ASSERT_EQ("Corruption: 1 out of 1 table(s) are bad", s.ToString());
ASSERT_STR_CONTAINS(err_stream_.str(),
"The consensus matrix is:\n"
- " Config source | Voters | Current term | Config index | Committed?\n"
+ " Config source | Replicas | Current term | Config index | Committed?\n"
"---------------+--------------+--------------+--------------+------------\n"
" master | A* B C | | | Yes\n"
" A | A* B | 0 | | Yes\n"
@@ -467,7 +469,7 @@ TEST_F(KsckTest, TestConsensusConflictDifferentLeader) {
ASSERT_EQ("Corruption: 1 out of 1 table(s) are bad", s.ToString());
ASSERT_STR_CONTAINS(err_stream_.str(),
"The consensus matrix is:\n"
- " Config source | Voters | Current term | Config index | Committed?\n"
+ " Config source | Replicas | Current term | Config index | Committed?\n"
"---------------+--------------+--------------+--------------+------------\n"
" master | A* B C | | | Yes\n"
" A | A B* C | 0 | | Yes\n"
@@ -579,7 +581,7 @@ TEST_F(KsckTest, TestMasterNotReportingTabletServerWithConsensusConflict) {
ASSERT_STR_CONTAINS(err_stream_.str(), "Table test has 3 under-replicated tablet(s)");
ASSERT_STR_CONTAINS(err_stream_.str(),
"The consensus matrix is:\n"
- " Config source | Voters | Current term | Config index | Committed?\n"
+ " Config source | Replicas | Current term | Config index | Committed?\n"
"---------------+------------------------+--------------+--------------+------------\n"
" master | A* B C | | | Yes\n"
" A | [config not available] | | | \n"
http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/tools/ksck.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck.cc b/src/kudu/tools/ksck.cc
index cf36516..0385b7f 100644
--- a/src/kudu/tools/ksck.cc
+++ b/src/kudu/tools/ksck.cc
@@ -24,6 +24,7 @@
#include <iterator>
#include <map>
#include <mutex>
+#include <numeric>
#include <type_traits>
#include <boost/optional.hpp> // IWYU pragma: keep
@@ -66,24 +67,23 @@ DEFINE_bool(consensus, true,
DEFINE_bool(verbose, false,
"Output detailed information even if no inconsistency is found.");
-namespace kudu {
-namespace tools {
-
using std::cout;
using std::endl;
using std::left;
using std::map;
using std::ostream;
-using std::right;
+using std::ostringstream;
using std::setw;
using std::shared_ptr;
using std::string;
-using std::ostringstream;
using std::to_string;
using std::unordered_map;
using std::vector;
using strings::Substitute;
+namespace kudu {
+namespace tools {
+
namespace {
// Return true if 'str' matches any of the patterns in 'patterns', or if
// 'patterns' is empty.
@@ -369,7 +369,7 @@ class ChecksumResultReporter : public RefCountedThreadSafe<ChecksumResultReporte
};
// Queue of tablet replicas for an individual tablet server.
-typedef shared_ptr<BlockingQueue<std::pair<Schema, std::string> > > SharedTabletQueue;
+typedef shared_ptr<BlockingQueue<std::pair<Schema, std::string>>> SharedTabletQueue;
// A set of callbacks which records the result of a tablet replica's checksum,
// and then checks if the tablet server has any more tablets to checksum. If so,
@@ -654,22 +654,27 @@ struct ReplicaInfo {
boost::optional<KsckConsensusState> consensus_state;
};
-// Formats the peers known and unknown to 'config' using labels from 'peer_uuid_mapping'.
-string format_peers(const map<string, char>& peer_uuid_mapping, const KsckConsensusState& config) {
- ostringstream voters;
- int peer_width = 4;
- for (const auto &entry : peer_uuid_mapping) {
- if (!ContainsKey(config.peer_uuids, entry.first)) {
- voters << setw(peer_width) << left << "";
+// Format replicas known and unknown to 'config' using labels from 'uuid_mapping'.
+string format_replicas(const map<string, char>& uuid_mapping, const KsckConsensusState& config) {
+ constexpr int kPeerWidth = 4;
+ ostringstream result;
+ for (const auto &entry : uuid_mapping) {
+ if (!ContainsKey(config.voter_uuids, entry.first) &&
+ !ContainsKey(config.non_voter_uuids, entry.first)) {
+ result << setw(kPeerWidth) << left << "";
continue;
}
if (config.leader_uuid && config.leader_uuid == entry.first) {
- voters << setw(peer_width) << left << Substitute("$0*", entry.second);
+ result << setw(kPeerWidth) << left << Substitute("$0*", entry.second);
} else {
- voters << setw(peer_width) << left << Substitute("$0", entry.second);
+ if (ContainsKey(config.non_voter_uuids, entry.first)) {
+ result << setw(kPeerWidth) << left << Substitute("$0~", entry.second);
+ } else {
+ result << setw(kPeerWidth) << left << Substitute("$0", entry.second);
+ }
}
}
- return voters.str();
+ return result.str();
}
} // anonymous namespace
@@ -685,14 +690,21 @@ Ksck::CheckResult Ksck::VerifyTablet(const shared_ptr<KsckTablet>& tablet, int t
if (leader_it != tablet->replicas().cend()) {
leader_uuid = (*leader_it)->ts_uuid();
}
- vector<string> peer_uuids_from_master;
+ vector<string> voter_uuids_from_master;
+ vector<string> non_voter_uuids_from_master;
for (const auto& replica : tablet->replicas()) {
- peer_uuids_from_master.push_back(replica->ts_uuid());
+ if (replica->is_voter()) {
+ voter_uuids_from_master.push_back(replica->ts_uuid());
+ } else {
+ non_voter_uuids_from_master.push_back(replica->ts_uuid());
+ }
}
KsckConsensusState master_config(KsckConsensusConfigType::MASTER,
- boost::none, boost::none,
- leader_uuid, peer_uuids_from_master);
-
+ boost::none,
+ boost::none,
+ leader_uuid,
+ voter_uuids_from_master,
+ non_voter_uuids_from_master);
vector<ReplicaInfo> replica_infos;
for (const shared_ptr<KsckTabletReplica>& replica : tablet->replicas()) {
replica_infos.emplace_back();
@@ -722,9 +734,14 @@ Ksck::CheckResult Ksck::VerifyTablet(const shared_ptr<KsckTablet>& tablet, int t
cstate.pending_config() :
cstate.committed_config();
const auto& peers = config.peers();
- vector<string> peer_uuids_from_replica;
+ vector<string> voter_uuids_from_replica;
+ vector<string> non_voter_uuids_from_replica;
for (const auto& pb : peers) {
- peer_uuids_from_replica.push_back(pb.permanent_uuid());
+ if (pb.member_type() == consensus::RaftPeerPB::NON_VOTER) {
+ non_voter_uuids_from_replica.push_back(pb.permanent_uuid());
+ } else {
+ voter_uuids_from_replica.push_back(pb.permanent_uuid());
+ }
}
boost::optional<int64_t> opid_index;
if (config.has_opid_index()) {
@@ -734,27 +751,28 @@ Ksck::CheckResult Ksck::VerifyTablet(const shared_ptr<KsckTablet>& tablet, int t
if (!cstate.leader_uuid().empty()) {
repl_leader_uuid = cstate.leader_uuid();
}
- KsckConsensusConfigType config_type = cstate.has_pending_config() ?
- KsckConsensusConfigType::PENDING :
- KsckConsensusConfigType::COMMITTED;
+ KsckConsensusConfigType config_type =
+ cstate.has_pending_config() ? KsckConsensusConfigType::PENDING
+ : KsckConsensusConfigType::COMMITTED;
repl_info->consensus_state = KsckConsensusState(config_type,
- cstate.current_term(),
- opid_index,
- repl_leader_uuid,
- peer_uuids_from_replica);
+ cstate.current_term(),
+ opid_index,
+ repl_leader_uuid,
+ voter_uuids_from_replica,
+ non_voter_uuids_from_replica);
}
}
}
// Summarize the states.
int leaders_count = 0;
- int running_count = 0;
+ int running_voters_count = 0;
for (const auto& r : replica_infos) {
if (r.replica->is_leader()) {
leaders_count++;
}
- if (r.state == tablet::RUNNING) {
- running_count++;
+ if (r.state == tablet::RUNNING && r.replica->is_voter()) {
+ running_voters_count++;
}
}
@@ -776,19 +794,22 @@ Ksck::CheckResult Ksck::VerifyTablet(const shared_ptr<KsckTablet>& tablet, int t
// Determine the overall health state of the tablet.
CheckResult result = CheckResult::OK;
- int num_voters = replica_infos.size();
+ int num_voters = std::accumulate(replica_infos.begin(), replica_infos.end(),
+ 0, [](int sum, const ReplicaInfo& info) {
+ return sum + (info.replica->is_voter() ? 1 : 0);
+ });
int majority_size = consensus::MajoritySize(num_voters);
- if (running_count < majority_size) {
+ if (running_voters_count < majority_size) {
Out() << Substitute("$0 is $1: $2 replica(s) not RUNNING",
tablet_str,
Color(AnsiCode::RED, "unavailable"),
- num_voters - running_count) << endl;
+ num_voters - running_voters_count) << endl;
result = CheckResult::UNAVAILABLE;
- } else if (running_count < num_voters) {
+ } else if (running_voters_count < num_voters) {
Out() << Substitute("$0 is $1: $2 replica(s) not RUNNING",
tablet_str,
Color(AnsiCode::YELLOW, "under-replicated"),
- num_voters - running_count) << endl;
+ num_voters - running_voters_count) << endl;
result = CheckResult::UNDER_REPLICATED;
} else if (check_replica_count_ && num_voters < table_num_replicas) {
Out() << Substitute("$0 is $1: configuration has $2 replicas vs desired $3",
@@ -814,23 +835,24 @@ Ksck::CheckResult Ksck::VerifyTablet(const shared_ptr<KsckTablet>& tablet, int t
if (result != CheckResult::OK || FLAGS_verbose) {
for (const ReplicaInfo& r : replica_infos) {
string ts_str = r.ts ? r.ts->ToString() : r.replica->ts_uuid();
- const char* leader_str = r.replica->is_leader() ? " [LEADER]" : "";
+ const char* spec_str = r.replica->is_leader()
+ ? " [LEADER]" : (!r.replica->is_voter() ? " [NONVOTER]" : "");
Out() << " " << ts_str << ": ";
if (!r.ts || !r.ts->is_healthy()) {
- Out() << Color(AnsiCode::YELLOW, "TS unavailable") << leader_str << endl;
+ Out() << Color(AnsiCode::YELLOW, "TS unavailable") << spec_str << endl;
continue;
}
if (r.state == tablet::RUNNING) {
- Out() << Color(AnsiCode::GREEN, "RUNNING") << leader_str << endl;
+ Out() << Color(AnsiCode::GREEN, "RUNNING") << spec_str << endl;
continue;
}
if (r.status_pb == boost::none) {
- Out() << Color(AnsiCode::YELLOW, "missing") << leader_str << endl;
+ Out() << Color(AnsiCode::YELLOW, "missing") << spec_str << endl;
continue;
}
- Out() << Color(AnsiCode::YELLOW, "bad state") << leader_str << endl;
+ Out() << Color(AnsiCode::YELLOW, "bad state") << spec_str << endl;
Out() << Substitute(
" State: $0\n"
" Data state: $1\n"
@@ -852,25 +874,35 @@ Ksck::CheckResult Ksck::VerifyTablet(const shared_ptr<KsckTablet>& tablet, int t
<< endl;
}
int i = 0;
- map<string, char> peer_uuid_mapping;
+ map<string, char> replica_uuid_mapping;
// TODO(wdb): use a scheme that gives > 26 unique labels.
const string labels = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
- for (const auto& peer : master_config.peer_uuids) {
- if (InsertIfNotPresent(&peer_uuid_mapping, peer, labels[i])) {
+ for (const auto& uuid : master_config.voter_uuids) {
+ if (InsertIfNotPresent(&replica_uuid_mapping, uuid, labels[i])) {
+ i = (i + 1) % labels.size();
+ }
+ }
+ for (const auto& uuid : master_config.non_voter_uuids) {
+ if (InsertIfNotPresent(&replica_uuid_mapping, uuid, labels[i])) {
i = (i + 1) % labels.size();
}
}
for (const ReplicaInfo& rs : replica_infos) {
if (!rs.consensus_state) continue;
- for (const auto& peer : rs.consensus_state->peer_uuids) {
- if (InsertIfNotPresent(&peer_uuid_mapping, peer, labels[i])) {
+ for (const auto& uuid : rs.consensus_state->voter_uuids) {
+ if (InsertIfNotPresent(&replica_uuid_mapping, uuid, labels[i])) {
+ i = (i + 1) % labels.size();
+ }
+ }
+ for (const auto& uuid : rs.consensus_state->non_voter_uuids) {
+ if (InsertIfNotPresent(&replica_uuid_mapping, uuid, labels[i])) {
i = (i + 1) % labels.size();
}
}
}
Out() << " All the peers reported by the master and tablet servers are:" << endl;
- for (const auto& entry : peer_uuid_mapping) {
+ for (const auto& entry : replica_uuid_mapping) {
Out() << " " << entry.second << " = " << entry.first << endl;
}
Out() << endl;
@@ -881,23 +913,23 @@ Ksck::CheckResult Ksck::VerifyTablet(const shared_ptr<KsckTablet>& tablet, int t
// Seed the columns with the master info.
vector<string> sources{"master"};
- vector<string> voters{format_peers(peer_uuid_mapping, master_config)};
+ vector<string> replicas{format_replicas(replica_uuid_mapping, master_config)};
vector<string> terms{""};
vector<string> indexes{""};
vector<string> committed{"Yes"};
// Fill out the columns with info from the replicas.
for (const auto& replica_info : replica_infos) {
- char label = FindOrDie(peer_uuid_mapping, replica_info.replica->ts_uuid());
+ char label = FindOrDie(replica_uuid_mapping, replica_info.replica->ts_uuid());
sources.emplace_back(1, label);
if (!replica_info.consensus_state) {
- voters.emplace_back("[config not available]");
+ replicas.emplace_back("[config not available]");
terms.emplace_back("");
indexes.emplace_back("");
committed.emplace_back("");
continue;
}
- voters.push_back(format_peers(peer_uuid_mapping, replica_info.consensus_state.get()));
+ replicas.push_back(format_replicas(replica_uuid_mapping, replica_info.consensus_state.get()));
terms.push_back(replica_info.consensus_state->term ?
std::to_string(replica_info.consensus_state->term.get()) : "");
indexes.push_back(replica_info.consensus_state->opid_index ?
@@ -906,7 +938,7 @@ Ksck::CheckResult Ksck::VerifyTablet(const shared_ptr<KsckTablet>& tablet, int t
replica_info.consensus_state->type == KsckConsensusConfigType::PENDING ? "No" : "Yes");
}
table.AddColumn("Config source", std::move(sources));
- table.AddColumn("Voters", std::move(voters));
+ table.AddColumn("Replicas", std::move(replicas));
table.AddColumn("Current term", std::move(terms));
table.AddColumn("Config index", std::move(indexes));
table.AddColumn("Committed?", std::move(committed));
http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/tools/ksck.h
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck.h b/src/kudu/tools/ksck.h
index c0cc81a..da68edc 100644
--- a/src/kudu/tools/ksck.h
+++ b/src/kudu/tools/ksck.h
@@ -79,22 +79,29 @@ struct ChecksumOptions {
// Representation of a tablet replica on a tablet server.
class KsckTabletReplica {
public:
- KsckTabletReplica(std::string ts_uuid, bool is_leader)
- : is_leader_(is_leader),
- ts_uuid_(std::move(ts_uuid)) {
+ KsckTabletReplica(std::string ts_uuid, bool is_leader, bool is_voter)
+ : ts_uuid_(std::move(ts_uuid)),
+ is_leader_(is_leader),
+ is_voter_(is_voter) {
}
- const bool& is_leader() const {
+ const std::string& ts_uuid() const {
+ return ts_uuid_;
+ }
+
+ bool is_leader() const {
return is_leader_;
}
- const std::string& ts_uuid() const {
- return ts_uuid_;
+ bool is_voter() const {
+ return is_voter_;
}
private:
- const bool is_leader_;
const std::string ts_uuid_;
+ const bool is_leader_;
+ const bool is_voter_;
+
DISALLOW_COPY_AND_ASSIGN(KsckTabletReplica);
};
@@ -114,12 +121,14 @@ struct KsckConsensusState {
boost::optional<int64_t> term,
boost::optional<int64_t> opid_index,
boost::optional<std::string> leader_uuid,
- std::vector<std::string> peers)
+ const std::vector<std::string>& voters,
+ const std::vector<std::string>& non_voters)
: type(type),
term(std::move(term)),
opid_index(std::move(opid_index)),
leader_uuid(std::move(leader_uuid)),
- peer_uuids(peers.cbegin(), peers.cend()) {
+ voter_uuids(voters.cbegin(), voters.cend()),
+ non_voter_uuids(non_voters.cbegin(), non_voters.cend()) {
}
// Two KsckConsensusState structs match if they have the same
@@ -127,7 +136,10 @@ struct KsckConsensusState {
// - at least one of them is of type MASTER
// - they are configs of the same type and they have the same term
bool Matches(const KsckConsensusState &other) const {
- bool same_leader_and_peers = leader_uuid == other.leader_uuid && peer_uuids == other.peer_uuids;
+ bool same_leader_and_peers =
+ leader_uuid == other.leader_uuid &&
+ voter_uuids == other.voter_uuids &&
+ non_voter_uuids == other.non_voter_uuids;
if (type == KsckConsensusConfigType::MASTER || other.type == KsckConsensusConfigType::MASTER) {
return same_leader_and_peers;
}
@@ -138,7 +150,8 @@ struct KsckConsensusState {
boost::optional<int64_t> term;
boost::optional<int64_t> opid_index;
boost::optional<std::string> leader_uuid;
- std::set<std::string> peer_uuids;
+ std::set<std::string> voter_uuids;
+ std::set<std::string> non_voter_uuids;
};
// Representation of a tablet belonging to a table. The tablet is composed of replicas.
@@ -154,12 +167,12 @@ class KsckTablet {
return id_;
}
- const std::vector<std::shared_ptr<KsckTabletReplica> >& replicas() const {
+ const std::vector<std::shared_ptr<KsckTabletReplica>>& replicas() const {
return replicas_;
}
- void set_replicas(std::vector<std::shared_ptr<KsckTabletReplica> >& replicas) {
- replicas_.assign(replicas.begin(), replicas.end());
+ void set_replicas(std::vector<std::shared_ptr<KsckTabletReplica>> replicas) {
+ replicas_.swap(replicas);
}
KsckTable* table() {
@@ -195,7 +208,7 @@ class KsckTable {
tablets_ = std::move(tablets);
}
- std::vector<std::shared_ptr<KsckTablet> >& tablets() {
+ std::vector<std::shared_ptr<KsckTablet>>& tablets() {
return tablets_;
}
@@ -315,7 +328,7 @@ class KsckTabletServer {
class KsckMaster {
public:
// Map of KsckTabletServer objects keyed by tablet server permanent_uuid.
- typedef std::unordered_map<std::string, std::shared_ptr<KsckTabletServer> > TSMap;
+ typedef std::unordered_map<std::string, std::shared_ptr<KsckTabletServer>> TSMap;
KsckMaster() { }
virtual ~KsckMaster() { }
@@ -330,7 +343,7 @@ class KsckMaster {
// Gets the list of tables from the Master and stores it in the passed vector.
// tables is only modified if this method returns OK.
- virtual Status RetrieveTablesList(std::vector<std::shared_ptr<KsckTable> >* tables) = 0;
+ virtual Status RetrieveTablesList(std::vector<std::shared_ptr<KsckTable>>* tables) = 0;
// Gets the list of tablets for the specified table and stores the list in it.
// The table's tablet list is only modified if this method returns OK.
@@ -359,7 +372,7 @@ class KsckCluster {
return tablet_servers_;
}
- const std::vector<std::shared_ptr<KsckTable> >& tables() {
+ const std::vector<std::shared_ptr<KsckTable>>& tables() {
return tables_;
}
@@ -377,7 +390,7 @@ class KsckCluster {
const std::shared_ptr<KsckMaster> master_;
KsckMaster::TSMap tablet_servers_;
- std::vector<std::shared_ptr<KsckTable> > tables_;
+ std::vector<std::shared_ptr<KsckTable>> tables_;
DISALLOW_COPY_AND_ASSIGN(KsckCluster);
};
http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/tools/ksck_remote.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck_remote.cc b/src/kudu/tools/ksck_remote.cc
index e9bc060..27929ef 100644
--- a/src/kudu/tools/ksck_remote.cc
+++ b/src/kudu/tools/ksck_remote.cc
@@ -28,6 +28,7 @@
#include <glog/logging.h>
#include "kudu/client/client.h"
+#include "kudu/client/replica_controller-internal.h"
#include "kudu/client/schema.h"
#include "kudu/common/common.pb.h"
#include "kudu/common/schema.h"
@@ -68,6 +69,7 @@ using client::KuduScanToken;
using client::KuduScanTokenBuilder;
using client::KuduTable;
using client::KuduTabletServer;
+using client::internal::ReplicaController;
using rpc::Messenger;
using rpc::MessengerBuilder;
using rpc::RpcController;
@@ -311,10 +313,11 @@ void RemoteKsckTabletServer::RunTabletChecksumScanAsync(
}
Status RemoteKsckMaster::Connect() {
- client::sp::shared_ptr<KuduClient> client;
KuduClientBuilder builder;
builder.default_rpc_timeout(GetDefaultTimeout());
builder.master_server_addrs(master_addresses_);
+ ReplicaController::SetVisibility(&builder, ReplicaController::Visibility::ALL);
+ client::sp::shared_ptr<KuduClient> client;
return builder.Build(&client_);
}
@@ -380,9 +383,9 @@ Status RemoteKsckMaster::RetrieveTabletsList(const shared_ptr<KsckTable>& table)
vector<shared_ptr<KsckTabletReplica>> replicas;
for (const auto* r : t->tablet().replicas()) {
replicas.push_back(std::make_shared<KsckTabletReplica>(
- r->ts().uuid(), r->is_leader()));
+ r->ts().uuid(), r->is_leader(), ReplicaController::is_voter(*r)));
}
- tablet->set_replicas(replicas);
+ tablet->set_replicas(std::move(replicas));
tablets.push_back(tablet);
}
http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/tools/kudu-admin-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/kudu-admin-test.cc b/src/kudu/tools/kudu-admin-test.cc
index 7fb96ed..fe91dd9 100644
--- a/src/kudu/tools/kudu-admin-test.cc
+++ b/src/kudu/tools/kudu-admin-test.cc
@@ -1271,7 +1271,7 @@ TEST_F(AdminCliTest, TestListTablesDetail) {
vector<string> stdout_lines = Split(stdout, "\n", strings::SkipEmpty());
// Verify multiple tables along with their tablets and replica-uuids.
- ASSERT_EQ(4, stdout_lines.size());
+ ASSERT_EQ(10, stdout_lines.size());
ASSERT_STR_CONTAINS(stdout, kTableId);
ASSERT_STR_CONTAINS(stdout, kAnotherTableId);
ASSERT_STR_CONTAINS(stdout, tablet_ids.front());
http://git-wip-us.apache.org/repos/asf/kudu/blob/7c442fba/src/kudu/tools/tool_action_table.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_action_table.cc b/src/kudu/tools/tool_action_table.cc
index e2df70b..d878f36 100644
--- a/src/kudu/tools/tool_action_table.cc
+++ b/src/kudu/tools/tool_action_table.cc
@@ -27,10 +27,12 @@
#include <gflags/gflags.h>
#include "kudu/client/client.h"
+#include "kudu/client/replica_controller-internal.h"
#include "kudu/client/shared_ptr.h"
#include "kudu/gutil/map-util.h"
#include "kudu/gutil/stl_util.h"
#include "kudu/gutil/strings/split.h"
+#include "kudu/gutil/strings/substitute.h"
#include "kudu/tools/tool_action_common.h"
#include "kudu/util/status.h"
@@ -45,11 +47,56 @@ using client::KuduClientBuilder;
using client::KuduScanToken;
using client::KuduScanTokenBuilder;
using client::KuduTable;
+using client::internal::ReplicaController;
using std::cout;
using std::endl;
using std::string;
using std::unique_ptr;
using std::vector;
+using strings::Split;
+
+// This class only exists so that ListTables() can easily be friended by
+// KuduReplica, KuduReplica::Data, and KuduClientBuilder.
+class TableLister {
+ public:
+ static Status ListTablets(const vector<string>& master_addresses) {
+ KuduClientBuilder builder;
+ ReplicaController::SetVisibility(&builder, ReplicaController::Visibility::ALL);
+ client::sp::shared_ptr<KuduClient> client;
+ RETURN_NOT_OK(builder
+ .master_server_addrs(master_addresses)
+ .Build(&client));
+ vector<string> table_names;
+ RETURN_NOT_OK(client->ListTables(&table_names));
+
+ for (const auto& tname : table_names) {
+ cout << tname << endl;
+ if (!FLAGS_list_tablets) {
+ continue;
+ }
+ client::sp::shared_ptr<KuduTable> client_table;
+ RETURN_NOT_OK(client->OpenTable(tname, &client_table));
+ vector<KuduScanToken*> tokens;
+ ElementDeleter deleter(&tokens);
+ KuduScanTokenBuilder builder(client_table.get());
+ RETURN_NOT_OK(builder.Build(&tokens));
+
+ for (const auto* token : tokens) {
+ cout << " T " << token->tablet().id() << endl;
+ for (const auto* replica : token->tablet().replicas()) {
+ const bool is_voter = ReplicaController::is_voter(*replica);
+ const bool is_leader = replica->is_leader();
+ cout << strings::Substitute(" $0 $1 $2:$3",
+ is_leader ? "L" : (is_voter ? "V" : "N"), replica->ts().uuid(),
+ replica->ts().hostname(), replica->ts().port()) << endl;
+ }
+ cout << endl;
+ }
+ cout << endl;
+ }
+ return Status::OK();
+ }
+};
namespace {
@@ -58,7 +105,7 @@ const char* const kTableNameArg = "table_name";
Status DeleteTable(const RunnerContext& context) {
const string& master_addresses_str = FindOrDie(context.required_args,
kMasterAddressesArg);
- vector<string> master_addresses = strings::Split(master_addresses_str, ",");
+ vector<string> master_addresses = Split(master_addresses_str, ",");
const string& table_name = FindOrDie(context.required_args, kTableNameArg);
client::sp::shared_ptr<KuduClient> client;
@@ -71,39 +118,7 @@ Status DeleteTable(const RunnerContext& context) {
Status ListTables(const RunnerContext& context) {
const string& master_addresses_str = FindOrDie(context.required_args,
kMasterAddressesArg);
- vector<string> master_addresses = strings::Split(master_addresses_str, ",");
-
- client::sp::shared_ptr<KuduClient> client;
- RETURN_NOT_OK(KuduClientBuilder()
- .master_server_addrs(master_addresses)
- .Build(&client));
- vector<string> table_names;
- RETURN_NOT_OK(client->ListTables(&table_names));
-
- for (const auto& tname : table_names) {
- cout << tname << endl;
- if (!FLAGS_list_tablets) {
- continue;
- }
- client::sp::shared_ptr<KuduTable> client_table;
- RETURN_NOT_OK(client->OpenTable(tname, &client_table));
- vector<KuduScanToken*> tokens;
- ElementDeleter deleter(&tokens);
- KuduScanTokenBuilder builder(client_table.get());
- RETURN_NOT_OK(builder.Build(&tokens));
-
- for (const auto* token : tokens) {
- cout << "T " << token->tablet().id() << "\t";
- for (const auto* replica : token->tablet().replicas()) {
- cout << "P" << (replica->is_leader() ? "(L) " : " ")
- << replica->ts().uuid() << "(" << replica->ts().hostname()
- << ":" << replica->ts().port() << ")" << " ";
- }
- cout << endl;
- }
- cout << endl;
- }
- return Status::OK();
+ return TableLister::ListTablets(Split(master_addresses_str, ","));
}
} // anonymous namespace
[2/3] kudu git commit: Disable flaky KUDU-1097 tablet move test
Posted by mp...@apache.org.
Disable flaky KUDU-1097 tablet move test
This test is flaky because the leader may get marked with replace, which
is not currently reliably supported when KUDU-1097 is enabled.
Change-Id: I118accafd7da99c35beb5a4f4a9f164984f17baf
Reviewed-on: http://gerrit.cloudera.org:8080/8720
Reviewed-by: Alexey Serbin <as...@cloudera.com>
Tested-by: Mike Percy <mp...@apache.org>
Reviewed-on: http://gerrit.cloudera.org:8080/8734
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/d0d6994d
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/d0d6994d
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/d0d6994d
Branch: refs/heads/branch-1.6.x
Commit: d0d6994d4e212e4058af7ce1207b3991e98b8ea6
Parents: 7c442fb
Author: Mike Percy <mp...@apache.org>
Authored: Fri Dec 1 16:25:07 2017 -0800
Committer: Mike Percy <mp...@apache.org>
Committed: Sat Dec 2 05:29:13 2017 +0000
----------------------------------------------------------------------
src/kudu/tools/kudu-admin-test.cc | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/d0d6994d/src/kudu/tools/kudu-admin-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/kudu-admin-test.cc b/src/kudu/tools/kudu-admin-test.cc
index fe91dd9..5b7afaf 100644
--- a/src/kudu/tools/kudu-admin-test.cc
+++ b/src/kudu/tools/kudu-admin-test.cc
@@ -315,7 +315,7 @@ TEST_F(AdminCliTest, TestMoveTablet_pre_KUDU_1097) {
DoTestMoveTablet(kDisableKudu1097);
}
-TEST_F(AdminCliTest, TestMoveTablet_KUDU_1097) {
+TEST_F(AdminCliTest, DISABLED_TestMoveTablet_KUDU_1097) {
DoTestMoveTablet(kEnableKudu1097);
}
[3/3] kudu git commit: KUDU-1097. Only add/evict when processing
leader tablet reports
Posted by mp...@apache.org.
KUDU-1097. Only add/evict when processing leader tablet reports
The leader replica is the only replica that sends a health report. The
master needs the health report to make an eviction decision. Thus, we
should only consider eviction or addition of a new replica if the tablet
report was received from the leader.
There is no standalone test for this but this patch is required for the
following patch, "Never evict a leader", to pass due to the DCHECK
introduced in that patch.
Change-Id: If7e318e042cd27ff4544bc5aae76db4ab9e51da1
Reviewed-on: http://gerrit.cloudera.org:8080/8682
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin <as...@cloudera.com>
Reviewed-on: http://gerrit.cloudera.org:8080/8735
Tested-by: Mike Percy <mp...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/8a570e77
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/8a570e77
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/8a570e77
Branch: refs/heads/branch-1.6.x
Commit: 8a570e7720d6947f678b41df018413db80821a14
Parents: d0d6994
Author: Mike Percy <mp...@apache.org>
Authored: Wed Nov 29 04:24:13 2017 -0800
Committer: Mike Percy <mp...@apache.org>
Committed: Sat Dec 2 05:29:22 2017 +0000
----------------------------------------------------------------------
src/kudu/master/catalog_manager.cc | 44 ++++++++++++++++++---------------
1 file changed, 24 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/8a570e77/src/kudu/master/catalog_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index beb4302..8144d74 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -3488,32 +3488,36 @@ Status CatalogManager::ProcessTabletReport(
// 7e. Make tablet configuration change depending on the mode the server
// is running with. The choice between two alternative modes is controlled
// by the 'raft_prepare_replacement_before_eviction' run-time flag.
- if (FLAGS_raft_prepare_replacement_before_eviction) {
- // An alternative scheme of managing tablet replicas: the catalog
- // manager processes the health-related info on replicas from the tablet
- // report and initiates appropriate modifications for the tablet Raft
- // configuration: evict an already-replaced failed voter replica or add
- // a new non-voter replica marked for promotion as a replacement.
+ if (!FLAGS_raft_prepare_replacement_before_eviction) {
+ if (consensus_state_updated &&
+ FLAGS_master_add_server_when_underreplicated &&
+ CountVoters(cstate.committed_config()) < replication_factor) {
+ // Add a server to the config if it is under-replicated.
+ //
+ // This is an idempotent operation due to a CAS enforced on the
+ // committed config's opid_index.
+ rpcs.emplace_back(new AsyncAddReplicaTask(
+ master_, tablet, cstate, RaftPeerPB::VOTER, &rng_));
+ }
+
+ // When --raft_prepare_replacement_before_eviction is enabled, we
+ // consider whether to add or evict replicas based on the health report
+ // included in the leader's tablet report. Since only the leader tracks
+ // health, we ignore reports from non-leaders in this case.
+ } else if (!cstate.leader_uuid().empty() &&
+ ts_desc->permanent_uuid() == cstate.leader_uuid()) {
const RaftConfigPB& config = cstate.committed_config();
string to_evict;
- if (IsUnderReplicated(config, replication_factor)) {
- rpcs.emplace_back(new AsyncAddReplicaTask(
- master_, tablet, cstate, RaftPeerPB::NON_VOTER, &rng_));
- } else if (PREDICT_TRUE(FLAGS_catalog_manager_evict_excess_replicas) &&
- CanEvictReplica(config, replication_factor, &to_evict)) {
+ if (PREDICT_TRUE(FLAGS_catalog_manager_evict_excess_replicas) &&
+ CanEvictReplica(config, replication_factor, &to_evict)) {
DCHECK(!to_evict.empty());
rpcs.emplace_back(new AsyncEvictReplicaTask(
master_, tablet, cstate, std::move(to_evict)));
+ } else if (FLAGS_master_add_server_when_underreplicated &&
+ IsUnderReplicated(config, replication_factor)) {
+ rpcs.emplace_back(new AsyncAddReplicaTask(
+ master_, tablet, cstate, RaftPeerPB::NON_VOTER, &rng_));
}
- } else if (consensus_state_updated &&
- FLAGS_master_add_server_when_underreplicated &&
- CountVoters(cstate.committed_config()) < replication_factor) {
- // Add a server to the config if it is under-replicated.
- //
- // This is an idempotent operation due to a CAS enforced on the
- // committed config's opid_index.
- rpcs.emplace_back(new AsyncAddReplicaTask(
- master_, tablet, cstate, RaftPeerPB::VOTER, &rng_));
}
}