You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by aw...@apache.org on 2019/10/08 22:57:01 UTC
[kudu] 03/03: KUDU-2069 p8: support listing tserver state through
CLI
This is an automated email from the ASF dual-hosted git repository.
awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 1f4d473c74e7d57eeae3b384a46f4eb088e16223
Author: Andrew Wong <aw...@apache.org>
AuthorDate: Tue Oct 8 13:28:11 2019 -0700
KUDU-2069 p8: support listing tserver state through CLI
This patch adds support to the tserver list tool to show the tserver
states.
$ kudu tserver list <master> --columns=uuid,state
I opted to have the state be an optional return field because there are
cases where a tablet that is unregistered will have a state associated
with it (e.g. if the master is restarted), in which case it may be
surprising to see info about them, unless explicitly requested.
Change-Id: I5917927f3bc4bb6bebd1cfb63555ec20d94d9091
Reviewed-on: http://gerrit.cloudera.org:8080/14369
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Tested-by: Kudu Jenkins
---
src/kudu/master/master.proto | 6 +++++
src/kudu/master/master_service.cc | 28 +++++++++++++++++++-
src/kudu/master/ts_manager.cc | 5 ++++
src/kudu/master/ts_manager.h | 7 ++++-
src/kudu/tools/kudu-tool-test.cc | 49 +++++++++++++++++++++++++++++++++++
src/kudu/tools/tool_action_tserver.cc | 21 ++++++++++-----
6 files changed, 108 insertions(+), 8 deletions(-)
diff --git a/src/kudu/master/master.proto b/src/kudu/master/master.proto
index 9d37612..e480b6c 100644
--- a/src/kudu/master/master.proto
+++ b/src/kudu/master/master.proto
@@ -805,6 +805,11 @@ message HiveMetastoreConfig {
// ============================================================================
message ListTabletServersRequestPB {
+ // Whether or not to include the tserver states.
+ // Note: this may include states of tservers that haven't been registered and
+ // thus don't have a complete response Entries. In such cases, the Entries
+ // will be returned with bogus info w.r.t registration and heartbeating.
+ optional bool include_states = 1;
}
message ListTabletServersResponsePB {
@@ -815,6 +820,7 @@ message ListTabletServersResponsePB {
optional ServerRegistrationPB registration = 2;
optional int32 millis_since_heartbeat = 3;
optional string location = 4;
+ optional TServerStatePB state = 5;
}
repeated Entry servers = 2;
}
diff --git a/src/kudu/master/master_service.cc b/src/kudu/master/master_service.cc
index 5c23933..5b20d6b 100644
--- a/src/kudu/master/master_service.cc
+++ b/src/kudu/master/master_service.cc
@@ -20,6 +20,7 @@
#include <memory>
#include <ostream>
#include <string>
+#include <unordered_map>
#include <utility>
#include <vector>
@@ -33,6 +34,7 @@
#include "kudu/common/wire_protocol.pb.h"
#include "kudu/consensus/metadata.pb.h"
#include "kudu/consensus/replica_management.pb.h"
+#include "kudu/gutil/map-util.h"
#include "kudu/gutil/port.h"
#include "kudu/gutil/strings/substitute.h"
#include "kudu/hms/hms_catalog.h"
@@ -547,7 +549,10 @@ void MasterServiceImpl::GetTableSchema(const GetTableSchemaRequestPB* req,
void MasterServiceImpl::ListTabletServers(const ListTabletServersRequestPB* req,
ListTabletServersResponsePB* resp,
rpc::RpcContext* rpc) {
- vector<std::shared_ptr<TSDescriptor> > descs;
+ TSManager* ts_manager = server_->ts_manager();
+ TServerStateMap states = req->include_states() ?
+ ts_manager->GetTServerStates() : TServerStateMap();
+ vector<std::shared_ptr<TSDescriptor>> descs;
server_->ts_manager()->GetAllDescriptors(&descs);
for (const std::shared_ptr<TSDescriptor>& desc : descs) {
ListTabletServersResponsePB::Entry* entry = resp->add_servers();
@@ -555,6 +560,27 @@ void MasterServiceImpl::ListTabletServers(const ListTabletServersRequestPB* req,
desc->GetRegistration(entry->mutable_registration());
entry->set_millis_since_heartbeat(desc->TimeSinceHeartbeat().ToMilliseconds());
if (desc->location()) entry->set_location(desc->location().get());
+
+ // If we need to return states, do so.
+ const auto& uuid = desc->permanent_uuid();
+ const auto state = FindWithDefault(states, uuid, TServerStatePB::NONE);
+ entry->set_state(state);
+ states.erase(uuid);
+ }
+ // If there are any states left (e.g. they don't correspond to a registered
+ // server), report them. Set a bogus seqno, since the servers have not
+ // registered.
+ for (const auto& ts_and_state : states) {
+ const auto& ts = ts_and_state.first;
+ const auto& state = ts_and_state.second;
+ ListTabletServersResponsePB::Entry* entry = resp->add_servers();
+ NodeInstancePB* instance = entry->mutable_instance_id();
+ instance->set_permanent_uuid(ts);
+ instance->set_instance_seqno(-1);
+ entry->set_millis_since_heartbeat(-1);
+ // Note: The state map should only track non-NONE states.
+ DCHECK_NE(TServerStatePB::NONE, state);
+ entry->set_state(state);
}
rpc->RespondSuccess();
}
diff --git a/src/kudu/master/ts_manager.cc b/src/kudu/master/ts_manager.cc
index 9caaa32..46f68d9 100644
--- a/src/kudu/master/ts_manager.cc
+++ b/src/kudu/master/ts_manager.cc
@@ -216,6 +216,11 @@ unordered_set<string> TSManager::GetUuidsToIgnoreForUnderreplication() const {
return uuids;
}
+TServerStateMap TSManager::GetTServerStates() const {
+ shared_lock<RWMutex> tsl(ts_state_lock_);
+ return ts_state_by_uuid_;
+}
+
void TSManager::GetDescriptorsAvailableForPlacement(TSDescriptorVector* descs) const {
descs->clear();
shared_lock<RWMutex> tsl(ts_state_lock_);
diff --git a/src/kudu/master/ts_manager.h b/src/kudu/master/ts_manager.h
index 7e3e42f..dc5feb8 100644
--- a/src/kudu/master/ts_manager.h
+++ b/src/kudu/master/ts_manager.h
@@ -41,6 +41,8 @@ namespace master {
class LocationCache;
class SysCatalogTable;
+typedef std::unordered_map<std::string, TServerStatePB> TServerStateMap;
+
// Tracks the servers that the master has heard from, along with their
// last heartbeat, etc.
//
@@ -96,6 +98,9 @@ class TSManager {
// mode).
std::unordered_set<std::string> GetUuidsToIgnoreForUnderreplication() const;
+ // Return the tablet server states for each tablet server UUID.
+ TServerStateMap GetTServerStates() const;
+
// Get the TS count.
int GetCount() const;
@@ -151,7 +156,7 @@ class TSManager {
// Maps from the UUIDs of tablet servers to their tserver state, if any.
// Note: the states don't necessarily belong to registered tablet servers.
- std::unordered_map<std::string, TServerStatePB> ts_state_by_uuid_;
+ TServerStateMap ts_state_by_uuid_;
LocationCache* location_cache_;
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 30a09a0..a5066dc 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -2915,6 +2915,55 @@ TEST_F(ToolTest, TestTserverListLocationNotAssigned) {
ASSERT_STR_CONTAINS(out, cluster_->tablet_server(0)->uuid() + ",<none>");
}
+TEST_F(ToolTest, TestTServerListState) {
+ NO_FATALS(StartExternalMiniCluster());
+ string master_addr = cluster_->master()->bound_rpc_addr().ToString();
+ const string ts_uuid = cluster_->tablet_server(0)->uuid();
+
+ // First, set some tserver state.
+ NO_FATALS(RunActionStdoutNone(Substitute("tserver state enter_maintenance $0 $1",
+ master_addr, ts_uuid)));
+
+ // If the state isn't requested, we shouldn't see any.
+ string out;
+ NO_FATALS(RunActionStdoutString(
+ Substitute("tserver list $0 --columns=uuid --format=csv", master_addr), &out));
+ ASSERT_STR_NOT_CONTAINS(out, Substitute("$0,$1", ts_uuid, "MAINTENANCE_MODE"));
+
+ // If it is requested, we should see the state.
+ NO_FATALS(RunActionStdoutString(
+ Substitute("tserver list $0 --columns=uuid,state --format=csv", master_addr), &out));
+ ASSERT_STR_CONTAINS(out, Substitute("$0,$1", ts_uuid, "MAINTENANCE_MODE"));
+
+ // We should still see the state if the tserver hasn't been registered.
+ // "Unregister" the server by restarting the cluster and only bringing up the
+ // master, and verify that we still see the tserver in maintenance mode.
+ cluster_->Shutdown();
+ ASSERT_OK(cluster_->master()->Restart());
+ master_addr = cluster_->master()->bound_rpc_addr().ToString();
+ NO_FATALS(RunActionStdoutString(
+ Substitute("tserver list $0 --columns=uuid,state --format=csv", master_addr), &out));
+ ASSERT_STR_CONTAINS(out, Substitute("$0,$1", ts_uuid, "MAINTENANCE_MODE"));
+
+ NO_FATALS(RunActionStdoutNone(Substitute("tserver state exit_maintenance $0 $1",
+ master_addr, ts_uuid)));
+
+ // Once removed, we should see none.
+ // Since the tserver hasn't registered, there should be no output at all.
+ NO_FATALS(RunActionStdoutString(
+ Substitute("tserver list $0 --columns=uuid,state --format=csv", master_addr), &out));
+ ASSERT_EQ("", out);
+
+ // And once the tserver has registered, we should see no state. Assert
+ // eventually to give the tserver some time to register.
+ ASSERT_OK(cluster_->tablet_server(0)->Restart());
+ ASSERT_EVENTUALLY([&] {
+ NO_FATALS(RunActionStdoutString(
+ Substitute("tserver list $0 --columns=uuid,state --format=csv", master_addr), &out));
+ ASSERT_STR_CONTAINS(out, Substitute("$0,$1", ts_uuid, "NONE"));
+ });
+}
+
TEST_F(ToolTest, TestMasterList) {
ExternalMiniClusterOptions opts;
opts.num_tablet_servers = 0;
diff --git a/src/kudu/tools/tool_action_tserver.cc b/src/kudu/tools/tool_action_tserver.cc
index 744458c..bf62257 100644
--- a/src/kudu/tools/tool_action_tserver.cc
+++ b/src/kudu/tools/tool_action_tserver.cc
@@ -32,7 +32,6 @@
#include "kudu/gutil/map-util.h"
#include "kudu/gutil/strings/join.h"
#include "kudu/gutil/strings/split.h"
-#include "kudu/gutil/strings/stringpiece.h"
#include "kudu/gutil/strings/substitute.h"
#include "kudu/master/master.pb.h"
#include "kudu/master/master.proxy.h"
@@ -100,11 +99,17 @@ Status ListTServers(const RunnerContext& context) {
LeaderMasterProxy proxy;
RETURN_NOT_OK(proxy.Init(context));
+ const vector<string> cols = strings::Split(FLAGS_columns, ",", strings::SkipEmpty());
ListTabletServersRequestPB req;
+ for (const auto& col : cols) {
+ if (boost::iequals(col, "state")) {
+ req.set_include_states(true);
+ }
+ }
ListTabletServersResponsePB resp;
- proxy.SyncRpc<ListTabletServersRequestPB, ListTabletServersResponsePB>(
- req, &resp, "ListTabletServers", &MasterServiceProxy::ListTabletServersAsync);
+ RETURN_NOT_OK((proxy.SyncRpc<ListTabletServersRequestPB, ListTabletServersResponsePB>(
+ req, &resp, "ListTabletServers", &MasterServiceProxy::ListTabletServersAsync)));
if (resp.has_error()) {
return StatusFromPB(resp.error().status());
@@ -117,7 +122,7 @@ Status ListTServers(const RunnerContext& context) {
return strings::Substitute("$0:$1", hostport.host(), hostport.port());
};
- for (const auto& column : strings::Split(FLAGS_columns, ",", strings::SkipEmpty())) {
+ for (const auto& column : cols) {
vector<string> values;
if (boost::iequals(column, "uuid")) {
for (const auto& server : servers) {
@@ -156,10 +161,14 @@ Status ListTServers(const RunnerContext& context) {
for (const auto& server : servers) {
values.emplace_back(StartTimeToString(server.registration()));
}
+ } else if (boost::iequals(column, "state")) {
+ for (const auto& server : servers) {
+ values.emplace_back(TServerStatePB_Name(server.state()));
+ }
} else {
return Status::InvalidArgument("unknown column (--columns)", column);
}
- table.AddColumn(column.ToString(), std::move(values));
+ table.AddColumn(column, std::move(values));
}
RETURN_NOT_OK(table.PrintTo(cout));
@@ -248,7 +257,7 @@ unique_ptr<Mode> BuildTServerMode() {
string("Comma-separated list of tserver info fields to "
"include in output.\nPossible values: uuid, "
"rpc-addresses, http-addresses, version, seqno, "
- "heartbeat and start_time"))
+ "heartbeat, start_time, state"))
.AddOptionalParameter("format")
.AddOptionalParameter("timeout_ms")
.Build();