You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by aw...@apache.org on 2020/01/24 23:55:40 UTC

[kudu] branch master updated: tools: add tool to get the quiescing status of a tablet server

This is an automated email from the ASF dual-hosted git repository.

awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new eb48992  tools: add tool to get the quiescing status of a tablet server
eb48992 is described below

commit eb48992c27a490ca30d740f82f3da6995a2855e4
Author: Andrew Wong <aw...@apache.org>
AuthorDate: Thu Jan 23 15:26:11 2020 -0800

    tools: add tool to get the quiescing status of a tablet server
    
    Added a tool to get the quiescing status of a tablet server.
    
    [awong@ve1315 ~]$ sudo -u kudu kudu tserver quiesce status ve1316.halxg.cloudera.com
     Quiescing | Tablet leader count | Active scanner count
    -----------+---------------------+----------------------
     false     |       0             |       0
    
    Change-Id: I233753cd7daba921506eb1e8e7bf7cdfae3ef0d5
    Reviewed-on: http://gerrit.cloudera.org:8080/15102
    Reviewed-by: Adar Dembo <ad...@cloudera.com>
    Reviewed-by: Alexey Serbin <as...@cloudera.com>
    Tested-by: Andrew Wong <aw...@cloudera.com>
---
 .../tablet_server_quiescing-itest.cc               | 36 +++++++++++++++++++++-
 src/kudu/tools/kudu-tool-test.cc                   |  1 +
 src/kudu/tools/tool_action_tserver.cc              | 30 ++++++++++++++++--
 src/kudu/tserver/tablet_service.cc                 |  1 +
 src/kudu/tserver/tserver_admin.proto               |  9 ++++--
 5 files changed, 72 insertions(+), 5 deletions(-)

diff --git a/src/kudu/integration-tests/tablet_server_quiescing-itest.cc b/src/kudu/integration-tests/tablet_server_quiescing-itest.cc
index 73c6e40..3b660ea 100644
--- a/src/kudu/integration-tests/tablet_server_quiescing-itest.cc
+++ b/src/kudu/integration-tests/tablet_server_quiescing-itest.cc
@@ -16,6 +16,7 @@
 // under the License.
 
 #include <atomic>
+#include <cstdio>
 #include <memory>
 #include <string>
 #include <unordered_map>
@@ -67,6 +68,7 @@ using kudu::client::KuduScanner;
 using kudu::client::KuduTable;
 using kudu::client::sp::shared_ptr;
 using kudu::tools::RunActionPrependStdoutStderr;
+using kudu::tools::RunKuduTool;
 using kudu::tserver::MiniTabletServer;
 using std::string;
 using std::unique_ptr;
@@ -359,7 +361,7 @@ TEST_F(TServerQuiescingITest, TestQuiesceLeaderWhileFollowersCatchingUp) {
 // Basic test that we see the quiescing state change in the server.
 TEST_F(TServerQuiescingITest, TestQuiescingToolBasics) {
   NO_FATALS(StartCluster(1));
-  const auto* ts = cluster_->mini_tablet_server(0);
+  auto* ts = cluster_->mini_tablet_server(0);
   auto rw_workload = CreateFaultIntolerantRWWorkload();
   rw_workload->Setup();
   ASSERT_FALSE(ts->server()->quiescing());
@@ -368,11 +370,43 @@ TEST_F(TServerQuiescingITest, TestQuiescingToolBasics) {
     ASSERT_OK(RunActionPrependStdoutStderr(
         Substitute("tserver quiesce start $0", ts->bound_rpc_addr().ToString())));
     ASSERT_TRUE(ts->server()->quiescing());
+
+    // Running the status tool should report what we expect and not change the
+    // state.
+    string stdout;
+    ASSERT_OK(RunKuduTool({ "tserver", "quiesce", "status", ts->bound_rpc_addr().ToString() },
+                          &stdout));
+    ASSERT_STR_CONTAINS(stdout,
+        " Quiescing | Tablet leader count | Active scanner count\n"
+        "-----------+---------------------+----------------------\n"
+        " true      |       1             |       0");
+    ASSERT_TRUE(ts->server()->quiescing());
   }
   ASSERT_OK(RunActionPrependStdoutStderr(
       Substitute("tserver quiesce stop $0", ts->bound_rpc_addr().ToString())));
+  string stdout;
+  ASSERT_OK(RunKuduTool({ "tserver", "quiesce", "status", ts->bound_rpc_addr().ToString() },
+                        &stdout));
+  ASSERT_STR_CONTAINS(stdout,
+      " Quiescing | Tablet leader count | Active scanner count\n"
+      "-----------+---------------------+----------------------\n"
+      " false     |       1             |       0");
   ASSERT_FALSE(ts->server()->quiescing());
 
+  // Now try getting the status with some scanners.
+  rw_workload->Start();
+  ASSERT_EVENTUALLY([&] {
+    ASSERT_OK(RunKuduTool({ "tserver", "quiesce", "status", ts->bound_rpc_addr().ToString() },
+                          &stdout));
+    ASSERT_STR_CONTAINS(stdout, Substitute(
+        " Quiescing | Tablet leader count | Active scanner count\n"
+        "-----------+---------------------+----------------------\n"
+        " false     |       1             |       $0",
+        ts->server()->scanner_manager()->CountActiveScanners()));
+  });
+  ASSERT_FALSE(ts->server()->quiescing());
+  NO_FATALS(rw_workload->StopAndJoin());
+
   // Now try starting again but expecting errors.
   Status s = RunActionPrependStdoutStderr(
       Substitute("tserver quiesce start $0 --error_if_not_fully_quiesced",
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index dc81b6a..d2b1190 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -1180,6 +1180,7 @@ TEST_F(ToolTest, TestModeHelp) {
   }
   {
     const vector<string> kTServerQuiesceModeRegexes = {
+        "status.*Output information about the quiescing state",
         "start.*Start quiescing the given Tablet Server",
         "stop.*Stop quiescing a Tablet Server",
     };
diff --git a/src/kudu/tools/tool_action_tserver.cc b/src/kudu/tools/tool_action_tserver.cc
index f2875d4..0540d86 100644
--- a/src/kudu/tools/tool_action_tserver.cc
+++ b/src/kudu/tools/tool_action_tserver.cc
@@ -31,6 +31,7 @@
 #include "kudu/common/wire_protocol.pb.h"
 #include "kudu/gutil/map-util.h"
 #include "kudu/gutil/strings/join.h"
+#include "kudu/gutil/strings/numbers.h"
 #include "kudu/gutil/strings/split.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/master/master.pb.h"
@@ -269,15 +270,33 @@ Status StopQuiescingTServer(const RunnerContext& context) {
   req.set_return_stats(false);
   QuiesceTabletServerResponsePB resp;
   RpcController rpc;
-
   RETURN_NOT_OK(proxy->Quiesce(req, &resp, &rpc));
   if (resp.has_error()) {
     return StatusFromPB(resp.error().status());
   }
-
   return Status::OK();
 }
 
+Status QuiescingStatus(const RunnerContext& context) {
+  const auto& address = FindOrDie(context.required_args, kTServerAddressArg);
+  unique_ptr<TabletServerAdminServiceProxy> proxy;
+  RETURN_NOT_OK(BuildProxy(address, tserver::TabletServer::kDefaultPort, &proxy));
+
+  QuiesceTabletServerRequestPB req;
+  req.set_return_stats(true);
+  QuiesceTabletServerResponsePB resp;
+  RpcController rpc;
+  RETURN_NOT_OK(proxy->Quiesce(req, &resp, &rpc));
+  if (resp.has_error()) {
+    return StatusFromPB(resp.error().status());
+  }
+  DataTable table({});
+  table.AddColumn("Quiescing", { resp.is_quiescing() ? "true" : "false" });
+  table.AddColumn("Tablet leader count", { IntToString(resp.num_leaders()) });
+  table.AddColumn("Active scanner count", { IntToString(resp.num_active_scanners()) });
+  return table.PrintTo(cout);
+}
+
 } // anonymous namespace
 
 unique_ptr<Mode> BuildTServerMode() {
@@ -356,6 +375,12 @@ unique_ptr<Mode> BuildTServerMode() {
       .AddOptionalParameter("timeout_ms")
       .Build();
 
+  unique_ptr<Action> quiescing_status =
+      ActionBuilder("status", &QuiescingStatus)
+      .Description("Output information about the quiescing state of a Tablet "
+                   "Server.")
+      .AddRequiredParameter({ kTServerAddressArg, kTServerAddressDesc })
+      .Build();
   unique_ptr<Action> start_quiescing =
       ActionBuilder("start", &StartQuiescingTServer)
       .Description("Start quiescing the given Tablet Server. While a Tablet "
@@ -372,6 +397,7 @@ unique_ptr<Mode> BuildTServerMode() {
       .Build();
   unique_ptr<Mode> quiesce = ModeBuilder("quiesce")
       .Description("Operate on the quiescing state of a Kudu Tablet Server.")
+      .AddAction(std::move(quiescing_status))
       .AddAction(std::move(start_quiescing))
       .AddAction(std::move(stop_quiescing))
       .Build();
diff --git a/src/kudu/tserver/tablet_service.cc b/src/kudu/tserver/tablet_service.cc
index 48b5543..8843bad 100644
--- a/src/kudu/tserver/tablet_service.cc
+++ b/src/kudu/tserver/tablet_service.cc
@@ -1029,6 +1029,7 @@ void TabletServiceAdminImpl::Quiesce(const QuiesceTabletServerRequestPB* req,
     LOG(INFO) << Substitute("Tablet server has $0 leaders and $1 scanners",
         resp->num_leaders(), resp->num_active_scanners());
   }
+  resp->set_is_quiescing(server_->quiescing());
   context->RespondSuccess();
 }
 
diff --git a/src/kudu/tserver/tserver_admin.proto b/src/kudu/tserver/tserver_admin.proto
index e61db11..8e8eb6b 100644
--- a/src/kudu/tserver/tserver_admin.proto
+++ b/src/kudu/tserver/tserver_admin.proto
@@ -163,11 +163,16 @@ message QuiesceTabletServerResponsePB {
   // The error, if an error occurred with this request.
   optional TabletServerErrorPB error = 1;
 
+  // Whether the tablet server is quiescing after having processed the
+  // request. This field is returned regardless of whether the server's
+  // quiescing state was changed.
+  optional bool is_quiescing = 2;
+
   // The number of active scanners on the given tablet server. Only returned if
   // stats were requested.
-  optional int32 num_active_scanners = 2;
+  optional int32 num_active_scanners = 3;
 
   // The number of tablet leaders hosted on the given tablet server. Only
   // returned if stats were requested.
-  optional int32 num_leaders = 3;
+  optional int32 num_leaders = 4;
 }