You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2016/08/26 22:26:07 UTC

[1/2] kudu git commit: ksck: colorize and clean up output

Repository: kudu
Updated Branches:
  refs/heads/master 362743c34 -> db6d22da4


ksck: colorize and clean up output

Dan and I were looking at some ksck output earlier and found it somewhat
hard to read. This colorizes the output and also cleans up the format to
be slightly less messy. The output now goes to stdout instead of stderr
as well, so it's easier to pipe to a file or 'less', which is a common
use case.

Example output:

Tablet 0e4e79de2c2547c091779b13735d2b73 of table 'my_table' is under-replicated: 1 replica(s) not RUNNING
  63c4785c3b6f47f5a1c23ffbf6e52b71 (ts-023.foo.com:7050): RUNNING
  cd20c68cd23c4cf986eda0936a5691cc (ts-004.foo.com:7050): RUNNING [LEADER]
  5edf82f0516b4897b3a7991a7e67d71c (ts-028.foo.com:7050): bad state
    State:       NOT_STARTED
    Data state:  TABLET_DATA_COPYING
    Last status: TabletCopy: Downloading block 4611685629730158289 (3491/9569)

Change-Id: Icc5196d63cbcbcbb2a9aba1ff17377b678c104bd
Reviewed-on: http://gerrit.cloudera.org:8080/4129
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <ad...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/cf0eb4dd
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/cf0eb4dd
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/cf0eb4dd

Branch: refs/heads/master
Commit: cf0eb4ddae4fbcff23cc1a39171ce7ae997f1997
Parents: 362743c
Author: Todd Lipcon <to...@apache.org>
Authored: Thu Aug 25 18:23:50 2016 -0700
Committer: Adar Dembo <ad...@cloudera.com>
Committed: Fri Aug 26 22:24:05 2016 +0000

----------------------------------------------------------------------
 src/kudu/tools/CMakeLists.txt         |   2 +
 src/kudu/tools/color.cc               |  80 ++++++++++
 src/kudu/tools/color.h                |  37 +++++
 src/kudu/tools/ksck-test.cc           |  70 ++++----
 src/kudu/tools/ksck.cc                | 247 ++++++++++++++++-------------
 src/kudu/tools/ksck.h                 |   9 +-
 src/kudu/tools/tool_action_cluster.cc |   1 +
 7 files changed, 299 insertions(+), 147 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/cf0eb4dd/src/kudu/tools/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/tools/CMakeLists.txt b/src/kudu/tools/CMakeLists.txt
index 614bf11..fe95bad 100644
--- a/src/kudu/tools/CMakeLists.txt
+++ b/src/kudu/tools/CMakeLists.txt
@@ -30,6 +30,7 @@ set(LINK_LIBS
 )
 
 add_library(kudu_tools_util
+  color.cc
   data_gen_util.cc)
 target_link_libraries(kudu_tools_util
   ${LINK_LIBS})
@@ -75,6 +76,7 @@ add_library(ksck
 )
 target_link_libraries(ksck
   consensus
+  kudu_tools_util
   master_proto
   server_base_proto
   tserver_proto

http://git-wip-us.apache.org/repos/asf/kudu/blob/cf0eb4dd/src/kudu/tools/color.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/color.cc b/src/kudu/tools/color.cc
new file mode 100644
index 0000000..e90761d
--- /dev/null
+++ b/src/kudu/tools/color.cc
@@ -0,0 +1,80 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/tools/color.h"
+
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+#include <unistd.h>
+
+#include "kudu/gutil/strings/substitute.h"
+#include "kudu/util/flag_tags.h"
+
+DEFINE_string(color, "auto",
+              "Specifies whether output should be colorized. The default value "
+              "'auto' colorizes output if the output is a terminal. The other "
+              "valid values are 'always' or 'never'.");
+TAG_FLAG(color, stable);
+
+static bool ValidateColorFlag(const char* flagname, const string& value) {
+  if (value == "always" ||
+      value == "auto" ||
+      value == "never") {
+    return true;
+  }
+  LOG(ERROR) << "option 'color' expects \"always\", \"auto\", or \"never\"";
+  return false;
+}
+static bool dummy = google::RegisterFlagValidator(
+    &FLAGS_color, &ValidateColorFlag);
+
+
+namespace kudu {
+namespace tools {
+
+namespace {
+bool UseColor() {
+  if (FLAGS_color == "never") return false;
+  if (FLAGS_color == "always") return true;
+  return isatty(STDOUT_FILENO);
+}
+
+const char* StringForCode(AnsiCode color) {
+  if (!UseColor()) return "";
+
+  // Codes from: https://en.wikipedia.org/wiki/ANSI_escape_code
+  switch (color) {
+    case AnsiCode::RED:    return "\x1b[31m";
+    case AnsiCode::GREEN:  return "\x1b[32m";
+    case AnsiCode::YELLOW: return "\x1b[33m";
+    case AnsiCode::BLUE:   return "\x1b[34m";
+    case AnsiCode::RESET:  return "\x1b[m";
+  }
+  LOG(FATAL);
+  return "";
+}
+} // anonymous namespace
+
+string Color(AnsiCode color, StringPiece s) {
+  return strings::Substitute("$0$1$2",
+                             StringForCode(color),
+                             s,
+                             StringForCode(AnsiCode::RESET));
+}
+
+} // namespace tools
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/cf0eb4dd/src/kudu/tools/color.h
----------------------------------------------------------------------
diff --git a/src/kudu/tools/color.h b/src/kudu/tools/color.h
new file mode 100644
index 0000000..ef0afbe
--- /dev/null
+++ b/src/kudu/tools/color.h
@@ -0,0 +1,37 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#pragma once
+
+#include <string>
+
+#include "kudu/gutil/strings/stringpiece.h"
+
+namespace kudu {
+namespace tools {
+
+enum class AnsiCode {
+  RED,
+  YELLOW,
+  GREEN,
+  BLUE,
+  RESET
+};
+
+std::string Color(AnsiCode color, StringPiece s);
+
+} // namespace tools
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/cf0eb4dd/src/kudu/tools/ksck-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck-test.cc b/src/kudu/tools/ksck-test.cc
index 1a7a040..aae4656 100644
--- a/src/kudu/tools/ksck-test.cc
+++ b/src/kudu/tools/ksck-test.cc
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <gflags/gflags.h>
 #include <gtest/gtest.h>
 #include <memory>
 #include <unordered_map>
@@ -25,6 +26,8 @@
 #include "kudu/util/scoped_cleanup.h"
 #include "kudu/util/test_util.h"
 
+DECLARE_string(color);
+
 namespace kudu {
 namespace tools {
 
@@ -112,6 +115,7 @@ class KsckTest : public KuduTest {
       : master_(new MockKsckMaster()),
         cluster_(new KsckCluster(static_pointer_cast<KsckMaster>(master_))),
         ksck_(new Ksck(cluster_)) {
+    FLAGS_color = "never";
     unordered_map<string, shared_ptr<KsckTabletServer>> tablet_servers;
     for (int i = 0; i < 3; i++) {
       string name = Substitute("ts-id-$0", i);
@@ -289,25 +293,22 @@ TEST_F(KsckTest, TestBadTabletServer) {
       "ts-id-1 (<mock>): Network error: Network failure");
   ASSERT_STR_CONTAINS(
       err_stream_.str(),
-      "WARNING: Detected problems with Tablet tablet-id-0 of table 'test'\n"
-      "------------------------------------------------------------\n"
-      "WARNING: Should have a replica on TS ts-id-1 (<mock>), but TS is unavailable\n"
-      "INFO: OK state on TS ts-id-0 (<mock>): RUNNING\n"
-      "INFO: OK state on TS ts-id-2 (<mock>): RUNNING\n");
+      "Tablet tablet-id-0 of table 'test' is under-replicated: 1 replica(s) not RUNNING\n"
+      "  ts-id-0 (<mock>): RUNNING [LEADER]\n"
+      "  ts-id-1 (<mock>): TS unavailable\n"
+      "  ts-id-2 (<mock>): RUNNING\n");
   ASSERT_STR_CONTAINS(
       err_stream_.str(),
-      "WARNING: Detected problems with Tablet tablet-id-1 of table 'test'\n"
-      "------------------------------------------------------------\n"
-      "WARNING: Should have a replica on TS ts-id-1 (<mock>), but TS is unavailable\n"
-      "INFO: OK state on TS ts-id-0 (<mock>): RUNNING\n"
-      "INFO: OK state on TS ts-id-2 (<mock>): RUNNING\n");
+      "Tablet tablet-id-1 of table 'test' is under-replicated: 1 replica(s) not RUNNING\n"
+      "  ts-id-0 (<mock>): RUNNING [LEADER]\n"
+      "  ts-id-1 (<mock>): TS unavailable\n"
+      "  ts-id-2 (<mock>): RUNNING\n");
   ASSERT_STR_CONTAINS(
       err_stream_.str(),
-      "WARNING: Detected problems with Tablet tablet-id-2 of table 'test'\n"
-      "------------------------------------------------------------\n"
-      "WARNING: Should have a replica on TS ts-id-1 (<mock>), but TS is unavailable\n"
-      "INFO: OK state on TS ts-id-0 (<mock>): RUNNING\n"
-      "INFO: OK state on TS ts-id-2 (<mock>): RUNNING\n");
+      "Tablet tablet-id-2 of table 'test' is under-replicated: 1 replica(s) not RUNNING\n"
+      "  ts-id-0 (<mock>): RUNNING [LEADER]\n"
+      "  ts-id-1 (<mock>): TS unavailable\n"
+      "  ts-id-2 (<mock>): RUNNING\n");
 }
 
 TEST_F(KsckTest, TestZeroTabletReplicasCheck) {
@@ -340,7 +341,7 @@ TEST_F(KsckTest, TestOneSmallReplicatedTable) {
   Status s = ksck_->ChecksumData(ChecksumOptions());
   EXPECT_EQ("Not found: No table found. Filter: table_filters=xyz", s.ToString());
   ASSERT_STR_CONTAINS(err_stream_.str(),
-                      "INFO: The cluster doesn't have any matching tables");
+                      "The cluster doesn't have any matching tables");
 
   // Test filtering with a matching table pattern.
   err_stream_.str("");
@@ -365,7 +366,8 @@ TEST_F(KsckTest, TestOneOneTabletBrokenTable) {
   Status s = RunKsck();
   EXPECT_EQ("Corruption: 1 table(s) are bad", s.ToString());
   ASSERT_STR_CONTAINS(err_stream_.str(),
-                      "Tablet tablet-id-1 of table 'test' has 2 instead of 3 replicas");
+                      "Tablet tablet-id-1 of table 'test' is under-replicated: "
+                      "configuration has 2 replicas vs desired 3");
 }
 
 TEST_F(KsckTest, TestMismatchedAssignments) {
@@ -377,9 +379,11 @@ TEST_F(KsckTest, TestMismatchedAssignments) {
   Status s = RunKsck();
   EXPECT_EQ("Corruption: 1 table(s) are bad", s.ToString());
   ASSERT_STR_CONTAINS(err_stream_.str(),
-                      "WARNING: Detected problems with Tablet tablet-id-2 of table 'test'\n"
-                      "------------------------------------------------------------\n"
-                      "WARNING: Missing a tablet replica on tablet server ts-id-0 (<mock>)\n");
+                      "Tablet tablet-id-2 of table 'test' is under-replicated: "
+                      "1 replica(s) not RUNNING\n"
+                      "  ts-id-0 (<mock>): missing [LEADER]\n"
+                      "  ts-id-1 (<mock>): RUNNING\n"
+                      "  ts-id-2 (<mock>): RUNNING\n");
 }
 
 TEST_F(KsckTest, TestTabletNotRunning) {
@@ -389,19 +393,19 @@ TEST_F(KsckTest, TestTabletNotRunning) {
   EXPECT_EQ("Corruption: 1 table(s) are bad", s.ToString());
   ASSERT_STR_CONTAINS(
       err_stream_.str(),
-      "WARNING: Detected problems with Tablet tablet-id-0 of table 'test'\n"
-      "------------------------------------------------------------\n"
-      "WARNING: Bad state on TS ts-id-0 (<mock>): FAILED\n"
-      "  Last status: \n"
-      "  Data state:  TABLET_DATA_UNKNOWN\n"
-      "WARNING: Bad state on TS ts-id-1 (<mock>): FAILED\n"
-      "  Last status: \n"
-      "  Data state:  TABLET_DATA_UNKNOWN\n"
-      "WARNING: Bad state on TS ts-id-2 (<mock>): FAILED\n"
-      "  Last status: \n"
-      "  Data state:  TABLET_DATA_UNKNOWN\n"
-      "ERROR: Tablet tablet-id-0 of table 'test' does not have a majority of "
-      "replicas in RUNNING state\n");
+      "Tablet tablet-id-0 of table 'test' is unavailable: 3 replica(s) not RUNNING\n"
+      "  ts-id-0 (<mock>): bad state [LEADER]\n"
+      "    State:       FAILED\n"
+      "    Data state:  TABLET_DATA_UNKNOWN\n"
+      "    Last status: \n"
+      "  ts-id-1 (<mock>): bad state\n"
+      "    State:       FAILED\n"
+      "    Data state:  TABLET_DATA_UNKNOWN\n"
+      "    Last status: \n"
+      "  ts-id-2 (<mock>): bad state\n"
+      "    State:       FAILED\n"
+      "    Data state:  TABLET_DATA_UNKNOWN\n"
+      "    Last status: \n");
 }
 
 } // namespace tools

http://git-wip-us.apache.org/repos/asf/kudu/blob/cf0eb4dd/src/kudu/tools/ksck.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck.cc b/src/kudu/tools/ksck.cc
index 8d7e4f5..7165280 100644
--- a/src/kudu/tools/ksck.cc
+++ b/src/kudu/tools/ksck.cc
@@ -18,8 +18,10 @@
 #include "kudu/tools/ksck.h"
 
 #include <algorithm>
+#include <boost/optional.hpp>
 #include <glog/logging.h>
 #include <iostream>
+#include <map>
 #include <mutex>
 
 #include "kudu/consensus/quorum_util.h"
@@ -29,6 +31,7 @@
 #include "kudu/gutil/strings/human_readable.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/gutil/strings/util.h"
+#include "kudu/tools/color.h"
 #include "kudu/util/atomic.h"
 #include "kudu/util/blocking_queue.h"
 #include "kudu/util/locks.h"
@@ -38,7 +41,6 @@
 namespace kudu {
 namespace tools {
 
-using std::cerr;
 using std::cout;
 using std::endl;
 using std::ostream;
@@ -60,26 +62,23 @@ DEFINE_uint64(checksum_snapshot_timestamp, ChecksumOptions::kCurrentTimestamp,
 DEFINE_int32(fetch_replica_info_concurrency, 20,
              "Number of concurrent tablet servers to fetch replica info from.");
 
-// The stream to write output to. If this is NULL, defaults to cerr.
+// The stream to write output to. If this is NULL, defaults to cout.
 // This is used by tests to capture output.
 ostream* g_err_stream = NULL;
 
-// Print an informational message to cerr.
+// Print an informational message to cout.
 static ostream& Out() {
-  return (g_err_stream ? *g_err_stream : cerr);
-}
-static ostream& Info() {
-  return Out() << "INFO: ";
+  return (g_err_stream ? *g_err_stream : cout);
 }
 
-// Print a warning message to cerr.
+// Print a warning message to cout.
 static ostream& Warn() {
-  return Out() << "WARNING: ";
+  return Out() << Color(AnsiCode::YELLOW, "WARNING: ");
 }
 
-// Print an error message to cerr.
+// Print an error message to cout.
 static ostream& Error() {
-  return Out() << "ERROR: ";
+  return Out() << Color(AnsiCode::RED, "WARNING: ");
 }
 
 namespace {
@@ -151,7 +150,7 @@ Status Ksck::CheckMasterRunning() {
   VLOG(1) << "Connecting to the Master";
   Status s = cluster_->master()->Connect();
   if (s.ok()) {
-    Info() << "Connected to the Master" << endl;
+    Out() << "Connected to the Master" << endl;
   }
   return s;
 }
@@ -188,7 +187,7 @@ Status Ksck::FetchInfoFromTabletServers() {
   pool->Wait();
 
   if (bad_servers.Load() == 0) {
-    Info() << Substitute("Fetched info from all $0 Tablet Servers", servers_count) << endl;
+    Out() << Substitute("Fetched info from all $0 Tablet Servers", servers_count) << endl;
     return Status::OK();
   } else {
     Warn() << Substitute("Fetched info from $0 Tablet Servers, $1 weren't reachable",
@@ -221,15 +220,16 @@ Status Ksck::CheckTablesConsistency() {
     if (!VerifyTable(table)) {
       bad_tables_count++;
     }
+    Out() << endl;
   }
 
   if (tables_checked == 0) {
-    Info() << "The cluster doesn't have any matching tables" << endl;
+    Out() << "The cluster doesn't have any matching tables" << endl;
     return Status::OK();
   }
 
   if (bad_tables_count == 0) {
-    Info() << Substitute("The metadata for $0 table(s) is HEALTHY", tables_checked) << endl;
+    Out() << Substitute("The metadata for $0 table(s) is HEALTHY", tables_checked) << endl;
     return Status::OK();
   } else {
     Warn() << Substitute("$0 out of $1 table(s) are not in a healthy state",
@@ -289,7 +289,7 @@ class ChecksumResultReporter : public RefCountedThreadSafe<ChecksumResultReporte
       done = responses_.WaitFor(MonoDelta::FromMilliseconds(std::min(rem_ms, 5000)));
       string status = done ? "finished in " : "running for ";
       int run_time_sec = (MonoTime::Now() - start).ToSeconds();
-      Info() << "Checksum " << status << run_time_sec << "s: "
+      Out() << "Checksum " << status << run_time_sec << "s: "
              << responses_.count() << "/" << expected_count_ << " replicas remaining ("
              << HumanReadableNumBytes::ToString(disk_bytes_summed_.Load()) << " from disk, "
              << HumanReadableInt::ToString(rows_summed_.Load()) << " rows summed)"
@@ -453,7 +453,7 @@ Status Ksck::ChecksumData(const ChecksumOptions& opts) {
       return Status::ServiceUnavailable(
           "No tablet servers were available to fetch the current timestamp");
     }
-    Info() << "Using snapshot timestamp: " << options.snapshot_timestamp << endl;
+    Out() << "Using snapshot timestamp: " << options.snapshot_timestamp << endl;
   }
 
   // Kick off checksum scans in parallel. For each tablet server, we start
@@ -546,7 +546,6 @@ Status Ksck::ChecksumData(const ChecksumOptions& opts) {
 }
 
 bool Ksck::VerifyTable(const shared_ptr<KsckTable>& table) {
-  bool good_table = true;
   const auto all_tablets = table->tablets();
   vector<shared_ptr<KsckTablet>> tablets;
   std::copy_if(all_tablets.begin(), all_tablets.end(), std::back_inserter(tablets),
@@ -555,124 +554,146 @@ bool Ksck::VerifyTable(const shared_ptr<KsckTable>& table) {
                  });
 
   int table_num_replicas = table->num_replicas();
-  VLOG(1) << Substitute("Verifying $0 tablets for table $1 configured with num_replicas = $2",
+  VLOG(1) << Substitute("Verifying $0 tablet(s) for table $1 configured with num_replicas = $2",
                         tablets.size(), table->name(), table_num_replicas);
 
-  int bad_tablets_count = 0;
-  for (const shared_ptr<KsckTablet> &tablet : tablets) {
-    if (!VerifyTablet(tablet, table_num_replicas)) {
-      bad_tablets_count++;
-    }
+  map<CheckResult, int> result_counts;
+  for (const auto& tablet : tablets) {
+    auto tablet_result = VerifyTablet(tablet, table_num_replicas);
+    result_counts[tablet_result]++;
   }
-  if (bad_tablets_count == 0) {
-    Info() << Substitute("Table $0 is HEALTHY ($1 tablets checked)",
-                         table->name(), tablets.size()) << endl;
+  if (result_counts[CheckResult::OK] == tablets.size()) {
+    Out() << Substitute("Table $0 is $1 ($2 tablet(s) checked)",
+                        table->name(),
+                        Color(AnsiCode::GREEN, "HEALTHY"),
+                        tablets.size()) << endl;
+    return true;
   } else {
-    Warn() << Substitute("Table $0 has $1 bad tablets", table->name(), bad_tablets_count) << endl;
-    good_table = false;
+    if (result_counts[CheckResult::UNAVAILABLE] > 0) {
+      Out() << Substitute("Table $0 has $1 $2 tablet(s)",
+                          table->name(),
+                          result_counts[CheckResult::UNAVAILABLE],
+                          Color(AnsiCode::RED, "unavailable")) << endl;
+    }
+    if (result_counts[CheckResult::UNDER_REPLICATED] > 0) {
+      Out() << Substitute("Table $0 has $1 $2 tablet(s)",
+                          table->name(),
+                          result_counts[CheckResult::UNDER_REPLICATED],
+                          Color(AnsiCode::YELLOW, "under-replicated")) << endl;
+    }
+    return false;
   }
-  return good_table;
 }
 
-bool Ksck::VerifyTablet(const shared_ptr<KsckTablet>& tablet, int table_num_replicas) {
-  string tablet_str = Substitute("Tablet $0 of table '$1'",
+Ksck::CheckResult Ksck::VerifyTablet(const shared_ptr<KsckTablet>& tablet, int table_num_replicas) {
+  const string tablet_str = Substitute("Tablet $0 of table '$1'",
                                  tablet->id(), tablet->table()->name());
-  vector<shared_ptr<KsckTabletReplica> > replicas = tablet->replicas();
-  vector<string> warnings, errors, infos;
-  if (check_replica_count_ && replicas.size() != table_num_replicas) {
-    warnings.push_back(Substitute("$0 has $1 instead of $2 replicas",
-                                  tablet_str, replicas.size(), table_num_replicas));
-  }
-  int leaders_count = 0;
-  int followers_count = 0;
-  int alive_count = 0;
-  int running_count = 0;
-  for (const shared_ptr<KsckTabletReplica> replica : replicas) {
+
+  // Consolidate the state of each replica into a simple struct for easier analysis.
+  struct ReplicaState {
+    KsckTabletReplica* replica;
+    KsckTabletServer* ts = nullptr;
+    tablet::TabletStatePB state = tablet::UNKNOWN;
+    boost::optional<tablet::TabletStatusPB> status_pb;
+  };
+
+  vector<ReplicaState> replica_states;
+  for (const shared_ptr<KsckTabletReplica> replica : tablet->replicas()) {
+    replica_states.emplace_back();
+    auto* repl_state = &replica_states.back();
+    repl_state->replica = replica.get();
+
     VLOG(1) << Substitute("A replica of tablet $0 is on live tablet server $1",
                           tablet->id(), replica->ts_uuid());
     // Check for agreement on tablet assignment and state between the master
     // and the tablet server.
     auto ts = FindPtrOrNull(cluster_->tablet_servers(), replica->ts_uuid());
-    if (ts && ts->is_healthy()) {
-      alive_count++;
-      auto state = ts->ReplicaState(tablet->id());
-      if (state != tablet::UNKNOWN) {
-        VLOG(1) << Substitute("Tablet server $0 agrees that it hosts a replica of $1",
-                              ts->ToString(), tablet_str);
-      }
+    repl_state->ts = ts.get();
 
-      switch (state) {
-        case tablet::RUNNING:
-          VLOG(1) << Substitute("Tablet replica for $0 on TS $1 is RUNNING",
-                                tablet_str, ts->ToString());
-          running_count++;
-          infos.push_back(Substitute("OK state on TS $0: $1",
-                                     ts->ToString(), tablet::TabletStatePB_Name(state)));
-          break;
-
-        case tablet::UNKNOWN:
-          warnings.push_back(Substitute("Missing a tablet replica on tablet server $0",
-                                        ts->ToString()));
-          break;
-
-        default: {
-          const auto& status_pb = ts->tablet_status_map().at(tablet->id());
-          warnings.push_back(
-              Substitute("Bad state on TS $0: $1\n"
-                         "  Last status: $2\n"
-                         "  Data state:  $3",
-                         ts->ToString(), tablet::TabletStatePB_Name(state),
-                         status_pb.last_status(),
-                         tablet::TabletDataState_Name(status_pb.tablet_data_state())));
-          break;
-        }
+    if (ts && ts->is_healthy()) {
+      repl_state->state = ts->ReplicaState(tablet->id());
+      if (ContainsKey(ts->tablet_status_map(), tablet->id())) {
+        repl_state->status_pb = ts->tablet_status_map().at(tablet->id());
       }
-    } else {
-      // no TS or unhealthy TS
-      warnings.push_back(Substitute("Should have a replica on TS $0, but TS is unavailable",
-                                    ts ? ts->ToString() : replica->ts_uuid()));
-    }
-    if (replica->is_leader()) {
-      VLOG(1) << Substitute("Replica at $0 is a LEADER", replica->ts_uuid());
-      leaders_count++;
-    } else if (replica->is_follower()) {
-      VLOG(1) << Substitute("Replica at $0 is a FOLLOWER", replica->ts_uuid());
-      followers_count++;
     }
   }
-  if (leaders_count == 0) {
-    errors.push_back("No leader detected");
-  }
-  VLOG(1) << Substitute("$0 has $1 leader and $2 followers",
-                        tablet_str, leaders_count, followers_count);
-  int majority_size = consensus::MajoritySize(table_num_replicas);
-  if (alive_count < majority_size) {
-    errors.push_back(Substitute("$0 does not have a majority of replicas on live tablet servers",
-                                tablet_str));
-  } else if (running_count < majority_size) {
-    errors.push_back(Substitute("$0 does not have a majority of replicas in RUNNING state",
-                                tablet_str));
-  }
-
-  bool has_issues = !warnings.empty() || !errors.empty();
-  if (has_issues) {
-    Warn() << "Detected problems with " << tablet_str << endl
-           << "------------------------------------------------------------" << endl;
-    for (const auto& s : warnings) {
-      Warn() << s << endl;
+
+  // Summarize the states.
+  int leaders_count = 0;
+  int running_count = 0;
+  for (const auto& r : replica_states) {
+    if (r.replica->is_leader()) {
+      leaders_count++;
     }
-    for (const auto& s : errors) {
-      Error() << s << endl;
+    if (r.state == tablet::RUNNING) {
+      running_count++;
     }
-    // We only print the 'INFO' messages on tablets that have some issues.
-    // Otherwise, it's a bit verbose.
-    for (const auto& s : infos) {
-      Info() << s << endl;
+  }
+
+  // Determine the overall health state of the tablet.
+  CheckResult result = CheckResult::OK;
+  int num_voters = replica_states.size();
+  int majority_size = consensus::MajoritySize(num_voters);
+  if (running_count < majority_size) {
+    Out() << Substitute("$0 is $1: $2 replica(s) not RUNNING",
+                        tablet_str,
+                        Color(AnsiCode::RED, "unavailable"),
+                        num_voters - running_count) << endl;
+    result = CheckResult::UNAVAILABLE;
+  } else if (running_count < num_voters) {
+    Out() << Substitute("$0 is $1: $2 replica(s) not RUNNING",
+                        tablet_str,
+                        Color(AnsiCode::YELLOW, "under-replicated"),
+                        num_voters - running_count) << endl;
+    result = CheckResult::UNDER_REPLICATED;
+  } else if (check_replica_count_ && num_voters < table_num_replicas) {
+    Out() << Substitute("$0 is $1: configuration has $2 replicas vs desired $3",
+                        tablet_str,
+                        Color(AnsiCode::YELLOW, "under-replicated"),
+                        num_voters,
+                        table_num_replicas) << endl;
+    result = CheckResult::UNDER_REPLICATED;
+  } else if (leaders_count != 1) {
+    Out() << Substitute("$0 is $1: expected one LEADER replica",
+                        tablet_str, Color(AnsiCode::RED, "unavailable")) << endl;
+    result = CheckResult::UNAVAILABLE;
+  }
+
+  // In the case that we found something wrong, dump info on all the replicas
+  // to make it easy to debug.
+  if (result != CheckResult::OK) {
+    for (const ReplicaState& r : replica_states) {
+      string ts_str = r.ts ? r.ts->ToString() : r.replica->ts_uuid();
+      const char* leader_str = r.replica->is_leader() ? " [LEADER]" : "";
+
+      Out() << "  " << ts_str << ": ";
+      if (!r.ts || !r.ts->is_healthy()) {
+        Out() << Color(AnsiCode::YELLOW, "TS unavailable") << leader_str << endl;
+        continue;
+      }
+      if (r.state == tablet::RUNNING) {
+        Out() << Color(AnsiCode::GREEN, "RUNNING") << leader_str << endl;
+        continue;
+      }
+      if (r.status_pb == boost::none) {
+        Out() << Color(AnsiCode::YELLOW, "missing") << leader_str << endl;
+        continue;
+      }
+
+      Out() << Color(AnsiCode::YELLOW, "bad state") << leader_str << endl;
+      Out() << Substitute(
+          "    State:       $0\n"
+          "    Data state:  $1\n"
+          "    Last status: $2\n",
+          Color(AnsiCode::BLUE, tablet::TabletStatePB_Name(r.state)),
+          Color(AnsiCode::BLUE, tablet::TabletDataState_Name(r.status_pb->tablet_data_state())),
+          Color(AnsiCode::BLUE, r.status_pb->last_status()));
     }
+
     Out() << endl;
   }
 
-  return !has_issues;
+  return result;
 }
 
 } // namespace tools

http://git-wip-us.apache.org/repos/asf/kudu/blob/cf0eb4dd/src/kudu/tools/ksck.h
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck.h b/src/kudu/tools/ksck.h
index cb78686..c512831 100644
--- a/src/kudu/tools/ksck.h
+++ b/src/kudu/tools/ksck.h
@@ -375,11 +375,18 @@ class Ksck {
   Status ChecksumData(const ChecksumOptions& options);
 
  private:
+  enum class CheckResult {
+    OK,
+    UNDER_REPLICATED,
+    UNAVAILABLE
+  };
+
   bool VerifyTable(const std::shared_ptr<KsckTable>& table);
   bool VerifyTableWithTimeout(const std::shared_ptr<KsckTable>& table,
                               const MonoDelta& timeout,
                               const MonoDelta& retry_interval);
-  bool VerifyTablet(const std::shared_ptr<KsckTablet>& tablet, int table_num_replicas);
+  CheckResult VerifyTablet(const std::shared_ptr<KsckTablet>& tablet,
+                           int table_num_replicas);
 
   const std::shared_ptr<KsckCluster> cluster_;
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/cf0eb4dd/src/kudu/tools/tool_action_cluster.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_action_cluster.cc b/src/kudu/tools/tool_action_cluster.cc
index d198a9b..93ed5db 100644
--- a/src/kudu/tools/tool_action_cluster.cc
+++ b/src/kudu/tools/tool_action_cluster.cc
@@ -135,6 +135,7 @@ unique_ptr<Mode> BuildClusterMode() {
     .AddRequiredParameter({ "master_address", "Kudu Master RPC address of form hostname:port" })
     .AddOptionalParameter("checksum_scan")
     .AddOptionalParameter("checksum_snapshot")
+    .AddOptionalParameter("color")
     .AddOptionalParameter("tables")
     .AddOptionalParameter("tablets")
     .Build();


[2/2] kudu git commit: master: include TS address in log messages

Posted by ad...@apache.org.
master: include TS address in log messages

When looking at master logs, it's quite annoying to have to translate
back from UUIDs to actual hostnames, since the operator typically wants
to ssh into that node to look at logs, etc.

This patch adds TSDescriptor::ToString() and calls it from all the
points in CatalogManager where log messages refer to an individual
server.

This also adds validation that TS registrations must include at least
one HTTP and one RPC address. This has always been the case but wasn't
verified.

Change-Id: Ic55fa7e818a115de70f9fc6aca12581c3b4779c7
Reviewed-on: http://gerrit.cloudera.org:8080/4131
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <ad...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/db6d22da
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/db6d22da
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/db6d22da

Branch: refs/heads/master
Commit: db6d22da4d75bfb723dabab25a9ef8603f3a9a19
Parents: cf0eb4d
Author: Todd Lipcon <to...@apache.org>
Authored: Thu Aug 25 22:20:37 2016 -0700
Committer: Adar Dembo <ad...@cloudera.com>
Committed: Fri Aug 26 22:25:42 2016 +0000

----------------------------------------------------------------------
 src/kudu/master/catalog_manager.cc | 91 ++++++++++++++++-----------------
 src/kudu/master/ts_descriptor.cc   | 13 +++++
 src/kudu/master/ts_descriptor.h    |  4 ++
 src/kudu/master/ts_manager.cc      |  8 +--
 4 files changed, 66 insertions(+), 50 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/db6d22da/src/kudu/master/catalog_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index d499296..2bc710e 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -1907,14 +1907,14 @@ Status CatalogManager::HandleReportedTablet(TSDescriptor* ts_desc,
   if (report.has_schema_version() &&
       table_lock.data().pb.version() != report.schema_version()) {
     if (report.schema_version() > table_lock.data().pb.version()) {
-      LOG(ERROR) << "TS " << ts_desc->permanent_uuid()
+      LOG(ERROR) << "TS " << ts_desc->ToString()
                  << " has reported a schema version greater than the current one "
                  << " for tablet " << tablet->ToString()
                  << ". Expected version " << table_lock.data().pb.version()
                  << " got " << report.schema_version()
                  << " (corruption)";
     } else {
-      LOG(INFO) << "TS " << ts_desc->permanent_uuid()
+      LOG(INFO) << "TS " << ts_desc->ToString()
             << " does not have the latest schema for tablet " << tablet->ToString()
             << ". Expected version " << table_lock.data().pb.version()
             << " got " << report.schema_version();
@@ -1931,7 +1931,7 @@ Status CatalogManager::HandleReportedTablet(TSDescriptor* ts_desc,
     DCHECK(!s.ok());
     DCHECK_EQ(report.state(), tablet::FAILED);
     LOG(WARNING) << "Tablet " << tablet->ToString() << " has failed on TS "
-                 << ts_desc->permanent_uuid() << ": " << s.ToString();
+                 << ts_desc->ToString() << ": " << s.ToString();
     return Status::OK();
   }
 
@@ -2031,7 +2031,7 @@ Status CatalogManager::HandleReportedTablet(TSDescriptor* ts_desc,
 
       VLOG(2) << "Updating consensus configuration for tablet "
               << final_report->tablet_id()
-              << " from config reported by " << ts_desc->permanent_uuid()
+              << " from config reported by " << ts_desc->ToString()
               << " to that committed in log index "
               << final_report->committed_consensus_state().config().opid_index()
               << " with leader state from term "
@@ -2298,7 +2298,7 @@ class RetryingTSRpcTask : public MonitoredTask {
   // Runs on a reactor thread, so should not block or do any IO.
   void RpcCallback() {
     if (!rpc_.status().ok()) {
-      LOG(WARNING) << "TS " << target_ts_desc_->permanent_uuid() << ": "
+      LOG(WARNING) << "TS " << target_ts_desc_->ToString() << ": "
                    << type_name() << " RPC failed for tablet "
                    << tablet_id() << ": " << rpc_.status().ToString();
     } else if (state() != kStateAborted) {
@@ -2483,18 +2483,19 @@ class AsyncCreateReplica : public RetrySpecificTSRpcTask {
       Status s = StatusFromPB(resp_.error().status());
       if (s.IsAlreadyPresent()) {
         LOG(INFO) << "CreateTablet RPC for tablet " << tablet_id_
-                  << " on TS " << permanent_uuid_ << " returned already present: "
+                  << " on TS " << target_ts_desc_->ToString() << " returned already present: "
                   << s.ToString();
         MarkComplete();
       } else {
         LOG(WARNING) << "CreateTablet RPC for tablet " << tablet_id_
-                     << " on TS " << permanent_uuid_ << " failed: " << s.ToString();
+                     << " on TS " << target_ts_desc_->ToString() << " failed: " << s.ToString();
       }
     }
   }
 
   virtual bool SendRequest(int attempt) OVERRIDE {
-    VLOG(1) << "Send create tablet request to " << permanent_uuid_ << ":\n"
+    VLOG(1) << "Send create tablet request to "
+            << target_ts_desc_->ToString() << ":\n"
             << " (attempt " << attempt << "):\n"
             << req_.DebugString();
     ts_proxy_->CreateTabletAsync(req_, &resp_, &rpc_,
@@ -2541,18 +2542,21 @@ class AsyncDeleteReplica : public RetrySpecificTSRpcTask {
       TabletServerErrorPB::Code code = resp_.error().code();
       switch (code) {
         case TabletServerErrorPB::TABLET_NOT_FOUND:
-          LOG(WARNING) << "TS " << permanent_uuid_ << ": delete failed for tablet " << tablet_id_
+          LOG(WARNING) << "TS " << target_ts_desc_->ToString()
+                       << ": delete failed for tablet " << tablet_id_
                        << " because the tablet was not found. No further retry: "
                        << status.ToString();
           MarkComplete();
           break;
         case TabletServerErrorPB::CAS_FAILED:
-          LOG(WARNING) << "TS " << permanent_uuid_ << ": delete failed for tablet " << tablet_id_
+          LOG(WARNING) << "TS " << target_ts_desc_->ToString()
+                       << ": delete failed for tablet " << tablet_id_
                        << " due to a CAS failure. No further retry: " << status.ToString();
           MarkComplete();
           break;
         default:
-          LOG(WARNING) << "TS " << permanent_uuid_ << ": delete failed for tablet " << tablet_id_
+          LOG(WARNING) << "TS " << target_ts_desc_->ToString()
+                       << ": delete failed for tablet " << tablet_id_
                        << " with error code " << TabletServerErrorPB::Code_Name(code)
                        << ": " << status.ToString();
           break;
@@ -2560,14 +2564,17 @@ class AsyncDeleteReplica : public RetrySpecificTSRpcTask {
     } else {
       master_->catalog_manager()->NotifyTabletDeleteSuccess(permanent_uuid_, tablet_id_);
       if (table_) {
-        LOG(INFO) << "TS " << permanent_uuid_ << ": tablet " << tablet_id_
+        LOG(INFO) << "TS " << target_ts_desc_->ToString()
+                  << ": tablet " << tablet_id_
                   << " (table " << table_->ToString() << ") successfully deleted";
       } else {
-        LOG(WARNING) << "TS " << permanent_uuid_ << ": tablet " << tablet_id_
+        LOG(WARNING) << "TS " << target_ts_desc_->ToString()
+                     << ": tablet " << tablet_id_
                      << " did not belong to a known table, but was successfully deleted";
       }
       MarkComplete();
-      VLOG(1) << "TS " << permanent_uuid_ << ": delete complete on tablet " << tablet_id_;
+      VLOG(1) << "TS " << target_ts_desc_->ToString()
+              << ": delete complete on tablet " << tablet_id_;
     }
   }
 
@@ -2581,9 +2588,12 @@ class AsyncDeleteReplica : public RetrySpecificTSRpcTask {
       req.set_cas_config_opid_index_less_or_equal(*cas_config_opid_index_less_or_equal_);
     }
 
-    VLOG(1) << "Send delete tablet request to " << permanent_uuid_
-            << " (attempt " << attempt << "):\n"
-            << req.DebugString();
+    LOG(INFO) << Substitute("Sending DeleteTablet($0) for tablet $1 on $2 "
+                            "($3)",
+                            TabletDataState_Name(delete_type_),
+                            tablet_id_,
+                            target_ts_desc_->ToString(),
+                            reason_);
     ts_proxy_->DeleteTabletAsync(req, &resp_, &rpc_,
                                  boost::bind(&AsyncDeleteReplica::RpcCallback, this));
     return true;
@@ -2621,9 +2631,6 @@ class AsyncAlterTable : public RetryingTSRpcTask {
 
  private:
   virtual string tablet_id() const OVERRIDE { return tablet_->tablet_id(); }
-  string permanent_uuid() const {
-    return target_ts_desc_->permanent_uuid();
-  }
 
   virtual void HandleResponse(int attempt) OVERRIDE {
     if (resp_.has_error()) {
@@ -2634,18 +2641,19 @@ class AsyncAlterTable : public RetryingTSRpcTask {
         case TabletServerErrorPB::TABLET_NOT_FOUND:
         case TabletServerErrorPB::MISMATCHED_SCHEMA:
         case TabletServerErrorPB::TABLET_HAS_A_NEWER_SCHEMA:
-          LOG(WARNING) << "TS " << permanent_uuid() << ": alter failed for tablet "
+          LOG(WARNING) << "TS " << target_ts_desc_->ToString() << ": alter failed for tablet "
                        << tablet_->ToString() << " no further retry: " << status.ToString();
           MarkComplete();
           break;
         default:
-          LOG(WARNING) << "TS " << permanent_uuid() << ": alter failed for tablet "
+          LOG(WARNING) << "TS " << target_ts_desc_->ToString() << ": alter failed for tablet "
                        << tablet_->ToString() << ": " << status.ToString();
           break;
       }
     } else {
       MarkComplete();
-      VLOG(1) << "TS " << permanent_uuid() << ": alter complete on tablet " << tablet_->ToString();
+      VLOG(1) << "TS " << target_ts_desc_->ToString()
+              << ": alter complete on tablet " << tablet_->ToString();
     }
 
     if (state() != kStateComplete) {
@@ -2657,7 +2665,7 @@ class AsyncAlterTable : public RetryingTSRpcTask {
     TableMetadataLock l(tablet_->table().get(), TableMetadataLock::READ);
 
     tserver::AlterSchemaRequestPB req;
-    req.set_dest_uuid(permanent_uuid());
+    req.set_dest_uuid(target_ts_desc_->permanent_uuid());
     req.set_tablet_id(tablet_->tablet_id());
     req.set_new_table_name(l.data().pb.name());
     req.set_schema_version(l.data().pb.version());
@@ -2665,7 +2673,7 @@ class AsyncAlterTable : public RetryingTSRpcTask {
 
     l.Unlock();
 
-    VLOG(1) << "Send alter table request to " << permanent_uuid()
+    VLOG(1) << "Send alter table request to " << target_ts_desc_->ToString()
             << " (attempt " << attempt << "):\n"
             << req.DebugString();
     ts_proxy_->AlterSchemaAsync(req, &resp_, &rpc_,
@@ -2716,9 +2724,11 @@ class AsyncAddServerTask : public RetryingTSRpcTask {
   virtual string type_name() const OVERRIDE { return "AddServer ChangeConfig"; }
 
   virtual string description() const OVERRIDE {
-    return Substitute("AddServer ChangeConfig RPC for tablet $0 on peer $1 "
+    return Substitute("AddServer ChangeConfig RPC for tablet $0 on TS $1 "
                       "with cas_config_opid_index $2",
-                      tablet_->tablet_id(), permanent_uuid(), cstate_.config().opid_index());
+                      tablet_->tablet_id(),
+                      target_ts_desc_->ToString(),
+                      cstate_.config().opid_index());
   }
 
  protected:
@@ -2727,9 +2737,6 @@ class AsyncAddServerTask : public RetryingTSRpcTask {
 
  private:
   virtual string tablet_id() const OVERRIDE { return tablet_->tablet_id(); }
-  string permanent_uuid() const {
-    return target_ts_desc_->permanent_uuid();
-  }
 
   const scoped_refptr<TabletInfo> tablet_;
   const ConsensusStatePB cstate_;
@@ -2768,7 +2775,7 @@ bool AsyncAddServerTask::SendRequest(int attempt) {
     return false;
   }
 
-  req_.set_dest_uuid(permanent_uuid());
+  req_.set_dest_uuid(target_ts_desc_->permanent_uuid());
   req_.set_tablet_id(tablet_->tablet_id());
   req_.set_type(consensus::ADD_SERVER);
   req_.set_cas_config_opid_index(cstate_.config().opid_index());
@@ -2776,16 +2783,11 @@ bool AsyncAddServerTask::SendRequest(int attempt) {
   peer->set_permanent_uuid(replacement_replica->permanent_uuid());
   ServerRegistrationPB peer_reg;
   replacement_replica->GetRegistration(&peer_reg);
-  if (peer_reg.rpc_addresses_size() == 0) {
-    KLOG_EVERY_N(WARNING, 100) << LogPrefix() << "Candidate replacement "
-                               << replacement_replica->permanent_uuid()
-                               << " has no registered rpc address: "
-                               << peer_reg.ShortDebugString();
-    return false;
-  }
+  CHECK_GT(peer_reg.rpc_addresses_size(), 0);
   *peer->mutable_last_known_addr() = peer_reg.rpc_addresses(0);
   peer->set_member_type(RaftPeerPB::VOTER);
-  VLOG(1) << "Sending AddServer ChangeConfig request to " << permanent_uuid() << ":\n"
+  VLOG(1) << "Sending AddServer ChangeConfig request to "
+          << target_ts_desc_->ToString() << ":\n"
           << req_.DebugString();
   consensus_proxy_->ChangeConfigAsync(req_, &resp_, &rpc_,
                                       boost::bind(&AsyncAddServerTask::RpcCallback, this));
@@ -2804,13 +2806,15 @@ void AsyncAddServerTask::HandleResponse(int attempt) {
   // Do not retry on a CAS error, otherwise retry forever or until cancelled.
   switch (resp_.error().code()) {
     case TabletServerErrorPB::CAS_FAILED:
-      LOG_WITH_PREFIX(WARNING) << "ChangeConfig() failed with leader " << permanent_uuid()
+      LOG_WITH_PREFIX(WARNING) << "ChangeConfig() failed with leader "\
+                               << target_ts_desc_->ToString()
                                << " due to CAS failure. No further retry: "
                                << status.ToString();
       MarkFailed();
       break;
     default:
-      LOG_WITH_PREFIX(INFO) << "ChangeConfig() failed with leader " << permanent_uuid()
+      LOG_WITH_PREFIX(INFO) << "ChangeConfig() failed with leader "
+                            << target_ts_desc_->ToString()
                             << " due to error "
                             << TabletServerErrorPB::Code_Name(resp_.error().code())
                             << ". This operation will be retried. Error detail: "
@@ -2873,11 +2877,6 @@ void CatalogManager::SendDeleteReplicaRequest(
     const scoped_refptr<TableInfo>& table,
     const string& ts_uuid,
     const string& reason) {
-  LOG_WITH_PREFIX(INFO) << Substitute("Deleting tablet $0 on peer $1 "
-                                      "with delete type $2 ($3)",
-                                      tablet_id, ts_uuid,
-                                      TabletDataState_Name(delete_type),
-                                      reason);
   AsyncDeleteReplica* call =
       new AsyncDeleteReplica(master_, ts_uuid, table,
                              tablet_id, delete_type, cas_config_opid_index_less_or_equal,

http://git-wip-us.apache.org/repos/asf/kudu/blob/db6d22da/src/kudu/master/ts_descriptor.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/ts_descriptor.cc b/src/kudu/master/ts_descriptor.cc
index 1651c0b..dd28cb2 100644
--- a/src/kudu/master/ts_descriptor.cc
+++ b/src/kudu/master/ts_descriptor.cc
@@ -97,6 +97,13 @@ Status TSDescriptor::Register(const NodeInstancePB& instance,
     return Status::InvalidArgument(msg);
   }
 
+  if (registration.rpc_addresses().empty() ||
+      registration.http_addresses().empty()) {
+    return Status::InvalidArgument(
+        "invalid registration: must have at least one RPC and one HTTP address",
+        registration.ShortDebugString());
+  }
+
   if (instance.instance_seqno() < latest_seqno_) {
     return Status::AlreadyPresent(
       strings::Substitute("Cannot register with sequence number $0:"
@@ -250,5 +257,11 @@ Status TSDescriptor::GetConsensusProxy(const shared_ptr<rpc::Messenger>& messeng
   return Status::OK();
 }
 
+string TSDescriptor::ToString() const {
+  std::lock_guard<simple_spinlock> l(lock_);
+  const auto& addr = registration_->rpc_addresses(0);
+  return strings::Substitute("$0 ($1:$2)", permanent_uuid_, addr.host(), addr.port());
+}
+
 } // namespace master
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/db6d22da/src/kudu/master/ts_descriptor.h
----------------------------------------------------------------------
diff --git a/src/kudu/master/ts_descriptor.h b/src/kudu/master/ts_descriptor.h
index 60c30bc..86a953e 100644
--- a/src/kudu/master/ts_descriptor.h
+++ b/src/kudu/master/ts_descriptor.h
@@ -111,6 +111,10 @@ class TSDescriptor {
     return num_live_replicas_;
   }
 
+  // Return a string form of this TS, suitable for printing.
+  // Includes the UUID as well as last known host/port.
+  std::string ToString() const;
+
  private:
   FRIEND_TEST(TestTSDescriptor, TestReplicaCreationsDecay);
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/db6d22da/src/kudu/master/ts_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/ts_manager.cc b/src/kudu/master/ts_manager.cc
index 052b503..dd95cc5 100644
--- a/src/kudu/master/ts_manager.cc
+++ b/src/kudu/master/ts_manager.cc
@@ -80,14 +80,14 @@ Status TSManager::RegisterTS(const NodeInstancePB& instance,
     shared_ptr<TSDescriptor> new_desc;
     RETURN_NOT_OK(TSDescriptor::RegisterNew(instance, registration, &new_desc));
     InsertOrDie(&servers_by_id_, uuid, new_desc);
-    LOG(INFO) << Substitute("Registered new tserver $0 with Master",
-                            instance.ShortDebugString());
+    LOG(INFO) << Substitute("Registered new tserver with Master: $0",
+                            new_desc->ToString());
     desc->swap(new_desc);
   } else {
     shared_ptr<TSDescriptor> found(FindOrDie(servers_by_id_, uuid));
     RETURN_NOT_OK(found->Register(instance, registration));
-    LOG(INFO) << Substitute("Re-registered known tserver $0 with Master",
-                            instance.ShortDebugString());
+    LOG(INFO) << Substitute("Re-registered known tserver with Master: $0",
+                            found->ToString());
     desc->swap(found);
   }