You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2022/10/12 04:31:08 UTC

[kudu] branch master updated (3eb4607e4 -> e5ace5fa2)

This is an automated email from the ASF dual-hosted git repository.

alexey pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


    from 3eb4607e4 KUDU-3401 Fix table creation with HMS Integration
     new 54c58fb8c [test] make master_authz-itest more robust
     new e5ace5fa2 KUDU-2353: tool to parse metrics out of diagnostic logs

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/kudu/gutil/map-util.h                          |  20 +
 src/kudu/integration-tests/master_authz-itest.cc   | 214 ++++----
 src/kudu/tools/diagnostics_log_parser-test.cc      | 207 ++++----
 src/kudu/tools/diagnostics_log_parser.cc           | 571 +++++++++++++++++----
 src/kudu/tools/diagnostics_log_parser.h            | 235 +++++++--
 src/kudu/tools/kudu-tool-test.cc                   |  66 ++-
 src/kudu/tools/testdata/sample-diagnostics-log.txt |   1 +
 src/kudu/tools/tool_action_diagnose.cc             | 158 +++++-
 src/kudu/util/regex.h                              |  56 ++
 9 files changed, 1135 insertions(+), 393 deletions(-)
 create mode 100644 src/kudu/util/regex.h


[kudu] 02/02: KUDU-2353: tool to parse metrics out of diagnostic logs

Posted by al...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit e5ace5fa28154fa906c1b087e3b80461b6d85337
Author: Andrew Wong <aw...@cloudera.com>
AuthorDate: Sat Sep 15 21:26:27 2018 -0700

    KUDU-2353: tool to parse metrics out of diagnostic logs
    
    This patch contains C++ implementation of the metrics log parser script.
    There are a couple functional differences between this tool and the
    existing script:
      - This tool recognizes table metrics
      - This tool allows for filtering metrics by table or tablet
        identifier
      - Histogram metrics for this tool also spit out the rough count of the
        measurements
    
    This patch also addresses KUDU-2597 as a subtask of the JIRA item
    mentioned in the summary.
    
    NOTES:
      - Kudu metrics are only output into the diagnostic log when a metric
        has changed, so this patch tracks the metric values per entity
        (tablet ID, table ID, server) at each point in time in order
        to output the correct values. This means that if within a given set
        of files, a tablet's metric has not changed and no corresponding
        records are in the files, this tool is not printing any information
        on the tablet's metrics.
    
      - Kudu histogram metrics do spit out a summary for percentiles. The
        tool explicitly does not use that and instead generates these
        metrics from the histogram counts. While less accurate (IIUC, the
        counts can be lossy), this allows us to generate aggregated
        summaries from multiple entities.
    
    Here's an example:
    
    [awong@va1022 release]$ ./bin/kudu diagnose parse_metrics kudu-tserver.worker12.foobar.com.kudu.diagnostics.20210123-201217.0.74565 --simple_metrics=tablet.scans_started:num_scans_started --rate_metrics=tablet.scans_started:scans_started_per_sec --histogram_metrics=server.scanner_duration:scanner_duration_
    us,server.handler_latency_kudu_tserver_TabletServerService_Scan:scan_rpc_us
    I0131 11:53:27.010298 151768 diagnostics_log_parser.cc:272] collecting simple metric tablet.scans_started as num_scanners_started
    I0131 11:53:27.010438 151768 diagnostics_log_parser.cc:279] collecting rate metric tablet.scans_started as scanners_started_per_sec
    I0131 11:53:27.010455 151768 diagnostics_log_parser.cc:286] collecting histogram metric server.handler_latency_kudu_tserver_TabletServerService_Scan as scan_rpc_us
    I0131 11:53:27.010524 151768 diagnostics_log_parser.cc:286] collecting histogram metric server.scanner_duration as scanner_duration_us
    timestamp       num_scanners_started       scanners_started_per_sec   scan_rpc_us_count       scan_rpc_us_min scan_rpc_us_p50 scan_rpc_us_p75 scan_rpc_us_p95 scan_rpc_us_p99 scan_rpc_us_p99_99      scan_rpc_us_max scanner_duration_us_count       scanner_duration_us_min scanner_duration_us_p50 scanner_duration_us_p75 scanner_duration_us_p95 scanner_duration_us_p99 scanner_duration_us_p99_99  scanner_duration_us_max
    1611432793767488        68492   0       434170147       2       1215    1639    3711    12927   501759  8650751 1854125 12      23295   1302527 54788095        60030975        60030975        60030975
    1611432853767552        231516  2717.0637684653134      434198546       2       1215    1639    3711    12927   501759  8650751 1854200 12      23295   1302527 54788095        60030975        60030975        60030975
    1611432913767616        349073  1959.2812434333403      434227285       2       1215    1639    3711    12927   501759  8650751 1854306 12      23295   1302527 54788095        60030975        60030975        60030975
    1611432973767689        829597  8008.7235893863 434255021       2       1215    1639    3711    12927   501759  8650751 1854517 12      23295   1302527 54788095        60030975        60030975        60030975
    1611433033767772        926516  1615.314432148369       434283184       2       1215    1639    3711    12927   501759  8650751 1854605 12      23295   1302527 54788095        60030975        60030975        60030975
    1611433093767841        926626  1.8333312250024245      434309627       2       1215    1639    3711    12927   501759  8650751 1854719 12      23295   1302527 54788095        60030975        60030975        60030975
    1611433153767902        960053  557.11610026529809      434339928       2       1215    1639    3711    12927   501759  8650751 1854788 12      23295   1302527 54788095        60030975        60030975        60030975
    1611433213767967        1009625 826.19910495096963      434366776       2       1215    1639    3711    12927   501759  8650751 1854831 12      23295   1302527 54788095        60030975        60030975        60030975
    1611433273768032        1059960 838.91575784126235      434394555       2       1215    1639    3711    12927   501759  8650751 1854966 12      23295   1302527 54788095        60030975        60030975        60030975
    1611433333768067        1061577 26.949984279175837      434420683       2       1215    1639    3711    12927   501759  8650751 1855023 12      23295   1302527 54788095        60030975        60030975        60030975
    1611433393768130        1082096 341.98297425121041      434447991       2       1215    1639    3711    12927   501759  8650751 1855185 12      23295   1302527 54788095        60030975        60030975        60030975
    1611433453768205        1083102 16.76664570835953       434476348       2       1215    1639    3711    12927   501759  8650751 1855285 12      23295   1302527 54788095        60030975        60030975        60030975
    1611433513768270        1088338 87.2665721278802        434498551       2       1215    1639    3711    12927   501759  8650751 1855388 12      23295   1302527 54788095        60030975        60030975        60030975
    
    Change-Id: I8077fb4f6b41fe4b2bd6c877af379ea7a9f415b1
    Reviewed-on: http://gerrit.cloudera.org:8080/12570
    Tested-by: Kudu Jenkins
    Reviewed-by: Abhishek Chennaka <ac...@cloudera.com>
    Reviewed-by: Alexey Serbin <al...@apache.org>
---
 src/kudu/gutil/map-util.h                          |  20 +
 src/kudu/tools/diagnostics_log_parser-test.cc      | 207 ++++----
 src/kudu/tools/diagnostics_log_parser.cc           | 571 +++++++++++++++++----
 src/kudu/tools/diagnostics_log_parser.h            | 235 +++++++--
 src/kudu/tools/kudu-tool-test.cc                   |  66 ++-
 src/kudu/tools/testdata/sample-diagnostics-log.txt |   1 +
 src/kudu/tools/tool_action_diagnose.cc             | 158 +++++-
 src/kudu/util/regex.h                              |  56 ++
 8 files changed, 1028 insertions(+), 286 deletions(-)

diff --git a/src/kudu/gutil/map-util.h b/src/kudu/gutil/map-util.h
index a0a5ea237..73d5a865e 100644
--- a/src/kudu/gutil/map-util.h
+++ b/src/kudu/gutil/map-util.h
@@ -564,6 +564,26 @@ void AddTokenCounts(
   }
 }
 
+// Merges two count maps together: for each key in 'counts_to_add', adds its
+// count to the corresponding entry in 'count_map', inserting the key with a
+// default-initialized count first if it is not already present.
+//
+// Example:
+//   map<string, int> m1;
+//   m1["c"] = 1;
+//   map<string, int> m2;
+//   m2["c"] = 2;
+//   MergeTokenCounts(&m1, m2);  // now m1["c"] == 3
+template <typename Collection>
+void MergeTokenCounts(Collection* const count_map,
+                      const Collection& counts_to_add) {
+  for (const auto& value_and_count : counts_to_add) {
+    typename Collection::mapped_type& value = LookupOrInsert(
+        count_map, value_and_count.first, typename Collection::mapped_type());
+    value += value_and_count.second;
+  }
+}
+
 // Helpers for LookupOrInsertNew(), needed to create a new value type when the
 // type itself is a pointer, i.e., these extract the actual type from a pointer.
 template <class T>
diff --git a/src/kudu/tools/diagnostics_log_parser-test.cc b/src/kudu/tools/diagnostics_log_parser-test.cc
index 51bcc6cd5..25b251fa7 100644
--- a/src/kudu/tools/diagnostics_log_parser-test.cc
+++ b/src/kudu/tools/diagnostics_log_parser-test.cc
@@ -17,10 +17,10 @@
 
 #include "kudu/tools/diagnostics_log_parser.h"
 
-#include <memory>
 #include <ostream>
 #include <string>
-#include <vector>
+#include <unordered_map>
+#include <utility>
 
 #include <gtest/gtest.h>
 #include <rapidjson/document.h>
@@ -28,208 +28,195 @@
 #include "kudu/util/status.h"
 #include "kudu/util/test_macros.h"
 
-namespace kudu {
-namespace tools {
-
 using std::string;
 using std::stringstream;
-using std::unique_ptr;
-using std::vector;
+
+namespace kudu {
+namespace tools {
 
 TEST(DiagLogParserTest, TestParseLine) {
+  const auto parse_line = [&] (const string& line_str) {
+    ParsedLine pl(line_str);
+    return pl.Parse();
+  };
   // Lines have the following format:
   // I0220 17:38:09.950546 metrics 1519177089950546 <json blob>...
+  // Lines begin with 'I' and detect missing fields.
+  Status s = parse_line("");
+  ASSERT_TRUE(s.IsInvalidArgument());
+  ASSERT_STR_CONTAINS(s.ToString(), "lines must start with 'I'");
+
+  s = parse_line("X0220 17:38:09.950546 metrics 1519177089950546 {\"foo\" : \"bar\"}");
+  ASSERT_TRUE(s.IsInvalidArgument());
+  ASSERT_STR_CONTAINS(s.ToString(), "lines must start with 'I'");
+
+  s = parse_line("I0220 17:38:09.950546 metrics 1519177089950546");
+  ASSERT_TRUE(s.IsCorruption());
+  ASSERT_STR_CONTAINS(s.ToString(), "invalid JSON payload");
   {
-    // Lines begin with 'I' and detect missing fields.
-    ParsedLine pl;
-    string line = "";
-    Status s = pl.Parse(line);
-    ASSERT_TRUE(s.IsInvalidArgument());
-    ASSERT_STR_CONTAINS(s.ToString(), "lines must start with 'I'");
-
-    line = "X0220 17:38:09.950546 metrics 1519177089950546 {\"foo\" : \"bar\"}";
-    s = pl.Parse(line);
-    ASSERT_TRUE(s.IsInvalidArgument());
-    ASSERT_STR_CONTAINS(s.ToString(), "lines must start with 'I'");
-
-    line = "I0220 17:38:09.950546 metrics 1519177089950546";
-    s = pl.Parse(line);
-    ASSERT_TRUE(s.IsCorruption());
-    ASSERT_STR_CONTAINS(s.ToString(), "invalid JSON payload");
-
-    line = "I0220 17:38:09.950546 metrics 1519177089950546 {\"foo\" : \"bar\"}";
-    ASSERT_OK(pl.Parse(line));
+    ParsedLine pl("I0220 17:38:09.950546 metrics 1519177089950546 {\"foo\" : \"bar\"}");
+    ASSERT_OK(pl.Parse());
   }
-
   {
     // The date and time should be parsed successfully.
-    ParsedLine pl;
-    string line = "I0220 17:38:09.950546 stacks 1519177089950546 {\"foo\" : \"bar\"}";
-    ASSERT_OK(pl.Parse(line));
+    ParsedLine pl("I0220 17:38:09.950546 stacks 1519177089950546 {\"foo\" : \"bar\"}");
+    ASSERT_OK(pl.Parse());
     ASSERT_EQ("0220 17:38:09.950546", pl.date_time());
   }
-
   {
     // The line parser recognizes "stacks" and "symbols" categories.
     // "metrics" isn't recognized yet.
-    ParsedLine pl;
-    string line = "I0220 17:38:09.950546 stacks 1519177089950546 {\"foo\" : \"bar\"}";
-    ASSERT_OK(pl.Parse(line));
+    ParsedLine pl("I0220 17:38:09.950546 stacks 1519177089950546 {\"foo\" : \"bar\"}");
+    ASSERT_OK(pl.Parse());
     ASSERT_EQ(RecordType::kStacks, pl.type());
-
-    line = "I0220 17:38:09.950546 symbols 1519177089950546 {\"foo\" : \"bar\"}";
-    ASSERT_OK(pl.Parse(line));
+  }
+  {
+    ParsedLine pl("I0220 17:38:09.950546 symbols 1519177089950546 {\"foo\" : \"bar\"}");
+    ASSERT_OK(pl.Parse());
     ASSERT_EQ(RecordType::kSymbols, pl.type());
-
-    line = "I0220 17:38:09.950546 metrics 1519177089950546 {\"foo\" : \"bar\"}";
-    ASSERT_OK(pl.Parse(line));
-    ASSERT_EQ(RecordType::kUnknown, pl.type());
-
-    line = "I0220 17:38:09.950546 foo 1519177089950546 {\"foo\" : \"bar\"}";
-    ASSERT_OK(pl.Parse(line));
-    ASSERT_EQ(RecordType::kUnknown, pl.type());
   }
-
   {
-    // The timestamp must be a number.
-    ParsedLine pl;
-    string line = "I0220 17:38:09.950546 stacks 1234foo567890000 {\"foo\" : \"bar\"}";
-    Status s = pl.Parse(line);
-    ASSERT_TRUE(s.IsInvalidArgument());
-    ASSERT_STR_CONTAINS(s.ToString(), "invalid timestamp");
+    ParsedLine pl("I0220 17:38:09.950546 metrics 1519177089950546 {\"foo\" : \"bar\"}");
+    ASSERT_OK(pl.Parse());
+    ASSERT_EQ(RecordType::kMetrics, pl.type());
+  }
+  {
+    ParsedLine pl("I0220 17:38:09.950546 foo 1519177089950546 {\"foo\" : \"bar\"}");
+    ASSERT_OK(pl.Parse());
+    ASSERT_EQ(RecordType::kUnknown, pl.type());
   }
-
   {
     // The json blob must be valid json.
-    ParsedLine pl;
-    string line = "I0220 17:38:09.950546 stacks 1519177089950546 {\"foo\" : \"bar\"}";
-    ASSERT_OK(pl.Parse(line));
+    ParsedLine pl("I0220 17:38:09.950546 stacks 1519177089950546 {\"foo\" : \"bar\"}");
+    ASSERT_OK(pl.Parse());
     string val = (*pl.json())["foo"].GetString();
     ASSERT_EQ(val, "bar");
-
-    line = "I0220 17:38:09.950546 stacks 1519177089950546 {\"foo\" : }";
-    Status s = pl.Parse(line);
-    ASSERT_TRUE(s.IsCorruption()) << s.ToString();
   }
+  // The timestamp must be a number.
+  s = parse_line("I0220 17:38:09.950546 stacks 1234foo567890000 {\"foo\" : \"bar\"}");
+  ASSERT_TRUE(s.IsInvalidArgument());
+  ASSERT_STR_CONTAINS(s.ToString(), "invalid timestamp");
+
+  s = parse_line("I0220 17:38:09.950546 stacks 1519177089950546 {\"foo\" : }");
+  ASSERT_TRUE(s.IsCorruption()) << s.ToString();
 }
 
 class TestSymbolsLogVisitor : public LogVisitor {
  public:
-  void VisitSymbol(const string& addr, const string& symbol) override {
-    addr_ = addr;
-    symbol_ = symbol;
+  Status ParseRecord(const ParsedLine& pl) override {
+    if (pl.type() == RecordType::kSymbols) {
+      SymbolsRecord sr;
+      RETURN_NOT_OK(sr.FromParsedLine(pl));
+      addr_ = sr.addr_to_symbol.begin()->first;
+      symbol_ = sr.addr_to_symbol.begin()->second;
+    }
+    return Status::OK();
   }
 
-  void VisitStacksRecord(const StacksRecord& /*sr*/) override {}
-
   string addr_;
   string symbol_;
 };
 
 TEST(DiagLogParserTest, TestParseSymbols) {
   TestSymbolsLogVisitor lv;
-  LogParser lp(&lv);
+  const auto parse_line = [&] (const string& line_str) {
+    ParsedLine pl(line_str);
+    pl.Parse();
+    return lv.ParseRecord(pl);
+  };
 
-  string line = "I0220 17:38:09.950546 symbols 1519177089950546 {\"addr\" : 99}";
-  Status s = lp.ParseLine(line);
+  Status s = parse_line("I0220 17:38:09.950546 symbols 1519177089950546 {\"addr\" : 99}");
   ASSERT_TRUE(s.IsInvalidArgument());
   ASSERT_STR_CONTAINS(s.ToString(), "expected symbol values to be strings");
 
-  line = "I0220 17:38:09.950546 symbols 1519177089950546 {\"addr\" : { \"bar\" : \"baaz\" } }";
-  s = lp.ParseLine(line);
+  s = parse_line("I0220 17:38:09.950546 symbols 1519177089950546 {\"addr\" : "
+                "{ \"bar\" : \"baaz\" } }");
   ASSERT_TRUE(s.IsInvalidArgument());
   ASSERT_STR_CONTAINS(s.ToString(), "expected symbol values to be strings");
 
-  line = "I0220 17:38:09.950546 symbols 1519177089950546 {\"addr\" : \"symbol\"}";
-  ASSERT_OK(lp.ParseLine(line));
+  ASSERT_OK(parse_line("I0220 17:38:09.950546 symbols 1519177089950546 {\"addr\" : \"symbol\"}"));
   ASSERT_EQ("addr", lv.addr_);
   ASSERT_EQ("symbol", lv.symbol_);
 }
 
-class NoopLogVisitor : public LogVisitor {
+class TestStacksLogVisitor : public LogVisitor {
  public:
-  void VisitSymbol(const string& /*addr*/, const string& /*symbol*/) override {}
-  void VisitStacksRecord(const StacksRecord& /*sr*/) override {}
+  Status ParseRecord(const ParsedLine& pl) override {
+    StacksRecord sr;
+    return sr.FromParsedLine(pl);
+  }
 };
 
 // For parsing stacks, we'll check for success or error only. The parse_stacks
 // tool's tests serve as a check that the right information is parsed.
 TEST(DiagLogParserTest, TestParseStacks) {
-  NoopLogVisitor lv;
-  LogParser lp(&lv);
+  TestStacksLogVisitor lv;
+  const auto parse_line = [&] (const string& line_str) {
+    ParsedLine pl(line_str);
+    pl.Parse();
+    return lv.ParseRecord(pl);
+  };
 
   // The "reason" field must be a string, if present.
-  string line = "I0220 17:38:09.950546 stacks 1519177089950546 "
-         "{\"reason\" : 1.2, \"groups\" : []}";
-  Status s = lp.ParseLine(line);
+  Status s = parse_line("I0220 17:38:09.950546 stacks 1519177089950546 "
+                        "{\"reason\" : 1.2, \"groups\" : []}");
   ASSERT_TRUE(s.IsInvalidArgument());
   ASSERT_STR_CONTAINS(s.ToString(), "expected stacks 'reason' to be a string");
 
   // The "groups" field must be present
-  line = "I0220 17:38:09.950546 stacks 1519177089950546 {}";
-  s = lp.ParseLine(line);
+  s = parse_line("I0220 17:38:09.950546 stacks 1519177089950546 {}");
   ASSERT_TRUE(s.IsInvalidArgument());
   ASSERT_STR_CONTAINS(s.ToString(), "no 'groups' field in stacks object");
 
   // The "groups" field must be an array.
-  line = "I0220 17:38:09.950546 stacks 1519177089950546 {\"groups\" : \"foo\"}";
-  s = lp.ParseLine(line);
+  s = parse_line("I0220 17:38:09.950546 stacks 1519177089950546 {\"groups\" : \"foo\"}");
   ASSERT_TRUE(s.IsInvalidArgument());
   ASSERT_STR_CONTAINS(s.ToString(), "'groups' field should be an array");
 
   // A member of the groups array (a group) must be an object.
-  line = "I0220 17:38:09.950546 stacks 1519177089950546 {\"groups\" : [\"foo\"]}";
-  s = lp.ParseLine(line);
+  s = parse_line("I0220 17:38:09.950546 stacks 1519177089950546 {\"groups\" : [\"foo\"]}");
   ASSERT_TRUE(s.IsInvalidArgument());
   ASSERT_STR_CONTAINS(s.ToString(), "expected stacks groups to be JSON objects");
 
   // A group must have "tids" and stack fields.
-  line = "I0220 17:38:09.950546 stacks 1519177089950546 "
-         "{\"groups\" : [{\"stack\" : []}]}";
-  s = lp.ParseLine(line);
+  s = parse_line("I0220 17:38:09.950546 stacks 1519177089950546 "
+         "{\"groups\" : [{\"stack\" : []}]}");
   ASSERT_TRUE(s.IsInvalidArgument());
   ASSERT_STR_CONTAINS(s.ToString(), "expected stacks groups to have frames and tids");
 
-  line = "I0220 17:38:09.950546 stacks 1519177089950546 "
-         "{\"groups\" : [{\"tids\" : 5}]}";
-  s = lp.ParseLine(line);
+  s = parse_line("I0220 17:38:09.950546 stacks 1519177089950546 "
+         "{\"groups\" : [{\"tids\" : 5}]}");
   ASSERT_TRUE(s.IsInvalidArgument());
   ASSERT_STR_CONTAINS(s.ToString(), "expected stacks groups to have frames and tids");
 
   // A group must have a "tids" field, with value a numeric array
-  line = "I0220 17:38:09.950546 stacks 1519177089950546 "
-         "{\"groups\" : [{\"tids\" : 5, \"stack\" : []}]}";
-  s = lp.ParseLine(line);
+  s = parse_line("I0220 17:38:09.950546 stacks 1519177089950546 "
+         "{\"groups\" : [{\"tids\" : 5, \"stack\" : []}]}");
   ASSERT_TRUE(s.IsInvalidArgument());
   ASSERT_STR_CONTAINS(s.ToString(), "expected 'tids' to be an array");
 
-  line = "I0220 17:38:09.950546 stacks 1519177089950546 "
-         "{\"groups\" : [{\"tids\" : [false], \"stack\" : []}]}";
-  s = lp.ParseLine(line);
+  s = parse_line("I0220 17:38:09.950546 stacks 1519177089950546 "
+         "{\"groups\" : [{\"tids\" : [false], \"stack\" : []}]}");
   ASSERT_TRUE(s.IsInvalidArgument());
   ASSERT_STR_CONTAINS(s.ToString(), "expected 'tids' elements to be numeric");
 
   // A group must have a "stack" field, with value an array of strings.
-  line = "I0220 17:38:09.950546 stacks 1519177089950546 "
-         "{\"groups\" : [{\"tids\" : [], \"stack\" : 5}]}";
-  s = lp.ParseLine(line);
+  s = parse_line("I0220 17:38:09.950546 stacks 1519177089950546 "
+         "{\"groups\" : [{\"tids\" : [], \"stack\" : 5}]}");
   ASSERT_TRUE(s.IsInvalidArgument());
   ASSERT_STR_CONTAINS(s.ToString(), "expected 'stack' to be an array");
 
-  line = "I0220 17:38:09.950546 stacks 1519177089950546 "
-         "{\"groups\" : [{\"tids\" : [], \"stack\" : [5]}]}";
-  s = lp.ParseLine(line);
+  s = parse_line("I0220 17:38:09.950546 stacks 1519177089950546 "
+         "{\"groups\" : [{\"tids\" : [], \"stack\" : [5]}]}");
   ASSERT_TRUE(s.IsInvalidArgument());
   ASSERT_STR_CONTAINS(s.ToString(), "expected 'stack' elements to be strings");
 
   // Happy cases with and without a "reason" field.
-  line = "I0220 17:38:09.950546 stacks 1519177089950546 "
-         "{\"reason\" : \"test\", \"groups\" : [{\"tids\" : [0], \"stack\" : [\"stack\"]}]}";
-  ASSERT_OK(lp.ParseLine(line));
+  ASSERT_OK(parse_line("I0220 17:38:09.950546 stacks 1519177089950546 "
+      "{\"reason\" : \"test\", \"groups\" : [{\"tids\" : [0], \"stack\" : [\"stack\"]}]}"));
 
-  line = "I0220 17:38:09.950546 stacks 1519177089950546 "
-         "{\"groups\" : [{\"tids\" : [0], \"stack\" : [\"stack\"]}]}";
-  ASSERT_OK(lp.ParseLine(line));
+  ASSERT_OK(parse_line("I0220 17:38:09.950546 stacks 1519177089950546 "
+      "{\"groups\" : [{\"tids\" : [0], \"stack\" : [\"stack\"]}]}"));
 }
 
 } // namespace tools
diff --git a/src/kudu/tools/diagnostics_log_parser.cc b/src/kudu/tools/diagnostics_log_parser.cc
index d167250e4..190f19b15 100644
--- a/src/kudu/tools/diagnostics_log_parser.cc
+++ b/src/kudu/tools/diagnostics_log_parser.cc
@@ -18,12 +18,18 @@
 #include "kudu/tools/diagnostics_log_parser.h"
 
 #include <array>
+#include <cerrno>
 #include <cstdint>
+#include <cstdio>
 #include <iomanip>
 #include <iostream>
 #include <optional>
+#include <set>
 #include <string>
+#include <tuple>
+#include <type_traits>
 #include <utility>
+#include <vector>
 
 #include <glog/logging.h>
 #include <rapidjson/document.h>
@@ -35,7 +41,8 @@
 #include "kudu/gutil/strings/split.h"
 #include "kudu/gutil/strings/stringpiece.h"
 #include "kudu/gutil/strings/substitute.h"
-#include "kudu/util/jsonreader.h" // IWYU pragma: keep
+#include "kudu/util/errno.h"
+#include "kudu/util/jsonreader.h"
 #include "kudu/util/status.h"
 
 using std::array;
@@ -43,15 +50,65 @@ using std::cout;
 using std::endl;
 using std::ifstream;
 using std::string;
+using std::vector;
 using strings::Substitute;
 
+
 namespace kudu {
 namespace tools {
 
+namespace {
+
+Status ParseStackGroup(const rapidjson::Value& group_json,
+                       StacksRecord::Group* group) {
+  DCHECK(group);
+  StacksRecord::Group ret;
+  if (PREDICT_FALSE(!group_json.IsObject())) {
+    return Status::InvalidArgument("expected stacks groups to be JSON objects");
+  }
+  if (!group_json.HasMember("tids") || !group_json.HasMember("stack")) {
+    return Status::InvalidArgument("expected stacks groups to have frames and tids");
+  }
+
+  // Parse the tids.
+  const auto& tids = group_json["tids"];
+  if (PREDICT_FALSE(!tids.IsArray())) {
+    return Status::InvalidArgument("expected 'tids' to be an array");
+  }
+  ret.tids.reserve(tids.Size());
+  for (const auto* tid = tids.Begin();
+       tid != tids.End();
+       ++tid) {
+    if (PREDICT_FALSE(!tid->IsNumber())) {
+      return Status::InvalidArgument("expected 'tids' elements to be numeric");
+    }
+    ret.tids.push_back(tid->GetInt64());
+  }
+
+  // Parse and symbolize the stack trace itself.
+  const auto& stack = group_json["stack"];
+  if (PREDICT_FALSE(!stack.IsArray())) {
+    return Status::InvalidArgument("expected 'stack' to be an array");
+  }
+  for (const auto* frame = stack.Begin();
+       frame != stack.End();
+       ++frame) {
+    if (PREDICT_FALSE(!frame->IsString())) {
+      return Status::InvalidArgument("expected 'stack' elements to be strings");
+    }
+    ret.frame_addrs.emplace_back(frame->GetString());
+  }
+  *group = std::move(ret);
+  return Status::OK();
+}
+
+}  // anonymous namespace
+
 const char* RecordTypeToString(RecordType r) {
   switch (r) {
     case RecordType::kStacks: return "stacks"; break;
     case RecordType::kSymbols: return "symbols"; break;
+    case RecordType::kMetrics: return "metrics"; break;
     case RecordType::kUnknown: return "<unknown>"; break;
   }
   return "<unreachable>";
@@ -61,11 +118,350 @@ std::ostream& operator<<(std::ostream& o, RecordType r) {
   return o << RecordTypeToString(r);
 }
 
-void StackDumpingLogVisitor::VisitSymbol(const string& addr, const string& symbol) {
-  InsertIfNotPresent(&symbols_, addr, symbol);
+Status SymbolsRecord::FromParsedLine(const ParsedLine& pl) {
+  DCHECK_EQ(RecordType::kSymbols, pl.type());
+
+  if (!pl.json()->IsObject()) {
+    return Status::InvalidArgument("expected symbols data to be a JSON object");
+  }
+  for (auto it = pl.json()->MemberBegin();
+       it != pl.json()->MemberEnd();
+       ++it) {
+    if (PREDICT_FALSE(!it->value.IsString())) {
+      return Status::InvalidArgument("expected symbol values to be strings");
+    }
+    InsertIfNotPresent(&addr_to_symbol, it->name.GetString(), it->value.GetString());
+  }
+  return Status::OK();
+}
+
+Status MetricsRecord::FromParsedLine(const MetricsCollectingOpts& opts, const ParsedLine& pl) {
+  DCHECK_EQ(RecordType::kMetrics, pl.type());
+  if (!pl.json()->IsArray()) {
+    return Status::InvalidArgument("expected a metric JSON array");
+  }
+
+  MetricToEntities m;
+  // Initialize the metric maps based on the specified options.
+  for (const auto& metric_to_display_name : opts.simple_metric_names) {
+    const auto& full_metric_name = metric_to_display_name.first;
+    InsertIfNotPresent(&m, full_metric_name, EntityIdToValue());
+  }
+  for (const auto& metric_to_display_name : opts.rate_metric_names) {
+    const auto& full_metric_name = metric_to_display_name.first;
+    InsertIfNotPresent(&m, full_metric_name, EntityIdToValue());
+  }
+  for (const auto& metric_to_display_name : opts.hist_metric_names) {
+    const auto& full_metric_name = metric_to_display_name.first;
+    InsertIfNotPresent(&m, full_metric_name, EntityIdToValue());
+  }
+
+  // Each json blob has a list of metrics for entities (see below for the
+  // contents of each <entity>):
+  //   [{<entity>}, {<entity>}, {<entity>}]
+  //
+  // Iterate through each entity blob and pick out the metrics.
+  for (const auto* entity_json = pl.json()->Begin();
+       entity_json != pl.json()->End();
+       ++entity_json) {
+    if (!entity_json->IsObject()) {
+      return Status::InvalidArgument("expected JSON object");
+    }
+    if (!entity_json->HasMember("type") ||
+        !entity_json->HasMember("id") ||
+        !entity_json->HasMember("metrics")) {
+      return Status::InvalidArgument(
+          Substitute("incomplete entity entry: $0", string(entity_json->GetString())));
+    }
+    const string entity_type = (*entity_json)["type"].GetString();
+    const string entity_id = (*entity_json)["id"].GetString();
+    if (entity_type != "tablet" && entity_type != "table" && entity_type != "server") {
+      return Status::InvalidArgument(
+          Substitute("unexpected entity type: $0", entity_type));
+    }
+    if (entity_type == "table" &&
+        !opts.table_ids.empty() && !ContainsKey(opts.table_ids, entity_id)) {
+      // If the table id doesn't match the user's filter, ignore it. If the
+      // list of table ids is empty, no table ids are ignored.
+      continue;
+    }
+    if (entity_type == "tablet" &&
+        !opts.tablet_ids.empty() && !ContainsKey(opts.tablet_ids, entity_id)) {
+      // If the tablet id doesn't match the user's filter, ignore it. If the
+      // tablet list is empty, no tablet ids are ignored.
+      continue;
+    }
+
+    // Each entity has a list of metrics:
+    //   [{"name":"<metric_name>","value":"<metric_value>"},
+    //    {"name":"<hist_metric_name>","counts":"<hist_metric_counts>"]
+    const auto& metrics = (*entity_json)["metrics"];
+    if (!metrics.IsArray()) {
+      return Status::InvalidArgument(
+          Substitute("expected metric array: $0", metrics.GetString()));
+    }
+    for (const auto* metric_json = metrics.Begin();
+         metric_json != metrics.End();
+         ++metric_json) {
+      if (!metric_json->HasMember("name")) {
+        return Status::InvalidArgument(
+            Substitute("expected 'name' field in metric entry: $0", metric_json->GetString()));
+      }
+      const string& metric_name = (*metric_json)["name"].GetString();
+      const string& full_metric_name = Substitute("$0.$1", entity_type, metric_name);
+      EntityIdToValue* entity_id_to_value = FindOrNull(m, full_metric_name);
+      if (!entity_id_to_value) {
+        // We're looking at a metric that the user hasn't requested. Ignore
+        // this entry.
+        continue;
+      }
+      MetricValue v;
+      Status s = v.FromJson(*metric_json);
+      if (!s.ok()) {
+        continue;
+      }
+      // We expect that in a given line, each entity reports a given metric
+      // only once. In case this isn't true, we just update the value.
+      EmplaceOrUpdate(entity_id_to_value, entity_id, std::move(v));
+    }
+  }
+  metric_to_entities.swap(m);
+  timestamp = pl.timestamp();
+  return Status::OK();
+}
+
+MetricValue::MetricValue()
+    : type_(MetricType::kUninitialized) {
+}
+
+// Initializes this MetricValue from the JSON blob for a single metric entry.
+// A histogram metric carries parallel "counts"/"values" arrays; a plain
+// metric carries a single integer "value". Returns InvalidArgument on any
+// malformed input. Must be called at most once per MetricValue.
+Status MetricValue::FromJson(const rapidjson::Value& metric_json) {
+  DCHECK(MetricType::kUninitialized == type_);
+  DCHECK(!counts_.has_value());
+  DCHECK(!value_.has_value());
+  if (metric_json.HasMember("counts") && metric_json.HasMember("values")) {
+    // Add the counts from histogram metrics.
+    const rapidjson::Value& counts_json = metric_json["counts"];
+    const rapidjson::Value& values_json = metric_json["values"];
+    if (!counts_json.IsArray() || !values_json.IsArray()) {
+      // NOTE: don't format metric_json.GetString() into the message here:
+      // rapidjson's GetString() asserts when called on a non-string value,
+      // and 'metric_json' is an object.
+      return Status::InvalidArgument(
+          "expected 'counts' and 'values' to be arrays");
+    }
+    if (counts_json.Size() != values_json.Size()) {
+      return Status::InvalidArgument(
+          "expected 'counts' and 'values' to be the same size");
+    }
+    std::map<int64_t, int> counts;
+    for (rapidjson::SizeType i = 0; i < counts_json.Size(); i++) {
+      // Validate element types before extraction: GetInt64()/GetInt() assert
+      // on a type mismatch rather than returning an error.
+      if (!values_json[i].IsInt64() || !counts_json[i].IsInt()) {
+        return Status::InvalidArgument(
+            "expected 'counts' and 'values' elements to be integers");
+      }
+      int64_t v = values_json[i].GetInt64();
+      int c = counts_json[i].GetInt();
+      InsertOrUpdate(&counts, v, c);
+    }
+    counts_ = std::move(counts);
+    type_ = MetricType::kHistogram;
+  } else if (metric_json.HasMember("value")) {
+    const rapidjson::Value& value_json = metric_json["value"];
+    if (!value_json.IsInt64()) {
+      return Status::InvalidArgument("expected 'value' to be an int type");
+    }
+    // Add the value from plain metrics.
+    value_ = value_json.GetInt64();
+    type_ = MetricType::kPlain;
+  } else {
+    // As above, avoid GetString() on a non-string JSON value.
+    return Status::InvalidArgument(
+        "unexpected metric formatting: expected either a 'value' member or "
+        "'counts'/'values' members");
+  }
+  return Status::OK();
+}
+
+// Registers every metric of interest from 'opts' in the internal
+// entity-to-value map and prints the tab-separated header row: "timestamp",
+// one column per simple and rate metric, and eight statistic columns
+// (count/min/p50/p75/p95/p99/p99_99/max) per histogram metric. The column
+// order here must match the per-row output of VisitMetricsRecord().
+MetricCollectingLogVisitor::MetricCollectingLogVisitor(MetricsCollectingOpts opts)
+    : opts_(std::move(opts)) {
+  vector<string> display_names = { "timestamp" };
+  // Creates an initial (empty) entity-to-value map for 'full_name' and logs
+  // that we're collecting it; 'kind' is only used for the log message.
+  const auto register_metric = [&](const string& full_name,
+                                   const string& display_name,
+                                   const char* kind) {
+    LOG(INFO) << Substitute("collecting $0 metric $1 as $2",
+                            kind, full_name, display_name);
+    InsertIfNotPresent(&metric_to_entities_, full_name, EntityIdToValue());
+  };
+  for (const auto& [full_name, display_name] : opts_.simple_metric_names) {
+    register_metric(full_name, display_name, "simple");
+    display_names.emplace_back(display_name);
+  }
+  for (const auto& [full_name, display_name] : opts_.rate_metric_names) {
+    register_metric(full_name, display_name, "rate");
+    display_names.emplace_back(display_name);
+  }
+  for (const auto& [full_name, display_name] : opts_.hist_metric_names) {
+    register_metric(full_name, display_name, "histogram");
+    // A histogram expands into one output column per statistic.
+    for (const char* suffix :
+         { "count", "min", "p50", "p75", "p95", "p99", "p99_99", "max" }) {
+      display_names.emplace_back(Substitute("$0_$1", display_name, suffix));
+    }
+  }
+  cout << JoinStrings(display_names, "\t") << std::endl;
+}
+Status MetricCollectingLogVisitor::ParseRecord(const ParsedLine& pl) {
+  // Only metrics records are of interest to this visitor; all other record
+  // types are silently ignored.
+  if (pl.type() == RecordType::kMetrics) {
+    MetricsRecord mr;
+    RETURN_NOT_OK(mr.FromParsedLine(opts_, pl));
+    // Emit an output row for this record first, then fold its values into
+    // the visitor's running per-entity state.
+    RETURN_NOT_OK(VisitMetricsRecord(mr));
+    UpdateWithMetricsRecord(mr);
+  }
+  return Status::OK();
+}
+
+// Emits one tab-separated output row for 'mr': the timestamp, then one
+// column per simple metric, one per rate metric, and eight statistic columns
+// per histogram metric, in the same order as the header printed by the
+// constructor.
+Status MetricCollectingLogVisitor::VisitMetricsRecord(const MetricsRecord& mr) {
+  // Iterate through the user-requested metrics and display what we need to, as
+  // determined by 'opts_'.
+  cout << mr.timestamp;
+  for (const auto& full_and_display : opts_.simple_metric_names) {
+    const auto& full_name = full_and_display.first;
+    int64_t sum = SumPlainWithMetricRecord(mr, full_name);
+    cout << Substitute("\t$0", sum);
+  }
+  // Timestamps are in microseconds; rates are reported per second.
+  const double time_delta_secs =
+      static_cast<double>(mr.timestamp - last_visited_timestamp_) / 1e6;
+  for (const auto& full_and_display : opts_.rate_metric_names) {
+    // If this is the first record we're visiting, we can't compute a rate;
+    // seed the previous-sum map and print a zero rate instead.
+    const auto& full_name = full_and_display.first;
+    int64_t sum = SumPlainWithMetricRecord(mr, full_name);
+    if (last_visited_timestamp_ == 0) {
+      InsertOrDie(&rate_metric_prev_sum_, full_name, sum);
+      cout << "\t0";
+      continue;
+    }
+    int64_t prev_sum = FindOrDie(rate_metric_prev_sum_, full_name);
+    cout << Substitute("\t$0", static_cast<double>(sum - prev_sum) / time_delta_secs);
+    InsertOrUpdate(&rate_metric_prev_sum_, full_name, sum);
+  }
+  for (const auto& full_and_display : opts_.hist_metric_names) {
+    const auto& full_name = full_and_display.first;
+    const auto& mr_entities_to_vals = FindOrDie(mr.metric_to_entities, full_name);
+    const auto& entities_to_vals = FindOrDie(metric_to_entities_, full_name);
+    std::map<int64_t, int> all_counts;
+    // Aggregate any counts that didn't change and thus are not in 'mr',
+    // using the last-known values from the visitor's state.
+    for (const auto& [entity, val] : entities_to_vals) {
+      CHECK(val.counts_);
+      if (!ContainsKey(mr_entities_to_vals, entity)) {
+        MergeTokenCounts(&all_counts, *val.counts_);
+      }
+    }
+    // Now aggregate the counts that did.
+    for (const auto& [_, val] : mr_entities_to_vals) {
+      CHECK(val.counts_);
+      MergeTokenCounts(&all_counts, *val.counts_);
+    }
+    // We need to at least display a zero count if the metric didn't show up at
+    // all.
+    // NOTE(review): this actually inserts one sample of value 0, so the
+    // "count" column below reports 1 (not 0) for an absent metric — confirm
+    // this is intended.
+    if (all_counts.empty()) {
+      EmplaceOrDie(&all_counts, 0, 1);
+    }
+    // Total number of samples across all entities.
+    int counts_size = 0;
+    for (const auto& [_, count]: all_counts) {
+      counts_size += count;
+    }
+    // Cumulative-count thresholds corresponding to the min, p50, p75, p95,
+    // p99, p99.99, and max columns, in header order.
+    // TODO(awong): would using HdrHistogram be cleaner?
+    const vector<int> quantiles = { 0,
+                                    counts_size / 2,
+                                    static_cast<int>(counts_size * 0.75),
+                                    static_cast<int>(counts_size * 0.95),
+                                    static_cast<int>(counts_size * 0.99),
+                                    static_cast<int>(counts_size * 0.9999),
+                                    counts_size };
+    int quantile_idx = 0;
+    int count_idx = 0;
+    // Iterate through all the counts in ascending value order, aggregating
+    // the values; all_counts is a std::map, so iteration is sorted.
+    cout << Substitute("\t$0", counts_size);
+    for (const auto& vals_and_counts : all_counts) {
+      count_idx += vals_and_counts.second;
+      // Keep on printing for as long as this cumulative count index crosses
+      // the current quantile threshold; a single value can satisfy several
+      // quantiles at once.
+      while (quantile_idx < quantiles.size() &&
+             count_idx >= quantiles[quantile_idx]) {
+        cout << Substitute("\t$0", vals_and_counts.first);
+        quantile_idx++;
+      }
+    }
+  }
+  cout << std::endl;
+
+  return Status::OK();
+}
+
+// Folds the record's per-entity values into the visitor's running state and
+// remembers the record's timestamp for rate computations.
+void MetricCollectingLogVisitor::UpdateWithMetricsRecord(const MetricsRecord& mr) {
+  for (const auto& [metric_name, record_entities] : mr.metric_to_entities) {
+    // A MetricsRecord only ever contains user-requested metrics, all of
+    // which were registered in 'metric_to_entities_' by the constructor, so
+    // FindOrDie is safe here.
+    auto& tracked_entities = FindOrDie(metric_to_entities_, metric_name);
+    for (const auto& [entity_id, metric_value] : record_entities) {
+      InsertOrUpdate(&tracked_entities, entity_id, metric_value);
+    }
+  }
+  last_visited_timestamp_ = mr.timestamp;
+}
+
+// Sums the plain (non-histogram) metric 'full_metric_name' across all
+// entities, combining the fresh values in 'mr' with the last-known values
+// for entities the record omitted.
+int64_t MetricCollectingLogVisitor::SumPlainWithMetricRecord(
+    const MetricsRecord& mr, const string& full_metric_name) const {
+  const auto& record_vals = FindOrDie(mr.metric_to_entities, full_metric_name);
+  const auto& tracked_vals = FindOrDie(metric_to_entities_, full_metric_name);
+  int64_t total = 0;
+  // Entities absent from the record didn't change since the last line, so
+  // their last-known values still count towards the sum.
+  for (const auto& [entity_id, val] : tracked_vals) {
+    if (!ContainsKey(record_vals, entity_id)) {
+      CHECK(val.value_);
+      total += *val.value_;
+    }
+  }
+  // Entities present in the record contribute their fresh values.
+  for (const auto& [_, val] : record_vals) {
+    CHECK(val.value_);
+    total += *val.value_;
+  }
+  return total;
+}
+
+// Builds the full record up front from the line's default-parsed JSON (no
+// incremental parsing needed here) and dispatches on the record type;
+// records of any other type are ignored.
+Status StackDumpingLogVisitor::ParseRecord(const ParsedLine& pl) {
+  if (pl.type() == RecordType::kSymbols) {
+    SymbolsRecord sr;
+    RETURN_NOT_OK(sr.FromParsedLine(pl));
+    VisitSymbolsRecord(sr);
+  } else if (pl.type() == RecordType::kStacks) {
+    StacksRecord sr;
+    RETURN_NOT_OK(sr.FromParsedLine(pl));
+    VisitStacksRecord(sr);
+  }
+  return Status::OK();
+}
+
+// Merges the record's address-to-symbol mappings into the cumulative symbol
+// table, overwriting entries for previously-seen addresses.
+void StackDumpingLogVisitor::VisitSymbolsRecord(const SymbolsRecord& sr) {
+  InsertOrUpdateMany(&symbols_, sr.addr_to_symbol.begin(), sr.addr_to_symbol.end());
 }
 
 void StackDumpingLogVisitor::VisitStacksRecord(const StacksRecord& sr) {
+  static const string kUnknownSymbol = "<unknown>";
+
   if (!first_) {
     cout << endl << endl;
   }
@@ -76,7 +472,7 @@ void StackDumpingLogVisitor::VisitStacksRecord(const StacksRecord& sr) {
           << JoinMapped(group.tids, [](int t) { return std::to_string(t); }, ",")
           << "]" << endl;
     for (const auto& addr : group.frame_addrs) {
-      // NOTE: passing 'kUnknownSymbols' as the default instead of a "foo"
+      // NOTE: passing 'kUnknownSymbol' as the default instead of a "foo"
       // literal is important to avoid capturing a reference to a temporary.
       // See the FindWithDefault() docs for details.
       const auto& sym = FindWithDefault(symbols_, addr, kUnknownSymbol);
@@ -85,14 +481,14 @@ void StackDumpingLogVisitor::VisitStacksRecord(const StacksRecord& sr) {
   }
 }
 
-Status ParsedLine::Parse(string line) {
-  // Take ownership of the line to avoid copying substrings.
-  line_ = std::move(line);
+Status ParsedLine::Parse() {
+  // The header must be parsed first: it identifies the record type and
+  // locates the JSON payload that ParseJson() then validates.
+  RETURN_NOT_OK(ParseHeader());
+  return ParseJson();
+}
 
+Status ParsedLine::ParseHeader() {
   // Lines have the following format:
-  //
   //     I0220 17:38:09.950546 metrics 1519177089950546 <json blob>...
-  //
   if (line_.empty() || line_[0] != 'I') {
     return Status::InvalidArgument("lines must start with 'I'");
   }
@@ -106,114 +502,39 @@ Status ParsedLine::Parse(string line) {
   if (!safe_strto64(fields[3].data(), fields[3].size(), &time_us)) {
     return Status::InvalidArgument("invalid timestamp", fields[3]);
   }
-  // TODO(todd) JsonReader should be able to parse from a StringPiece
-  // directly instead of making the copy here.
-  json_.emplace(fields[4].ToString());
-  Status s = json_->Init();
-  if (!s.ok()) {
-    json_.reset();
-    return s.CloneAndPrepend("invalid JSON payload");
-  }
+  timestamp_ = time_us;
   date_ = fields[0];
   time_ = fields[1];
   if (fields[2] == "symbols") {
     type_ = RecordType::kSymbols;
   } else if (fields[2] == "stacks") {
     type_ = RecordType::kStacks;
+  } else if (fields[2] == "metrics") {
+    type_ = RecordType::kMetrics;
   } else {
     type_ = RecordType::kUnknown;
   }
+  json_.emplace(fields[4].ToString());
   return Status::OK();
 }
 
-string ParsedLine::date_time() const {
-  return Substitute("$0 $1", date_, time_);
-}
-
-LogParser::LogParser(LogVisitor* visitor)
-    : visitor_(CHECK_NOTNULL(visitor)) {
-}
-
-Status LogParser::ParseLine(string line) {
-  ParsedLine pl;
-  RETURN_NOT_OK(pl.Parse(std::move(line)));
-  switch (pl.type()) {
-    case RecordType::kSymbols:
-      RETURN_NOT_OK(ParseSymbols(pl));
-      break;
-    case RecordType::kStacks: {
-      RETURN_NOT_OK(ParseStacks(pl));
-      break;
-    }
-    default:
-      break;
-  }
-  return Status::OK();
-}
-
-Status LogParser::ParseSymbols(const ParsedLine& pl) {
-  CHECK_EQ(RecordType::kSymbols, pl.type());
-  if (!pl.json()->IsObject()) {
-    return Status::InvalidArgument("expected symbols data to be a JSON object");
-  }
-  for (auto it = pl.json()->MemberBegin();
-       it != pl.json()->MemberEnd();
-       ++it) {
-    if (PREDICT_FALSE(!it->value.IsString())) {
-      return Status::InvalidArgument("expected symbol values to be strings");
-    }
-    visitor_->VisitSymbol(it->name.GetString(), it->value.GetString());
+Status ParsedLine::ParseJson() {
+  // TODO(todd) JsonReader should be able to parse from a StringPiece
+  // directly instead of making the copy here.
+  Status s = json_->Init();
+  if (!s.ok()) {
+    json_.reset();
+    return s.CloneAndPrepend("invalid JSON payload");
   }
-
   return Status::OK();
 }
 
-Status LogParser::ParseStackGroup(const rapidjson::Value& group_json,
-                                  StacksRecord::Group* group) {
-  DCHECK(group);
-  StacksRecord::Group ret;
-  if (PREDICT_FALSE(!group_json.IsObject())) {
-    return Status::InvalidArgument("expected stacks groups to be JSON objects");
-  }
-  if (!group_json.HasMember("tids") || !group_json.HasMember("stack")) {
-    return Status::InvalidArgument("expected stacks groups to have frames and tids");
-  }
-
-  // Parse the tids.
-  const auto& tids = group_json["tids"];
-  if (PREDICT_FALSE(!tids.IsArray())) {
-    return Status::InvalidArgument("expected 'tids' to be an array");
-  }
-  ret.tids.reserve(tids.Size());
-  for (const auto* tid = tids.Begin();
-       tid != tids.End();
-       ++tid) {
-    if (PREDICT_FALSE(!tid->IsNumber())) {
-      return Status::InvalidArgument("expected 'tids' elements to be numeric");
-    }
-    ret.tids.push_back(tid->GetInt64());
-  }
-
-  // Parse and symbolize the stack trace itself.
-  const auto& stack = group_json["stack"];
-  if (PREDICT_FALSE(!stack.IsArray())) {
-    return Status::InvalidArgument("expected 'stack' to be an array");
-  }
-  for (const auto* frame = stack.Begin();
-       frame != stack.End();
-       ++frame) {
-    if (PREDICT_FALSE(!frame->IsString())) {
-      return Status::InvalidArgument("expected 'stack' elements to be strings");
-    }
-    ret.frame_addrs.emplace_back(frame->GetString());
-  }
-  *group = std::move(ret);
-  return Status::OK();
+string ParsedLine::date_time() const {
+  return Substitute("$0 $1", date_, time_);
 }
 
-Status LogParser::ParseStacks(const ParsedLine& pl) {
-  StacksRecord sr;
-  sr.date_time = pl.date_time();
+Status StacksRecord::FromParsedLine(const ParsedLine& pl) {
+  date_time = pl.date_time();
 
   const rapidjson::Value& json = *pl.json();
   if (!json.IsObject()) {
@@ -225,26 +546,64 @@ Status LogParser::ParseStacks(const ParsedLine& pl) {
     if (!json["reason"].IsString()) {
       return Status::InvalidArgument("expected stacks 'reason' to be a string");
     }
-    sr.reason = json["reason"].GetString();
+    reason = json["reason"].GetString();
   }
 
   // Parse groups.
   if (PREDICT_FALSE(!json.HasMember("groups"))) {
     return Status::InvalidArgument("no 'groups' field in stacks object");
   }
-  const auto& groups = json["groups"];
-  if (!groups.IsArray()) {
+  const auto& groups_json = json["groups"];
+  if (!groups_json.IsArray()) {
     return Status::InvalidArgument("'groups' field should be an array");
   }
 
-  for (const rapidjson::Value* group = groups.Begin();
-       group != groups.End();
+  for (const rapidjson::Value* group = groups_json.Begin();
+       group != groups_json.End();
        ++group) {
     StacksRecord::Group g;
     RETURN_NOT_OK(ParseStackGroup(*group, &g));
-    sr.groups.emplace_back(std::move(g));
+    groups.emplace_back(std::move(g));
+  }
+  return Status::OK();
+}
+
+// Verifies that the log file was opened successfully and is non-empty.
+// Returns IOError otherwise.
+Status LogFileParser::Init() {
+  errno = 0;
+  if (!fstream_.is_open() || !HasNext()) {
+    // Include the path in the message: ErrnoToString() alone is unhelpful,
+    // and for an open-but-empty file errno is 0 ("Success").
+    return Status::IOError(
+        Substitute("unable to read log file $0: $1", path_, ErrnoToString(errno)));
+  }
+  return Status::OK();
+}
+
+bool LogFileParser::HasNext() {
+  // peek() returns EOF once the stream is exhausted (or is in a failed
+  // state), so this doubles as a not-at-end and stream-usable check.
+  return fstream_.peek() != EOF;
+}
+
+// Reads and parses the next line of the file, handing the parsed line to the
+// visitor. Must only be called when HasNext() returns true.
+Status LogFileParser::ParseLine() {
+  DCHECK(HasNext());
+  ++line_number_;
+  string line;
+  std::getline(fstream_, line);
+  ParsedLine pl(std::move(line));
+  // Build the error message inside RETURN_NOT_OK_PREPEND: the macro only
+  // evaluates its message argument on failure, so the Substitute()
+  // allocation is skipped on the (common) success path.
+  RETURN_NOT_OK_PREPEND(pl.Parse(),
+      Substitute("failed to parse line $0 in file $1", line_number_, path_));
+  RETURN_NOT_OK_PREPEND(log_visitor_->ParseRecord(pl),
+      Substitute("failed to parse line $0 in file $1", line_number_, path_));
+  return Status::OK();
+}
+
+// Parses every remaining line of the file. An EndOfFile status from
+// ParseLine() is treated as a successful early exit (end of the requested
+// time range); any other failure aborts the parse.
+Status LogFileParser::Parse() {
+  // NOTE(review): 'line' is unused here — it looks like a leftover from the
+  // pre-refactor loop (reading now happens inside ParseLine()).
+  string line;
+  Status s;
+  while (HasNext()) {
+    s = ParseLine();
+    if (s.IsEndOfFile()) {
+      // NOTE(review): nothing in this file returns EndOfFile; presumably a
+      // LogVisitor::ParseRecord() implementation does once its time bound is
+      // exceeded — confirm against the visitor implementations.
+      LOG(INFO) << "Reached end of time range";
+      return Status::OK();
+    }
+    RETURN_NOT_OK(s);
+  }
-  visitor_->VisitStacksRecord(std::move(sr));
   return Status::OK();
 }
 
diff --git a/src/kudu/tools/diagnostics_log_parser.h b/src/kudu/tools/diagnostics_log_parser.h
index 818d7e849..b2ac8bc96 100644
--- a/src/kudu/tools/diagnostics_log_parser.h
+++ b/src/kudu/tools/diagnostics_log_parser.h
@@ -17,11 +17,16 @@
 
 #pragma once
 
-#include <iostream>
+#include <cstddef>
+#include <cstdint>
+#include <fstream>
+#include <map>
 #include <optional>
+#include <set>
 #include <string>
 #include <type_traits>
 #include <unordered_map>
+#include <utility>
 #include <vector>
 
 #include <glog/logging.h>
@@ -36,10 +41,10 @@ namespace kudu {
 namespace tools {
 
 // One of the record types from the log.
-// TODO(KUDU-2353) support metrics records.
 enum class RecordType {
   kSymbols,
   kStacks,
+  kMetrics,
   kUnknown
 };
 
@@ -47,6 +52,46 @@ const char* RecordTypeToString(RecordType r);
 
 std::ostream& operator<<(std::ostream& o, RecordType r);
 
+// A parsed line from the diagnostics log.
+//
+// Each line contains a timestamp, a record type, and some JSON data.
+class ParsedLine {
+ public:
+  // Takes ownership of 'line'; date_/time_ below reference its storage.
+  explicit ParsedLine(std::string line)
+      : line_(std::move(line)) {
+  }
+
+  // Parse a line from the diagnostics log: header fields first, then the
+  // JSON payload.
+  Status Parse();
+
+  // Parses the space-separated header (date, time, record type, microsecond
+  // timestamp) and locates the JSON payload.
+  Status ParseHeader();
+  // Validates the JSON payload located by ParseHeader(); must be called
+  // after a successful ParseHeader().
+  Status ParseJson();
+
+  RecordType type() const { return type_; }
+
+  // Returns the root of the parsed JSON payload; must only be called after
+  // a successful Parse()/ParseJson().
+  const rapidjson::Value* json() const {
+    CHECK(json_);
+    return json_->root();
+  }
+
+  // Returns "<date> <time>" as it appeared in the log line.
+  std::string date_time() const;
+
+  // The microsecond timestamp from the line header.
+  int64_t timestamp() const { return timestamp_; }
+
+ private:
+  const std::string line_;
+  RecordType type_;
+
+  // date_ and time_ point to substrings of line_.
+  StringPiece date_;
+  StringPiece time_;
+  int64_t timestamp_;
+
+  // The JSON payload. Empty until ParseHeader() has run, and reset again if
+  // ParseJson() finds the payload invalid.
+  std::optional<JsonReader> json_;
+};
+
 // A stack sample from the log.
 struct StacksRecord {
   // A group of threads which share the same stack trace.
@@ -57,6 +102,8 @@ struct StacksRecord {
     std::vector<std::string> frame_addrs;
   };
 
+  Status FromParsedLine(const ParsedLine& pl);
+
   // The time the stack traces were collected.
   std::string date_time;
 
@@ -71,79 +118,177 @@ struct StacksRecord {
 class LogVisitor {
  public:
   virtual ~LogVisitor() {}
-  virtual void VisitSymbol(const std::string& addr, const std::string& symbol) = 0;
-  virtual void VisitStacksRecord(const StacksRecord& sr) = 0;
+  virtual Status ParseRecord(const ParsedLine& pl) = 0;
 };
 
-// LogVisitor implementation which dumps the parsed stack records to cout.
-class StackDumpingLogVisitor : public LogVisitor {
+enum class MetricType {
+  kUninitialized,
+  // A metric represented by a single value.
+  kPlain,
+  // A metric represented by counts of values.
+  kHistogram,
+};
+
+// A value that a metric can have. Depending on the type of metric this is for,
+// the underlying value may be represented by a single value or by many (e.g.
+// in the case of histograms).
+class MetricValue {
  public:
-  void VisitSymbol(const std::string& addr, const std::string& symbol) override;
+  MetricValue();
 
-  void VisitStacksRecord(const StacksRecord& sr) override;
+  // Sets the metric values based on the input 'metric_json'.
+  Status FromJson(const rapidjson::Value& metric_json);
 
- private:
-  // True when we have not yet output any data.
-  bool first_ = true;
-  // Map from symbols to name.
-  std::unordered_map<std::string, std::string> symbols_;
+  // The type of this metric value.
+  MetricType type() const { return type_; }
+ protected:
+  friend class MetricCollectingLogVisitor;
+  MetricType type_;
 
-  const std::string kUnknownSymbol = "<unknown>";
+  std::optional<int64_t> value_;
+  std::optional<std::map<int64_t, int>> counts_;
 };
 
-// A parsed line from the diagnostics log.
+// For a given metric, a collection of entity IDs and their metric values.
+typedef std::unordered_map<std::string, MetricValue> EntityIdToValue;
+
+// Mapping from a full metric name to the collection of entity IDs and their
+// metric values, i.e.
+//   { <entity type>.<metric name>:string =>
+//       { <entity id>:string => <metric value>:MetricValue } }
+typedef std::unordered_map<std::string, EntityIdToValue> MetricToEntities;
+
+struct MetricsCollectingOpts {
+  // Maps the full metric name to its display name.
+  // The full metric name refers to "<entity type>.<metric name>".
+  typedef std::unordered_map<std::string, std::string> NameMap;
+
+  // The metric names and display names of the metrics of interest.
+  NameMap simple_metric_names;
+  NameMap rate_metric_names;
+  NameMap hist_metric_names;
+
+  // Set of table IDs whose metrics that should be aggregated.
+  // If empty, all tables' metrics are aggregated.
+  std::set<std::string> table_ids;
+
+  // Set of tablet IDs whose metrics that should be aggregated.
+  // If empty, all tablets' metrics are aggregated.
+  std::set<std::string> tablet_ids;
+};
+
+// A record containing the metrics for a single line.
+struct MetricsRecord {
+  // Populate this record with the contents of 'pl', only considering metrics
+  // specified by 'opts'.
+  Status FromParsedLine(const MetricsCollectingOpts& opts, const ParsedLine& pl);
+
+  // Maps the full metric name to the mapping between entity ID and metric for
+  // that entity.
+  MetricToEntities metric_to_entities;
+
+  // The timestamp associated with this record.
+  int64_t timestamp;
+};
+
+// LogVisitor that collects metrics, tracking values, aggregating counts, etc.
+// and prints them out.
 //
-// Each line contains a timestamp, a record type, and some JSON data.
-class ParsedLine {
+// A single MetricsCollectingLogVisitor may be used by multiple LogFileParsers.
+class MetricCollectingLogVisitor : public LogVisitor {
  public:
-  // Parse a line from the diagnostics log.
-  Status Parse(std::string line);
+  // Initializes the internal map to include the metrics specified by 'opts'.
+  explicit MetricCollectingLogVisitor(MetricsCollectingOpts opts);
 
-  RecordType type() const { return type_; }
+  // Takes a parsed line and parses its metric record if one exists. If 'pl'
+  // doesn't contain a metric record, this is a no-op.
+  Status ParseRecord(const ParsedLine& pl) override;
+ private:
+  // Prints the appropriate metrics from 'mr' and this visitor's internal maps.
+  Status VisitMetricsRecord(const MetricsRecord& mr);
 
-  const rapidjson::Value* json() const {
-    CHECK(json_);
-    return json_->root();
-  }
+  // Updates the internal maps to include the metrics in 'mr'.
+  void UpdateWithMetricsRecord(const MetricsRecord& mr);
 
-  std::string date_time() const;
+  // Calculate the sum of the plain metric (i.e. non-histogram) specified by
+  // 'full_metric_name', based on the existing values in our internal map and
+  // including any new values for entities in 'mr'.
+  int64_t SumPlainWithMetricRecord(const MetricsRecord& mr,
+                                   const std::string& full_metric_name) const;
 
- private:
-  std::string line_;
-  RecordType type_;
+  // Maps the full metric name to the mapping between entity IDs and their
+  // metric value. As the visitor visits new metrics records, this gets updated
+  // with the most up-to-date values.
+  //
+  // Note: we need to track per-entity metrics because, when logging, Kudu may
+  // omit metrics for entities if they don't change.
+  MetricToEntities metric_to_entities_;
+
+  // Maps the full metric name of a rate metric to the previous sum computed
+  // for that metric by this visitor.
+  std::map<std::string, int64_t> rate_metric_prev_sum_;
 
-  // date_ and time_ point to substrings of line_.
-  StringPiece date_;
-  StringPiece time_;
 
   // A JsonReader initialized from the most recent line.
   // This will be 'none' before any lines have been read.
   std::optional<JsonReader> json_;
+  // The timestamp of the last visited metrics record.
+  int64_t last_visited_timestamp_ = 0;
+
+  const MetricsCollectingOpts opts_;
 };
 
-// Parser for a metrics log.
-//
-// Each line should be fed to LogParser::ParseLine().
+struct SymbolsRecord {
+  Status FromParsedLine(const ParsedLine& pl);
+  std::unordered_map<std::string, std::string> addr_to_symbol;
+};
+
+// LogVisitor implementation which dumps the parsed stack records to std::cout.
+class StackDumpingLogVisitor : public LogVisitor {
+ public:
+  Status ParseRecord(const ParsedLine& pl) override;
+
+ private:
+  void VisitSymbolsRecord(const SymbolsRecord& sr);
+  void VisitStacksRecord(const StacksRecord& sr);
+  // True when we have not yet output any data.
+  bool first_ = true;
+  // Map from symbols to name.
+  std::unordered_map<std::string, std::string> symbols_;
+};
+
+// Parser for a diagnostic log files that may include stacks or metrics logs.
 //
 // This instance follows a 'SAX' model. As records are available, the appropriate
 // functions are invoked on the visitor provided in the constructor.
-class LogParser {
+class LogFileParser {
  public:
-  explicit LogParser(LogVisitor* visitor);
+  explicit LogFileParser(LogVisitor* lv, std::string path)
+      : path_(std::move(path)),
+        fstream_(path_),
+        log_visitor_(lv) {}
 
-  // Parse the next line of the log. This function may invoke the appropriate
-  // visitor functions zero or more times.
-  Status ParseLine(std::string line);
+  // Initializes internal state, e.g. the file stream for the log file.
+  Status Init();
 
- private:
-  Status ParseSymbols(const ParsedLine& lf);
+  // Returns whether or not the underlying file has more lines to parse.
+  bool HasNext();
 
-  static Status ParseStackGroup(const rapidjson::Value& group_json,
-                                StacksRecord::Group* group);
+  // Parses the rest of the lines in the file.
+  Status Parse();
+
+ private:
+  // Parses the next line in the file. Should only be called if HasNext()
+  // returns true.
+  Status ParseLine();
 
-  Status ParseStacks(const ParsedLine& lf);
+  size_t line_number_ = 0;
+  const std::string path_;
+  std::ifstream fstream_;
 
-  LogVisitor* visitor_;
+  // Visitor for doing something with each parsed line.
+  LogVisitor* log_visitor_;
 };
 
 } // namespace tools
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 0c6cc294f..76b350707 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -1235,6 +1235,7 @@ TEST_F(ToolTest, TestModeHelp) {
   {
     const vector<string> kDiagnoseModeRegexes = {
         "parse_stacks.*Parse sampled stack traces",
+        "parse_metrics.*Parse metrics out of a diagnostics log",
     };
     NO_FATALS(RunTestHelp("diagnose", kDiagnoseModeRegexes));
   }
@@ -7837,11 +7838,72 @@ TEST_F(ToolTest, TestParseStacks) {
   Status s = RunActionStderrString(
       Substitute("diagnose parse_stacks $0", kBadDataPath),
       &stderr);
-  ASSERT_TRUE(s.IsRuntimeError());
-  ASSERT_STR_MATCHES(stderr, "failed to parse stacks from .*: at line 1: "
+  ASSERT_TRUE(s.IsRuntimeError()) << s.ToString();
+  ASSERT_STR_MATCHES(stderr, "failed to parse stacks from .* line 1.*"
                              "invalid JSON payload.*Missing a closing quotation mark in string");
 }
 
+TEST_F(ToolTest, TestParseMetrics) {
+  const string kDataPath = JoinPathSegments(GetTestExecutableDirectory(),
+                                            "testdata/sample-diagnostics-log.txt");
+  const string kBadDataPath = JoinPathSegments(GetTestExecutableDirectory(),
+                                               "testdata/bad-diagnostics-log.txt");
+  // Check out tablet metrics.
+  {
+    string stdout;
+    NO_FATALS(RunActionStdoutString(
+        Substitute("diagnose parse_metrics $0 -simple-metrics=tablet.on_disk_size:size_bytes "
+                   "-rate-metrics=tablet.on_disk_size:bytes_rate", kDataPath),
+        &stdout));
+    // Spot check a few of the things that should be in the output.
+    ASSERT_STR_CONTAINS(stdout, "timestamp\tsize_bytes\tbytes_rate");
+    ASSERT_STR_CONTAINS(stdout, "1521053715220449\t69705682\t0");
+    ASSERT_STR_CONTAINS(stdout, "1521053775220513\t69705682\t0");
+    ASSERT_STR_CONTAINS(stdout, "1521053835220611\t69712809\t118.78313932087244");
+    ASSERT_STR_CONTAINS(stdout, "1521053895220710\t69712809\t0");
+    ASSERT_STR_CONTAINS(stdout, "1661840031227204\t69712809\t0");
+  }
+  // Check out table metrics.
+  {
+    string stdout;
+    NO_FATALS(RunActionStdoutString(
+        Substitute("diagnose parse_metrics $0 "
+                   "-simple-metrics=table.on_disk_size:size,table.live_row_count:row_count",
+                   kDataPath),
+        &stdout));
+    // Spot check a few of the things that should be in the output.
+    ASSERT_STR_CONTAINS(stdout, "timestamp\trow_count\tsize");
+    ASSERT_STR_CONTAINS(stdout, "1521053715220449\t0\t0");
+    ASSERT_STR_CONTAINS(stdout, "1521053775220513\t0\t0");
+    ASSERT_STR_CONTAINS(stdout, "1521053835220611\t0\t0");
+    ASSERT_STR_CONTAINS(stdout, "1521053895220710\t0\t0");
+    ASSERT_STR_CONTAINS(stdout, "1661840031227204\t228265\t78540528");
+  }
+  // Check out table metrics with default metric names.
+  {
+    string stdout;
+    NO_FATALS(RunActionStdoutString(
+        Substitute("diagnose parse_metrics $0 "
+                   "-simple-metrics=table.on_disk_size,table.live_row_count",
+                   kDataPath),
+        &stdout));
+    // Spot check a few of the things that should be in the output.
+    ASSERT_STR_CONTAINS(stdout, "timestamp\tlive_row_count\ton_disk_size");
+    ASSERT_STR_CONTAINS(stdout, "1521053715220449\t0\t0");
+    ASSERT_STR_CONTAINS(stdout, "1521053775220513\t0\t0");
+    ASSERT_STR_CONTAINS(stdout, "1521053835220611\t0\t0");
+    ASSERT_STR_CONTAINS(stdout, "1521053895220710\t0\t0");
+    ASSERT_STR_CONTAINS(stdout, "1661840031227204\t228265\t78540528");
+  }
+
+  string stderr;
+  ASSERT_OK(RunActionStderrString(
+      Substitute("diagnose parse_metrics $0 -simple-metrics=tablet.on_disk_size:size",
+      kBadDataPath), &stderr));
+  ASSERT_STR_MATCHES(stderr, "Skipping file.*invalid JSON payload.*Missing "
+                             "a closing quotation mark in string");
+}
+
 TEST_F(ToolTest, ClusterNameResolverEnvNotSet) {
   const string kClusterName = "external_mini_cluster";
   CHECK_ERR(unsetenv("KUDU_CONFIG"));
diff --git a/src/kudu/tools/testdata/sample-diagnostics-log.txt b/src/kudu/tools/testdata/sample-diagnostics-log.txt
index 6cde69fe4..5b86395e9 100644
--- a/src/kudu/tools/testdata/sample-diagnostics-log.txt
+++ b/src/kudu/tools/testdata/sample-diagnostics-log.txt
@@ -8,3 +8,4 @@ I0314 11:56:15.220513 metrics 1521053775220513 [{"type":"tablet","id":"000000000
 I0314 11:57:11.020710 stacks 1521053831020710 {"reason":"periodic","groups":[{"tids":[37480],"stack":["0x3f5ec0f710","0x3f5e8df0d3","0xa52153","0x3f5ec079d1","0x3f5e8e88fd"]},{"tids":[37274],"stack":["0x3f5ec0f710","0x3f5e8e5159","0x1caba0d","0x1caef51","0xa30202","0xa339c0","0x1d46ca1","0x3f5ec079d1","0x3f5e8e88fd"]},{"tids":[37273,37272,37271,37270,37269,37268,37267,37266,37265,37264,37263,37261,37260,37259,37258,37262],"stack":["0x3f5ec0f710","0x3f5e8e8ef3","0x1d87173","0x1d8a80c","0x [...]
 I0314 11:57:15.220611 metrics 1521053835220611 [{"type":"tablet","id":"00000000000000000000000000000000","metrics":[{"name":"memrowset_size","value":265},{"name":"on_disk_data_size","value":54961885},{"name":"rows_updated","value":656},{"name":"rows_deleted","value":1},{"name":"scans_started","value":10},{"name":"bloom_lookups","value":1314},{"name":"key_file_lookups","value":1314},{"name":"delta_file_lookups","value":1314},{"name":"bloom_lookups_per_op","total_count":657,"min":2,"mean": [...]
 I0314 11:58:15.220710 metrics 1521053895220710 [{"type":"tablet","id":"00000000000000000000000000000000","metrics":[{"name":"memrowset_size","value":265},{"name":"on_disk_data_size","value":54961885},{"name":"rows_updated","value":781},{"name":"rows_deleted","value":1},{"name":"scans_started","value":12},{"name":"bloom_lookups","value":1564},{"name":"key_file_lookups","value":1564},{"name":"delta_file_lookups","value":1564},{"name":"bloom_lookups_per_op","total_count":782,"min":2,"mean": [...]
+I0830 13:13:51.227204 metrics 1661840031227204 [{"type":"table","id":"ff37a03f59074a60b5f056c92a94c255","metrics":[{"name":"live_row_count","value":8},{"name":"on_disk_size","value":8422017}]},{"type":"table","id":"ff2da5071f9643bdb74f6ab4d6a7fddf","metrics":[{"name":"live_row_count","value":178894},{"name":"on_disk_size","value":36993799}]},{"type":"table","id":"fe78d48a0741412cbc2ed2abce0869aa","metrics":[{"name":"live_row_count","value":5},{"name":"on_disk_size","value":8421004}]},{"t [...]
diff --git a/src/kudu/tools/tool_action_diagnose.cc b/src/kudu/tools/tool_action_diagnose.cc
index ccbdf84d3..52553b7d3 100644
--- a/src/kudu/tools/tool_action_diagnose.cc
+++ b/src/kudu/tools/tool_action_diagnose.cc
@@ -16,31 +16,52 @@
 // under the License.
 
 #include <algorithm>
-#include <cerrno>
 #include <fstream> // IWYU pragma: keep
 #include <functional>
+#include <iterator>
 #include <memory>
+#include <set>
 #include <string>
-#include <unordered_map>
 #include <utility>
 #include <vector>
 
-#include <gflags/gflags_declare.h>
+#include <gflags/gflags.h>
+#include <glog/logging.h>
 
+#include "kudu/gutil/map-util.h"
+#include "kudu/gutil/strings/split.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/tools/diagnostics_log_parser.h"
 #include "kudu/tools/tool_action.h"
-#include "kudu/util/errno.h"
+#include "kudu/util/regex.h"
 #include "kudu/util/status.h"
 
+DEFINE_string(table_ids, "",
+              "comma-separated list of table identifiers to aggregate table "
+              "metrics across; defaults to aggregating all tables");
+DEFINE_string(tablet_ids, "",
+              "comma-separated list of tablet identifiers to aggregate tablet "
+              "metrics across; defaults to aggregating all tablets");
+DEFINE_string(simple_metrics, "",
+              "comma-separated list of metrics to parse, of the format "
+              "<entity>.<metric name>[:<display name>], where <entity> must be "
+              "one of 'server', 'table', or 'tablet', and <metric name> refers "
+              "to the metric name; <display name> is optional and stands for "
+              "the column name/tag that the metric is output with");
+DEFINE_string(rate_metrics, "",
+              "comma-separated list of metrics to compute the rates of");
+DEFINE_string(histogram_metrics, "",
+              "comma-separated list of histogram metrics to parse "
+              "percentiles for");
+
 DECLARE_int64(timeout_ms);
 DECLARE_string(format);
 
 using std::ifstream;
 using std::string;
 using std::unique_ptr;
-using std::unordered_map;
 using std::vector;
+using strings::Split;
 using strings::Substitute;
 
 namespace kudu {
@@ -52,23 +73,10 @@ constexpr const char* const kLogPathArg = "path";
 constexpr const char* const kLogPathArgDesc =
     "Path(s) to log file(s) to parse, separated by a whitespace, if many";
 
-Status ParseStacksFromPath(const string& path) {
-  errno = 0;
-  ifstream in(path);
-  if (!in.is_open()) {
-    return Status::IOError(ErrnoToString(errno));
-  }
-  StackDumpingLogVisitor dlv;
-  LogParser lp(&dlv);
-  string line;
-  int line_number = 0;
-  while (std::getline(in, line)) {
-    line_number++;
-    RETURN_NOT_OK_PREPEND(lp.ParseLine(std::move(line)),
-                          Substitute("at line $0", line_number));
-  }
-
-  return Status::OK();
+Status ParseStacksFromPath(StackDumpingLogVisitor* dlv, const string& path) {
+  LogFileParser fp(dlv, path);
+  RETURN_NOT_OK(fp.Init());
+  return fp.Parse();
 }
 
 Status ParseStacks(const RunnerContext& context) {
@@ -76,13 +84,105 @@ Status ParseStacks(const RunnerContext& context) {
   // The file names are such that lexicographic sorting reflects
   // timestamp-based sorting.
   std::sort(paths.begin(), paths.end());
+  StackDumpingLogVisitor dlv;
   for (const auto& path : paths) {
-    RETURN_NOT_OK_PREPEND(ParseStacksFromPath(path),
+    RETURN_NOT_OK_PREPEND(ParseStacksFromPath(&dlv, path),
                           Substitute("failed to parse stacks from $0", path));
   }
   return Status::OK();
 }
 
+struct MetricNameParams {
+  string entity;
+  string metric_name;
+  string display_name;
+};
+
+// Parses a metric parameter of the form <entity>.<metric>:<display name>.
+Status SplitMetricNameParams(const string& name_str, MetricNameParams* out) {
+
+  vector<string> matches;
+  static KuduRegex re("([-_[:alpha:]]+)\\.([-_[:alpha:]]+):?([-_[:alpha:]]+)?", 3);
+  if (!re.Match(name_str, &matches)) {
+    return Status::InvalidArgument(Substitute("could not parse metric "
+       "parameter. Expected <entity>.<metric>:<display name>, got $0", name_str));
+  }
+  DCHECK_EQ(3, matches.size());
+  *out = { std::move(matches[0]), std::move(matches[1]), std::move(matches[2]) };
+  return Status::OK();
+}
+
// Splits the input string by ',', skipping empty tokens (so leading,
// trailing, or doubled commas produce no empty entries).
vector<string> SplitOnComma(const string& str) {
  return Split(str, ",", strings::SkipEmpty());
}
+
+// Takes a metric name parameter string and inserts the metric names into the
+// name map.
+Status AddMetricsToDisplayNameMap(const string& metric_params_str,
+                                  MetricsCollectingOpts::NameMap* name_map) {
+  if (metric_params_str.empty()) {
+    return Status::OK();
+  }
+  vector<string> metric_params = SplitOnComma(metric_params_str);
+  for (const auto& metric_param : metric_params) {
+    MetricNameParams params;
+    RETURN_NOT_OK(SplitMetricNameParams(metric_param, &params));
+    if (params.display_name.empty()) {
+      params.display_name = params.metric_name;
+    }
+    const string& entity = params.entity;
+    if (entity != "server" && entity != "table" && entity != "tablet") {
+      return Status::InvalidArgument(
+          Substitute("unexpected entity type: $0", entity));
+    }
+    const string& metric_name = params.metric_name;
+    const string full_name = Substitute("$0.$1", entity, metric_name);
+    if (!EmplaceIfNotPresent(name_map, full_name, std::move(params.display_name))) {
+      return Status::InvalidArgument(
+          Substitute("duplicate metric name for $0.$1", entity, metric_name));
+    }
+  }
+  return Status::OK();
+}
+
+Status ParseMetrics(const RunnerContext& context) {
+  // Parse the metric name parameters.
+  MetricsCollectingOpts opts;
+  RETURN_NOT_OK(AddMetricsToDisplayNameMap(FLAGS_simple_metrics,
+                                           &opts.simple_metric_names));
+  RETURN_NOT_OK(AddMetricsToDisplayNameMap(FLAGS_rate_metrics,
+                                           &opts.rate_metric_names));
+  RETURN_NOT_OK(AddMetricsToDisplayNameMap(FLAGS_histogram_metrics,
+                                           &opts.hist_metric_names));
+
+  // Parse the table ids.
+  if (!FLAGS_table_ids.empty()) {
+    auto ids = SplitOnComma(FLAGS_table_ids);
+    std::move(ids.begin(), ids.end(),
+              std::inserter(opts.table_ids, opts.table_ids.end()));
+  }
+  // Parse the tablet ids.
+  if (!FLAGS_tablet_ids.empty()) {
+    auto ids = SplitOnComma(FLAGS_tablet_ids);
+    std::move(ids.begin(), ids.end(),
+              std::inserter(opts.tablet_ids, opts.tablet_ids.end()));
+  }
+
+  // Sort the files lexicographically to put them in increasing timestamp order.
+  vector<string> paths = context.variadic_args;
+  std::sort(paths.begin(), paths.end());
+  MetricCollectingLogVisitor mlv(std::move(opts));
+  for (const string& path : paths) {
+    LogFileParser lp(&mlv, path);
+    Status s = lp.Init().AndThen([&lp] {
+      return lp.Parse();
+    });
+    WARN_NOT_OK(s, Substitute("Skipping file $0", path));
+  }
+  return Status::OK();
+}
+
 } // anonymous namespace
 
 unique_ptr<Mode> BuildDiagnoseMode() {
@@ -92,9 +192,21 @@ unique_ptr<Mode> BuildDiagnoseMode() {
       .AddRequiredVariadicParameter({ kLogPathArg, kLogPathArgDesc })
       .Build();
 
+  // TODO(awong): add timestamp bounds
+  unique_ptr<Action> parse_metrics =
+      ActionBuilder("parse_metrics", &ParseMetrics)
+      .Description("Parse metrics out of a diagnostics log")
+      .AddRequiredVariadicParameter({ kLogPathArg, kLogPathArgDesc })
+      .AddOptionalParameter("tablet_ids")
+      .AddOptionalParameter("simple_metrics")
+      .AddOptionalParameter("rate_metrics")
+      .AddOptionalParameter("histogram_metrics")
+      .Build();
+
   return ModeBuilder("diagnose")
       .Description("Diagnostic tools for Kudu servers and clusters")
       .AddAction(std::move(parse_stacks))
+      .AddAction(std::move(parse_metrics))
       .Build();
 }
 
diff --git a/src/kudu/util/regex.h b/src/kudu/util/regex.h
new file mode 100644
index 000000000..341811ba6
--- /dev/null
+++ b/src/kudu/util/regex.h
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#pragma once
+
+#include <mutex>
+#include <string>
+#include <vector>
+
+#include <glog/logging.h>
+#include <regex.h>
+
+// Wrapper around regex_t to get the regex matches from a string.
+class KuduRegex {
+ public:
+  // A regex for the given pattern that is expected to have 'num_matches'
+  // matches.
+  KuduRegex(const char* pattern, size_t num_matches)
+      : num_matches_(num_matches) {
+    CHECK_EQ(0, regcomp(&re_, pattern, REG_EXTENDED));
+  }
+
+  bool Match(const std::string& in, std::vector<std::string>* matches) {
+    regmatch_t match[num_matches_ + 1];
+    if (regexec(&re_, in.c_str(), num_matches_ + 1, match, 0) != 0) {
+      return false;
+    }
+    for (size_t i = 1; i < num_matches_ + 1; ++i) {
+      if (match[i].rm_so == -1) {
+        // An empty string for each optional regex component.
+        matches->emplace_back("");
+      } else {
+        matches->emplace_back(in.substr(match[i].rm_so,
+                                        match[i].rm_eo - match[i].rm_so));
+      }
+    }
+    return true;
+  }
+
+ private:
+   const size_t num_matches_;
+   regex_t re_;
+};


[kudu] 01/02: [test] make master_authz-itest more robust

Posted by al...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 54c58fb8c521df6f220c097d15ca70c473beed7f
Author: kedeng <kd...@gmail.com>
AuthorDate: Thu Sep 29 11:45:08 2022 +0800

    [test] make master_authz-itest more robust
    
    I have noticed that the master_authz-itest unit test may fail like this:
    `
    I20220929 10:56:08.093688   728 heartbeater.cc:466] Master 127.31.156.254:23975 was elected leader, sending a full tablet report...
    9月 29 10:56:16 10-120-18-85 krb5kdc[32375](info): TGS_REQ (2 etypes {17 16}) 127.0.0.1: ISSUE: authtime 1664420166, etypes {rep=17 tkt=17 ses=17}, kudu/127.31.156.254@KRBTEST.COM for HTTP/127.31.156.212@KRBTEST.COM
    I20220929 10:56:17.372444   532 master_service.cc:901] request to refresh authz privileges cache from {username='test-admin', principal='test-admin@KRBTEST.COM'} at 127.0.0.1:17302
    /data/github-kudu/kudu/src/kudu/integration-tests/master_authz-itest.cc:597: Failure
    Failed
    Bad status: Timed out: RefreshAuthzCache RPC to 127.31.156.254:23975 timed out after 10.000s (SENT)
    /data/github-kudu/kudu/src/kudu/integration-tests/master_authz-itest.cc:1083: Failure
    Expected: SetUpCluster(std::get<0>(GetParam())) doesn't generate new fatal failures in the current thread.
      Actual: it does.
    I20220929 10:56:27.373370 32371 external_mini_cluster-itest-base.cc:80] Found fatal failure
    `
    I have tested this case many times and found that every failure is due to a timeout.
    So I added a retry policy to avoid failing outright when a timeout occurs.
    
    Change-Id: I2da13d18a8d7d6bcff5a41cdbad6d6ade6d528f3
    Reviewed-on: http://gerrit.cloudera.org:8080/19053
    Tested-by: Kudu Jenkins
    Reviewed-by: Yingchun Lai <ac...@gmail.com>
---
 src/kudu/integration-tests/master_authz-itest.cc | 214 +++++++++++------------
 1 file changed, 107 insertions(+), 107 deletions(-)

diff --git a/src/kudu/integration-tests/master_authz-itest.cc b/src/kudu/integration-tests/master_authz-itest.cc
index 16a5ef47b..cb8857e88 100644
--- a/src/kudu/integration-tests/master_authz-itest.cc
+++ b/src/kudu/integration-tests/master_authz-itest.cc
@@ -22,6 +22,7 @@
 #include <string>
 #include <thread>
 #include <tuple>
+#include <type_traits>
 #include <unordered_set>
 #include <utility>
 #include <vector>
@@ -33,14 +34,13 @@
 #include "kudu/client/schema.h"
 #include "kudu/client/shared_ptr.h" // IWYU pragma: keep
 #include "kudu/common/table_util.h"
-#include "kudu/common/wire_protocol.h"
 #include "kudu/gutil/stl_util.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/hms/hms_client.h"
 #include "kudu/integration-tests/cluster_itest_util.h"
 #include "kudu/integration-tests/external_mini_cluster-itest-base.h"
 #include "kudu/master/master.pb.h"
-#include "kudu/master/master.proxy.h"
+#include "kudu/master/master.proxy.h" // IWYU pragma: keep
 #include "kudu/mini-cluster/external_mini_cluster.h"
 #include "kudu/ranger/mini_ranger.h"
 #include "kudu/ranger/ranger.pb.h"
@@ -163,16 +163,16 @@ class MasterAuthzITestHarness {
                                     table_id, &table_locations);
   }
 
-  static Status RefreshAuthzPolicies(const unique_ptr<ExternalMiniCluster>& cluster) {
+  static void RefreshAuthzPolicies(const unique_ptr<ExternalMiniCluster>& cluster) {
     RefreshAuthzCacheRequestPB req;
     RefreshAuthzCacheResponsePB resp;
-    RpcController rpc;
-    rpc.set_timeout(MonoDelta::FromSeconds(10));
-    RETURN_NOT_OK(cluster->master_proxy()->RefreshAuthzCache(req, &resp, &rpc));
-    if (resp.has_error()) {
-      return StatusFromPB(resp.error().status());
-    }
-    return Status::OK();
+
+    ASSERT_EVENTUALLY([&] {
+      RpcController rpc;
+      rpc.set_timeout(MonoDelta::FromSeconds(10));
+      ASSERT_OK(cluster->master_proxy()->RefreshAuthzCache(req, &resp, &rpc));
+      ASSERT_FALSE(resp.has_error());
+    });
   }
 
   static Status IsCreateTableDone(const OperationParams& p,
@@ -332,30 +332,30 @@ class MasterAuthzITestHarness {
     return opts;
   }
 
-  virtual Status GrantCreateTablePrivilege(const PrivilegeParams& p,
-                                           const unique_ptr<ExternalMiniCluster>& cluster) = 0;
-  virtual Status GrantDropTablePrivilege(const PrivilegeParams& p,
+  virtual void GrantCreateTablePrivilege(const PrivilegeParams& p,
                                          const unique_ptr<ExternalMiniCluster>& cluster) = 0;
-  virtual Status GrantAlterTablePrivilege(const PrivilegeParams& p,
-                                          const unique_ptr<ExternalMiniCluster>& cluster) = 0;
-  virtual Status GrantAlterWithGrantTablePrivilege(
+  virtual void GrantDropTablePrivilege(const PrivilegeParams& p,
+                                       const unique_ptr<ExternalMiniCluster>& cluster) = 0;
+  virtual void GrantAlterTablePrivilege(const PrivilegeParams& p,
+                                        const unique_ptr<ExternalMiniCluster>& cluster) = 0;
+  virtual void GrantAlterWithGrantTablePrivilege(
       const PrivilegeParams& p,
       const unique_ptr<ExternalMiniCluster>& cluster) = 0;
-  virtual Status GrantRenameTablePrivilege(const PrivilegeParams& p,
-                                           const unique_ptr<ExternalMiniCluster>& cluster) = 0;
-  virtual Status GrantGetMetadataTablePrivilege(const PrivilegeParams& p,
-                                                const unique_ptr<ExternalMiniCluster>& cluster) = 0;
-  virtual Status GrantGetMetadataDatabasePrivilege(const PrivilegeParams& p,
-                                                   const unique_ptr<ExternalMiniCluster>& cluster)
-    = 0;
-  virtual Status GrantAllTablePrivilege(const PrivilegeParams& p,
-                                        const unique_ptr<ExternalMiniCluster>& cluster) = 0;
-  virtual Status GrantAllDatabasePrivilege(const PrivilegeParams& p,
-                                           const unique_ptr<ExternalMiniCluster>& cluster) = 0;
-  virtual Status GrantAllWithGrantTablePrivilege(const PrivilegeParams& p,
+  virtual void GrantRenameTablePrivilege(const PrivilegeParams& p,
+                                         const unique_ptr<ExternalMiniCluster>& cluster) = 0;
+  virtual void GrantGetMetadataTablePrivilege(const PrivilegeParams& p,
+                                              const unique_ptr<ExternalMiniCluster>& cluster) = 0;
+  virtual void GrantGetMetadataDatabasePrivilege(const PrivilegeParams& p,
                                                  const unique_ptr<ExternalMiniCluster>& cluster)
     = 0;
-  virtual Status GrantAllWithGrantDatabasePrivilege(
+  virtual void GrantAllTablePrivilege(const PrivilegeParams& p,
+                                      const unique_ptr<ExternalMiniCluster>& cluster) = 0;
+  virtual void GrantAllDatabasePrivilege(const PrivilegeParams& p,
+                                         const unique_ptr<ExternalMiniCluster>& cluster) = 0;
+  virtual void GrantAllWithGrantTablePrivilege(const PrivilegeParams& p,
+                                               const unique_ptr<ExternalMiniCluster>& cluster)
+    = 0;
+  virtual void GrantAllWithGrantDatabasePrivilege(
       const PrivilegeParams& p,
       const unique_ptr<ExternalMiniCluster>& cluster) = 0;
   virtual Status CreateTable(const OperationParams& p,
@@ -386,25 +386,25 @@ class RangerITestHarness : public MasterAuthzITestHarness {
  public:
   static constexpr int kSleepAfterNewPolicyMs = 1400;
 
-  Status GrantCreateTablePrivilege(const PrivilegeParams& p,
-                                   const unique_ptr<ExternalMiniCluster>& cluster) override {
+  void GrantCreateTablePrivilege(const PrivilegeParams& p,
+                                 const unique_ptr<ExternalMiniCluster>& cluster) override {
     AuthorizationPolicy policy;
     policy.databases.emplace_back(p.db_name);
     // IsCreateTableDone() requires METADATA on the table level.
     policy.tables.emplace_back("*");
     policy.items.emplace_back(PolicyItem({p.user_name}, {ActionPB::CREATE}, false));
-    RETURN_NOT_OK(ranger_->AddPolicy(move(policy)));
-    return RefreshAuthzPolicies(cluster);
+    ASSERT_OK(ranger_->AddPolicy(move(policy)));
+    NO_FATALS(RefreshAuthzPolicies(cluster));
   }
 
-  Status GrantDropTablePrivilege(const PrivilegeParams& p,
-                                 const unique_ptr<ExternalMiniCluster>& cluster) override {
+  void GrantDropTablePrivilege(const PrivilegeParams& p,
+                               const unique_ptr<ExternalMiniCluster>& cluster) override {
     AuthorizationPolicy policy;
     policy.databases.emplace_back(p.db_name);
     policy.tables.emplace_back(p.table_name);
     policy.items.emplace_back(PolicyItem({p.user_name}, {ActionPB::DROP}, false));
-    RETURN_NOT_OK(ranger_->AddPolicy(move(policy)));
-    return RefreshAuthzPolicies(cluster);
+    ASSERT_OK(ranger_->AddPolicy(move(policy)));
+    NO_FATALS(RefreshAuthzPolicies(cluster));
   }
 
   void CheckTableDoesNotExist(const string& database_name, const string& table_name,
@@ -414,111 +414,111 @@ class RangerITestHarness : public MasterAuthzITestHarness {
     ASSERT_TRUE(s.IsNotFound()) << s.ToString();
   }
 
-  Status GrantAlterTablePrivilege(const PrivilegeParams& p,
-                                  const unique_ptr<ExternalMiniCluster>& cluster) override {
+  void GrantAlterTablePrivilege(const PrivilegeParams& p,
+                                const unique_ptr<ExternalMiniCluster>& cluster) override {
     AuthorizationPolicy policy;
     policy.databases.emplace_back(p.db_name);
     policy.tables.emplace_back(p.table_name);
     policy.items.emplace_back(PolicyItem({p.user_name}, {ActionPB::ALTER}, false));
-    RETURN_NOT_OK(ranger_->AddPolicy(move(policy)));
-    return RefreshAuthzPolicies(cluster);
+    ASSERT_OK(ranger_->AddPolicy(move(policy)));
+    NO_FATALS(RefreshAuthzPolicies(cluster));
   }
 
-  Status GrantAlterWithGrantTablePrivilege(
+  void GrantAlterWithGrantTablePrivilege(
       const PrivilegeParams& p,
       const unique_ptr<ExternalMiniCluster>& cluster) override {
     AuthorizationPolicy policy;
     policy.databases.emplace_back(p.db_name);
     policy.tables.emplace_back(p.table_name);
     policy.items.emplace_back(PolicyItem({p.user_name}, {ActionPB::ALTER}, true));
-    RETURN_NOT_OK(ranger_->AddPolicy(move(policy)));
-    return RefreshAuthzPolicies(cluster);
+    ASSERT_OK(ranger_->AddPolicy(move(policy)));
+    NO_FATALS(RefreshAuthzPolicies(cluster));
   }
 
-  Status GrantRenameTablePrivilege(const PrivilegeParams& p,
-                                   const unique_ptr<ExternalMiniCluster>& cluster) override {
+  void GrantRenameTablePrivilege(const PrivilegeParams& p,
+                                 const unique_ptr<ExternalMiniCluster>& cluster) override {
     AuthorizationPolicy policy_new_table;
     policy_new_table.databases.emplace_back(p.db_name);
     // IsCreateTableDone() requires METADATA on the table level.
     policy_new_table.tables.emplace_back("*");
     policy_new_table.items.emplace_back(PolicyItem({p.user_name}, {ActionPB::CREATE}, false));
-    RETURN_NOT_OK(ranger_->AddPolicy(move(policy_new_table)));
+    ASSERT_OK(ranger_->AddPolicy(move(policy_new_table)));
 
     AuthorizationPolicy policy;
     policy.databases.emplace_back(p.db_name);
     policy.tables.emplace_back(p.table_name);
     policy.items.emplace_back(PolicyItem({p.user_name}, {ActionPB::ALL}, false));
-    RETURN_NOT_OK(ranger_->AddPolicy(move(policy)));
-    return RefreshAuthzPolicies(cluster);
+    ASSERT_OK(ranger_->AddPolicy(move(policy)));
+    NO_FATALS(RefreshAuthzPolicies(cluster));
   }
 
-  Status GrantGetMetadataDatabasePrivilege(const PrivilegeParams& p,
-                                           const unique_ptr<ExternalMiniCluster>& cluster)
+  void GrantGetMetadataDatabasePrivilege(const PrivilegeParams& p,
+                                         const unique_ptr<ExternalMiniCluster>& cluster)
         override {
     AuthorizationPolicy policy;
     policy.databases.emplace_back(p.db_name);
     policy.items.emplace_back(PolicyItem({p.user_name}, {ActionPB::METADATA}, false));
-    RETURN_NOT_OK(ranger_->AddPolicy(move(policy)));
-    return RefreshAuthzPolicies(cluster);
+    ASSERT_OK(ranger_->AddPolicy(move(policy)));
+    NO_FATALS(RefreshAuthzPolicies(cluster));
   }
 
-  Status GrantGetMetadataTablePrivilege(const PrivilegeParams& p,
-                                        const unique_ptr<ExternalMiniCluster>& cluster) override {
+  void GrantGetMetadataTablePrivilege(const PrivilegeParams& p,
+                                      const unique_ptr<ExternalMiniCluster>& cluster) override {
     AuthorizationPolicy policy;
     policy.databases.emplace_back(p.db_name);
     policy.tables.emplace_back(p.table_name);
     policy.items.emplace_back(PolicyItem({p.user_name}, {ActionPB::METADATA}, false));
-    RETURN_NOT_OK(ranger_->AddPolicy(move(policy)));
-    return RefreshAuthzPolicies(cluster);
+    ASSERT_OK(ranger_->AddPolicy(move(policy)));
+    NO_FATALS(RefreshAuthzPolicies(cluster));
   }
 
-  Status GrantAllTablePrivilege(const PrivilegeParams& p,
-                                const unique_ptr<ExternalMiniCluster>& cluster) override {
+  void GrantAllTablePrivilege(const PrivilegeParams& p,
+                              const unique_ptr<ExternalMiniCluster>& cluster) override {
     AuthorizationPolicy policy;
     policy.databases.emplace_back(p.db_name);
     policy.tables.emplace_back(p.table_name);
     policy.items.emplace_back(PolicyItem({p.user_name}, {ActionPB::ALL}, false));
-    RETURN_NOT_OK(ranger_->AddPolicy(move(policy)));
-    return RefreshAuthzPolicies(cluster);
+    ASSERT_OK(ranger_->AddPolicy(move(policy)));
+    NO_FATALS(RefreshAuthzPolicies(cluster));
   }
 
-  Status GrantAllDatabasePrivilege(const PrivilegeParams& p,
-                                   const unique_ptr<ExternalMiniCluster>& cluster) override {
+  void GrantAllDatabasePrivilege(const PrivilegeParams& p,
+                                 const unique_ptr<ExternalMiniCluster>& cluster) override {
     AuthorizationPolicy db_policy;
     db_policy.databases.emplace_back(p.db_name);
     db_policy.items.emplace_back(PolicyItem({p.user_name}, {ActionPB::ALL}, false));
-    RETURN_NOT_OK(ranger_->AddPolicy(move(db_policy)));
+    ASSERT_OK(ranger_->AddPolicy(move(db_policy)));
     AuthorizationPolicy tbl_policy;
     tbl_policy.databases.emplace_back(p.db_name);
     tbl_policy.tables.emplace_back("*");
     tbl_policy.items.emplace_back(PolicyItem({p.user_name}, {ActionPB::ALL}, false));
-    RETURN_NOT_OK(ranger_->AddPolicy(move(tbl_policy)));
-    return RefreshAuthzPolicies(cluster);
+    ASSERT_OK(ranger_->AddPolicy(move(tbl_policy)));
+    NO_FATALS(RefreshAuthzPolicies(cluster));
   }
 
-  Status GrantAllWithGrantTablePrivilege(const PrivilegeParams& p,
-                                         const unique_ptr<ExternalMiniCluster>& cluster) override {
+  void GrantAllWithGrantTablePrivilege(const PrivilegeParams& p,
+                                       const unique_ptr<ExternalMiniCluster>& cluster) override {
     AuthorizationPolicy policy;
     policy.databases.emplace_back(p.db_name);
     policy.tables.emplace_back(p.table_name);
     policy.items.emplace_back(PolicyItem({p.user_name}, {ActionPB::ALL}, true));
-    RETURN_NOT_OK(ranger_->AddPolicy(move(policy)));
-    return RefreshAuthzPolicies(cluster);
+    ASSERT_OK(ranger_->AddPolicy(move(policy)));
+    NO_FATALS(RefreshAuthzPolicies(cluster));
   }
 
-  Status GrantAllWithGrantDatabasePrivilege(
+  void GrantAllWithGrantDatabasePrivilege(
       const PrivilegeParams& p,
       const unique_ptr<ExternalMiniCluster>& cluster) override {
     AuthorizationPolicy db_policy;
     db_policy.databases.emplace_back(p.db_name);
     db_policy.items.emplace_back(PolicyItem({p.user_name}, {ActionPB::ALL}, true));
-    RETURN_NOT_OK(ranger_->AddPolicy(move(db_policy)));
+    ASSERT_OK(ranger_->AddPolicy(move(db_policy)));
     AuthorizationPolicy tbl_policy;
     tbl_policy.databases.emplace_back(p.db_name);
     tbl_policy.tables.emplace_back("*");
     tbl_policy.items.emplace_back(PolicyItem({p.user_name}, {ActionPB::ALL}, true));
-    RETURN_NOT_OK(ranger_->AddPolicy(move(tbl_policy)));
-    return RefreshAuthzPolicies(cluster);
+    ASSERT_OK(ranger_->AddPolicy(move(tbl_policy)));
+    NO_FATALS(RefreshAuthzPolicies(cluster));
   }
 
   Status CreateTable(const OperationParams& p,
@@ -590,7 +590,7 @@ class MasterAuthzITestBase : public ExternalMiniClusterITestBase {
 
     ASSERT_OK(harness_->SetUpExternalServiceClients(cluster_));
     ASSERT_OK(harness_->SetUpCredentials());
-    ASSERT_OK(harness_->RefreshAuthzPolicies(cluster_));
+    NO_FATALS(harness_->RefreshAuthzPolicies(cluster_));
     ASSERT_OK(harness_->SetUpTables(cluster_, client_));
 
     // Log in as 'test-user' and reset the client to pick up the change in user.
@@ -608,48 +608,48 @@ class MasterAuthzITestBase : public ExternalMiniClusterITestBase {
     return harness_->GetTableLocationsWithTableId(table_name, table_id, cluster_);
   }
 
-  Status GrantCreateTablePrivilege(const PrivilegeParams& p) {
-    return harness_->GrantCreateTablePrivilege(p, cluster_);
+  void GrantCreateTablePrivilege(const PrivilegeParams& p) {
+    NO_FATALS(harness_->GrantCreateTablePrivilege(p, cluster_));
   }
 
-  Status GrantDropTablePrivilege(const PrivilegeParams& p) {
-    return harness_->GrantDropTablePrivilege(p, cluster_);
+  void GrantDropTablePrivilege(const PrivilegeParams& p) {
+    NO_FATALS(harness_->GrantDropTablePrivilege(p, cluster_));
   }
 
-  Status GrantAlterTablePrivilege(const PrivilegeParams& p) {
-    return harness_->GrantAlterTablePrivilege(p, cluster_);
+  void GrantAlterTablePrivilege(const PrivilegeParams& p) {
+    NO_FATALS(harness_->GrantAlterTablePrivilege(p, cluster_));
   }
 
-  Status GrantRenameTablePrivilege(const PrivilegeParams& p) {
-    return harness_->GrantRenameTablePrivilege(p, cluster_);
+  void GrantRenameTablePrivilege(const PrivilegeParams& p) {
+    NO_FATALS(harness_->GrantRenameTablePrivilege(p, cluster_));
   }
 
-  Status GrantGetMetadataTablePrivilege(const PrivilegeParams& p) {
-    return harness_->GrantGetMetadataTablePrivilege(p, cluster_);
+  void GrantGetMetadataTablePrivilege(const PrivilegeParams& p) {
+    NO_FATALS(harness_->GrantGetMetadataTablePrivilege(p, cluster_));
   }
 
-  Status GrantAlterWithGrantTablePrivilege(const PrivilegeParams& p) {
-    return harness_->GrantAlterWithGrantTablePrivilege(p, cluster_);
+  void GrantAlterWithGrantTablePrivilege(const PrivilegeParams& p) {
+    NO_FATALS(harness_->GrantAlterWithGrantTablePrivilege(p, cluster_));
   }
 
-  Status GrantAllTablePrivilege(const PrivilegeParams& p) {
-    return harness_->GrantAllTablePrivilege(p, cluster_);
+  void GrantAllTablePrivilege(const PrivilegeParams& p) {
+    NO_FATALS(harness_->GrantAllTablePrivilege(p, cluster_));
   }
 
-  Status GrantAllDatabasePrivilege(const PrivilegeParams& p) {
-    return harness_->GrantAllDatabasePrivilege(p, cluster_);
+  void GrantAllDatabasePrivilege(const PrivilegeParams& p) {
+    NO_FATALS(harness_->GrantAllDatabasePrivilege(p, cluster_));
   }
 
-  Status GrantAllWithGrantTablePrivilege(const PrivilegeParams& p) {
-    return harness_->GrantAllWithGrantTablePrivilege(p, cluster_);
+  void GrantAllWithGrantTablePrivilege(const PrivilegeParams& p) {
+    NO_FATALS(harness_->GrantAllWithGrantTablePrivilege(p, cluster_));
   }
 
-  Status GrantAllWithGrantDatabasePrivilege(const PrivilegeParams& p) {
-    return harness_->GrantAllWithGrantDatabasePrivilege(p, cluster_);
+  void GrantAllWithGrantDatabasePrivilege(const PrivilegeParams& p) {
+    NO_FATALS(harness_->GrantAllWithGrantDatabasePrivilege(p, cluster_));
   }
 
-  Status GrantGetMetadataDatabasePrivilege(const PrivilegeParams& p) {
-    return harness_->GrantGetMetadataDatabasePrivilege(p, cluster_);
+  void GrantGetMetadataDatabasePrivilege(const PrivilegeParams& p) {
+    NO_FATALS(harness_->GrantGetMetadataDatabasePrivilege(p, cluster_));
   }
 
   Status CreateTable(const OperationParams& p) {
@@ -878,12 +878,12 @@ TEST_P(MasterAuthzITest, TestAuthzListTables) {
 
   // Listing tables only shows the tables which user has proper privileges on.
   tables.clear();
-  ASSERT_OK(this->GrantGetMetadataTablePrivilege({ kDatabaseName, kTableName }));
+  NO_FATALS(this->GrantGetMetadataTablePrivilege({ kDatabaseName, kTableName }));
   ASSERT_OK(this->client_->ListTables(&tables));
   ASSERT_EQ(vector<string>({ table_name }), tables);
 
   tables.clear();
-  ASSERT_OK(this->GrantGetMetadataTablePrivilege(
+  NO_FATALS(this->GrantGetMetadataTablePrivilege(
       {kDatabaseName, kSecondTable, "{OWNER}"}));
   ASSERT_OK(this->client_->ListTables(&tables));
   unordered_set<string> tables_set(tables.begin(), tables.end());
@@ -897,8 +897,8 @@ TEST_P(MasterAuthzITest, TestAuthzListTablesConcurrentRename) {
       "catalog_manager_inject_latency_list_authz_ms", "3000"));;
   const auto table_name = Substitute("$0.$1", kDatabaseName, kTableName);
   const auto sec_table_name = Substitute("$0.$1", kDatabaseName, kSecondTable);
-  ASSERT_OK(this->GrantGetMetadataTablePrivilege({ kDatabaseName, kTableName }));
-  ASSERT_OK(this->GrantRenameTablePrivilege({ kDatabaseName, kTableName }));
+  NO_FATALS(this->GrantGetMetadataTablePrivilege({ kDatabaseName, kTableName }));
+  NO_FATALS(this->GrantRenameTablePrivilege({ kDatabaseName, kTableName }));
 
   // List the tables while injecting latency.
   vector<string> tables;
@@ -921,7 +921,7 @@ TEST_P(MasterAuthzITest, TestAuthzGiveAwayOwnership) {
   // We need to grant metadata permissions to the user, otherwise the ownership
   // change would time out due to lack of privileges when checking the alter
   // table progress.
-  this->GrantGetMetadataTablePrivilege({ kDatabaseName, kTableName, kTestUser });
+  NO_FATALS(this->GrantGetMetadataTablePrivilege({ kDatabaseName, kTableName, kTestUser }));
   const string table_name = Substitute("$0.$1", kDatabaseName, kTableName);
 
   // Change table owner.
@@ -1024,12 +1024,12 @@ TEST_P(MasterAuthzOwnerITest, TestMismatchedTable) {
   ASSERT_TRUE(s.IsNotAuthorized()) << s.ToString();
   ASSERT_STR_MATCHES(s.ToString(), "[Uu]nauthorized action");
 
-  ASSERT_OK(this->GrantGetMetadataTablePrivilege({ kDatabaseName, kTableName, kUsername }));
+  NO_FATALS(this->GrantGetMetadataTablePrivilege({ kDatabaseName, kTableName, kUsername }));
   s = this->GetTableLocationsWithTableId(table_name_b, table_id_a);
   ASSERT_TRUE(s.IsNotAuthorized());
   ASSERT_STR_MATCHES(s.ToString(), "[Uu]nauthorized action");
 
-  ASSERT_OK(this->GrantGetMetadataTablePrivilege({ kDatabaseName, kSecondTable, kUsername }));
+  NO_FATALS(this->GrantGetMetadataTablePrivilege({ kDatabaseName, kSecondTable, kUsername }));
   s = this->GetTableLocationsWithTableId(table_name_b, table_id_a);
   ASSERT_TRUE(s.IsNotFound());
   ASSERT_STR_CONTAINS(s.ToString(), "the table ID refers to a different table");
@@ -1042,7 +1042,7 @@ typedef function<Status(MasterAuthzITestBase*, const OperationParams&)> Operator
 // Functor that grants the required permission for an operation (e.g Create,
 // Rename) on a table given the database the table belongs to and the name of
 // the table, if applicable.
-typedef function<Status(MasterAuthzITestBase*, const PrivilegeParams&)> PrivilegeFunc;
+typedef function<void(MasterAuthzITestBase*, const PrivilegeParams&)> PrivilegeFunc;
 
 // A description of the operation function that describes a particular action
 // on a table a user can perform, as well as the privilege granting function
@@ -1101,7 +1101,7 @@ TEST_P(TestAuthzTable, TestAuthorizeTable) {
   ASSERT_STR_MATCHES(s.ToString(), "[Uu]nauthorized action");
 
   // User 'test-user' can operate on the table after obtaining proper privileges.
-  ASSERT_OK(desc.funcs.grant_privileges(this, privilege_params));
+  NO_FATALS(desc.funcs.grant_privileges(this, privilege_params));
   ASSERT_OK(desc.funcs.do_action(this, action_params));
 
   // Ensure that operating on a table while the Authz service is unreachable fails.
@@ -1263,7 +1263,7 @@ TEST_P(AuthzErrorHandlingTest, TestNonExistentTable) {
     ASSERT_OK(StartAuthzProvider());
   }
   ASSERT_EVENTUALLY([&] {
-    ASSERT_OK(funcs.grant_privileges(this, privilege_params));
+    NO_FATALS(funcs.grant_privileges(this, privilege_params));
   });
   s = funcs.do_action(this, action_params);
   ASSERT_TRUE(s.IsNotFound()) << s.ToString();