You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by to...@apache.org on 2017/06/27 00:32:39 UTC

[1/3] kudu git commit: tools: refactor table-printing code a bit

Repository: kudu
Updated Branches:
  refs/heads/master 3c3cd3215 -> 78ef92ec8


tools: refactor table-printing code a bit

This changes the table-printing code to use a class and add some extra
safety checks that the added columns have matching numbers of rows.

Change-Id: I7e9556a14b6745f6bd07d1aee111bfa5cd9297fe
Reviewed-on: http://gerrit.cloudera.org:8080/7259
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/96ad3b07
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/96ad3b07
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/96ad3b07

Branch: refs/heads/master
Commit: 96ad3b07cf1dc694ddcfd72405aeb662440199b5
Parents: 3c3cd32
Author: Todd Lipcon <to...@cloudera.com>
Authored: Wed Jun 21 22:32:29 2017 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Tue Jun 27 00:16:46 2017 +0000

----------------------------------------------------------------------
 src/kudu/tools/ksck.cc                | 11 ++++++---
 src/kudu/tools/tool_action_common.cc  | 35 +++++++++++++++++++++------
 src/kudu/tools/tool_action_common.h   | 39 +++++++++++++++++++++++++++---
 src/kudu/tools/tool_action_master.cc  |  9 +++----
 src/kudu/tools/tool_action_tserver.cc | 10 +++-----
 5 files changed, 75 insertions(+), 29 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/96ad3b07/src/kudu/tools/ksck.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck.cc b/src/kudu/tools/ksck.cc
index f1eec40..9e5acf1 100644
--- a/src/kudu/tools/ksck.cc
+++ b/src/kudu/tools/ksck.cc
@@ -834,8 +834,7 @@ Ksck::CheckResult Ksck::VerifyTablet(const shared_ptr<KsckTablet>& tablet, int t
     Out() << "The consensus matrix is:" << endl;
 
     // Prepare the header and columns for PrintTable.
-    const vector<string> headers{ "Config source", "Voters", "Current term",
-                                  "Config index", "Committed?" };
+    DataTable table({});
 
     // Seed the columns with the master info.
     vector<string> sources{"master"};
@@ -863,8 +862,12 @@ Ksck::CheckResult Ksck::VerifyTablet(const shared_ptr<KsckTablet>& tablet, int t
       committed.emplace_back(replica.consensus_state->type == KsckConsensusConfigType::PENDING ?
                           "No" : "Yes");
     }
-    vector<vector<string>> columns{ sources, voters, terms, indexes, committed };
-    PrintTable(headers, columns, Out());
+    table.AddColumn("Config source", std::move(sources));
+    table.AddColumn("Voters", std::move(voters));
+    table.AddColumn("Current term", std::move(terms));
+    table.AddColumn("Config index", std::move(indexes));
+    table.AddColumn("Committed?", std::move(committed));
+    CHECK_OK(table.PrintTo(Out()));
   }
 
   return result;

http://git-wip-us.apache.org/repos/asf/kudu/blob/96ad3b07/src/kudu/tools/tool_action_common.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_action_common.cc b/src/kudu/tools/tool_action_common.cc
index 89531a8..b6ff2d2 100644
--- a/src/kudu/tools/tool_action_common.cc
+++ b/src/kudu/tools/tool_action_common.cc
@@ -480,19 +480,38 @@ void PrintTable(const vector<vector<string>>& columns, const string& separator,
 
 } // anonymous namespace
 
-Status PrintTable(const vector<string>& headers,
-                  const vector<vector<string>>& columns,
-                  ostream& out) {
+DataTable::DataTable(std::vector<string> col_names)
+    : column_names_(std::move(col_names)),
+      columns_(column_names_.size()) {
+}
+
+void DataTable::AddRow(std::vector<string> row) {
+  CHECK_EQ(row.size(), columns_.size());
+  int i = 0;
+  for (auto& v : row) {
+    columns_[i++].emplace_back(std::move(v));
+  }
+}
+
+void DataTable::AddColumn(string name, vector<string> column) {
+  if (!columns_.empty()) {
+    CHECK_EQ(column.size(), columns_[0].size());
+  }
+  column_names_.emplace_back(std::move(name));
+  columns_.emplace_back(std::move(column));
+}
+
+Status DataTable::PrintTo(ostream& out) const {
   if (boost::iequals(FLAGS_format, "pretty")) {
-    PrettyPrintTable(headers, columns, out);
+    PrettyPrintTable(column_names_, columns_, out);
   } else if (boost::iequals(FLAGS_format, "space")) {
-    PrintTable(columns, " ", out);
+    PrintTable(columns_, " ", out);
   } else if (boost::iequals(FLAGS_format, "tsv")) {
-    PrintTable(columns, "	", out);
+    PrintTable(columns_, "	", out);
   } else if (boost::iequals(FLAGS_format, "csv")) {
-    PrintTable(columns, ",", out);
+    PrintTable(columns_, ",", out);
   } else if (boost::iequals(FLAGS_format, "json")) {
-    JsonPrintTable(headers, columns, out);
+    JsonPrintTable(column_names_, columns_, out);
   } else {
     return Status::InvalidArgument("unknown format (--format)", FLAGS_format);
   }

http://git-wip-us.apache.org/repos/asf/kudu/blob/96ad3b07/src/kudu/tools/tool_action_common.h
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_action_common.h b/src/kudu/tools/tool_action_common.h
index 439ba4f..ac232b8 100644
--- a/src/kudu/tools/tool_action_common.h
+++ b/src/kudu/tools/tool_action_common.h
@@ -106,10 +106,41 @@ Status PrintServerTimestamp(const std::string& address, uint16_t default_port);
 Status SetServerFlag(const std::string& address, uint16_t default_port,
                      const std::string& flag, const std::string& value);
 
-// Prints a table.
-Status PrintTable(const std::vector<std::string>& headers,
-                  const std::vector<std::vector<std::string>>& columns,
-                  std::ostream& out);
+// A table of data to present to the user.
+//
+// Supports formatting based on the --format flag.
+// All data is buffered in memory before being output.
+//
+// Example usage:
+//    DataTable table({"person", "favorite color"});
+//    table.AddRow({"joe", "red"});
+//    table.AddRow({"bob", "green"});
+//    table.AddRow({"alice", "yellow"});
+//    // PrintTo() returns a Status that must not be ignored.
+//    RETURN_NOT_OK(table.PrintTo(cout));
+class DataTable {
+ public:
+  // Construct a table with the given column names.
+  explicit DataTable(std::vector<std::string> col_names);
+
+  // Add a row of data to the table.
+  //
+  // REQUIRES: 'row.size()' matches the table's current number of columns
+  // (those named in the constructor plus any added via AddColumn()).
+  void AddRow(std::vector<std::string> row);
+
+  // Add a column of data to the right side of the table.
+  //
+  // REQUIRES: if the table already has any columns, the length of this column
+  // must match the length of the existing columns.
+  void AddColumn(std::string name, std::vector<std::string> column);
+
+  // Print the table to 'out'.
+  Status PrintTo(std::ostream& out) const WARN_UNUSED_RESULT;
+ private:
+  std::vector<std::string> column_names_;
+  std::vector<std::vector<std::string>> columns_;
+};
 
 // Wrapper around a Kudu client which allows calling proxy methods on the leader
 // master.

http://git-wip-us.apache.org/repos/asf/kudu/blob/96ad3b07/src/kudu/tools/tool_action_master.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_action_master.cc b/src/kudu/tools/tool_action_master.cc
index b6d7704..ef71a24 100644
--- a/src/kudu/tools/tool_action_master.cc
+++ b/src/kudu/tools/tool_action_master.cc
@@ -88,8 +88,7 @@ Status ListMasters(const RunnerContext& context) {
     return StatusFromPB(resp.error().status());
   }
 
-  vector<string> headers;
-  vector<vector<string>> columns;
+  DataTable table({});
 
   vector<ServerEntryPB> masters;
   std::copy_if(resp.masters().begin(), resp.masters().end(), std::back_inserter(masters),
@@ -107,7 +106,6 @@ Status ListMasters(const RunnerContext& context) {
   };
 
   for (const auto& column : strings::Split(FLAGS_columns, ",", strings::SkipEmpty())) {
-    headers.push_back(column.ToString());
     vector<string> values;
     if (boost::iequals(column, "uuid")) {
       for (const auto& master : masters) {
@@ -136,11 +134,10 @@ Status ListMasters(const RunnerContext& context) {
     } else {
       return Status::InvalidArgument("unknown column (--columns)", column);
     }
-
-    columns.emplace_back(std::move(values));
+    table.AddColumn(column.ToString(), std::move(values));
   }
 
-  RETURN_NOT_OK(PrintTable(headers, columns, cout));
+  RETURN_NOT_OK(table.PrintTo(cout));
   return Status::OK();
 }
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/96ad3b07/src/kudu/tools/tool_action_tserver.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_action_tserver.cc b/src/kudu/tools/tool_action_tserver.cc
index 4a5efe0..3703103 100644
--- a/src/kudu/tools/tool_action_tserver.cc
+++ b/src/kudu/tools/tool_action_tserver.cc
@@ -91,9 +91,7 @@ Status ListTServers(const RunnerContext& context) {
     return StatusFromPB(resp.error().status());
   }
 
-  vector<string> headers;
-  vector<vector<string>> columns;
-
+  DataTable table({});
   const auto& servers = resp.servers();
 
   auto hostport_to_string = [](const HostPortPB& hostport) {
@@ -101,7 +99,6 @@ Status ListTServers(const RunnerContext& context) {
   };
 
   for (const auto& column : strings::Split(FLAGS_columns, ",", strings::SkipEmpty())) {
-    headers.emplace_back(column.ToString());
     vector<string> values;
     if (boost::iequals(column, "uuid")) {
       for (const auto& server : servers) {
@@ -134,11 +131,10 @@ Status ListTServers(const RunnerContext& context) {
     } else {
       return Status::InvalidArgument("unknown column (--columns)", column);
     }
-
-    columns.emplace_back(std::move(values));
+    table.AddColumn(column.ToString(), std::move(values));
   }
 
-  RETURN_NOT_OK(PrintTable(headers, columns, cout));
+  RETURN_NOT_OK(table.PrintTo(cout));
   return Status::OK();
 }
 


[2/3] kudu git commit: tools: add a tool to summarize data size on a tablet or tablets

Posted by to...@apache.org.
tools: add a tool to summarize data size on a tablet or tablets

This adds 'kudu local_replica data_size <tablet glob>' which lists the
space usage of a tablet, grouped by type of file, column, rowset, etc.

This would be even more useful in an online context, but it's simpler to
implement offline and still useful for understanding space usage.

Change-Id: Ibf0237f9d01f4ec332d1df545d2c2f51f64ce012
Reviewed-on: http://gerrit.cloudera.org:8080/5727
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/c19b8f4a
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/c19b8f4a
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/c19b8f4a

Branch: refs/heads/master
Commit: c19b8f4a1a271af1efb5a01bdf05005d79bb85f6
Parents: 96ad3b0
Author: Todd Lipcon <to...@cloudera.com>
Authored: Wed Jun 21 22:33:57 2017 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Tue Jun 27 00:28:43 2017 +0000

----------------------------------------------------------------------
 src/kudu/tools/kudu-tool-test.cc            |  58 +++++++++-
 src/kudu/tools/tool_action_local_replica.cc | 135 +++++++++++++++++++++++
 2 files changed, 191 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/c19b8f4a/src/kudu/tools/kudu-tool-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 6d2327f..1dd63a1 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -48,6 +48,7 @@
 #include "kudu/gutil/gscoped_ptr.h"
 #include "kudu/gutil/ref_counted.h"
 #include "kudu/gutil/strings/split.h"
+#include "kudu/gutil/strings/strip.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/integration-tests/cluster_itest_util.h"
 #include "kudu/integration-tests/external_mini_cluster.h"
@@ -151,11 +152,11 @@ class ToolTest : public KuduTest {
     Status s = Subprocess::Call(args, "", &out, &err);
     if (stdout) {
       *stdout = out;
-      StripWhiteSpace(stdout);
+      StripTrailingNewline(stdout);
     }
     if (stderr) {
       *stderr = err;
-      StripWhiteSpace(stderr);
+      StripTrailingNewline(stderr);
     }
     if (stdout_lines) {
       *stdout_lines = strings::Split(out, "\n", strings::SkipEmpty());
@@ -386,6 +387,7 @@ TEST_F(ToolTest, TestModeHelp) {
   {
     const vector<string> kLocalReplicaModeRegexes = {
         "cmeta.*Operate on a local tablet replica's consensus",
+        "data_size.*Summarize the data size",
         "dump.*Dump a Kudu filesystem",
         "copy_from_remote.*Copy a tablet replica",
         "delete.*Delete a tablet replica from the local filesystem",
@@ -1110,6 +1112,58 @@ TEST_F(ToolTest, TestLocalReplicaOps) {
   }
   {
     string stdout;
+    NO_FATALS(RunActionStdoutString(
+        Substitute("local_replica data_size $0 $1",
+                   kTestTablet, fs_paths), &stdout));
+    SCOPED_TRACE(stdout);
+
+    string expected = R"(
+    table id     |  tablet id  | rowset id |    block type    | size
+-----------------+-------------+-----------+------------------+------
+ KuduTableTestId | test-tablet | 0         | c10 (key)        | 164B
+ KuduTableTestId | test-tablet | 0         | c11 (int_val)    | 113B
+ KuduTableTestId | test-tablet | 0         | c12 (string_val) | 138B
+ KuduTableTestId | test-tablet | 0         | REDO             | 0B
+ KuduTableTestId | test-tablet | 0         | UNDO             | 169B
+ KuduTableTestId | test-tablet | 0         | BLOOM            | 4.1K
+ KuduTableTestId | test-tablet | 0         | PK               | 0B
+ KuduTableTestId | test-tablet | 0         | *                | 4.6K
+ KuduTableTestId | test-tablet | *         | c10 (key)        | 164B
+ KuduTableTestId | test-tablet | *         | c11 (int_val)    | 113B
+ KuduTableTestId | test-tablet | *         | c12 (string_val) | 138B
+ KuduTableTestId | test-tablet | *         | REDO             | 0B
+ KuduTableTestId | test-tablet | *         | UNDO             | 169B
+ KuduTableTestId | test-tablet | *         | BLOOM            | 4.1K
+ KuduTableTestId | test-tablet | *         | PK               | 0B
+ KuduTableTestId | test-tablet | *         | *                | 4.6K
+ KuduTableTestId | *           | *         | c10 (key)        | 164B
+ KuduTableTestId | *           | *         | c11 (int_val)    | 113B
+ KuduTableTestId | *           | *         | c12 (string_val) | 138B
+ KuduTableTestId | *           | *         | REDO             | 0B
+ KuduTableTestId | *           | *         | UNDO             | 169B
+ KuduTableTestId | *           | *         | BLOOM            | 4.1K
+ KuduTableTestId | *           | *         | PK               | 0B
+ KuduTableTestId | *           | *         | *                | 4.6K
+)";
+    // Preprocess stdout and our expected table so that we are less
+    // sensitive to small variations in encodings, id assignment, etc.
+    for (string* p : {&stdout, &expected}) {
+      // Replace any run of digits and '.' characters with a single '#'.
+      StripString(p, "0123456789.", '#');
+      StripDupCharacters(p, '#', 0);
+      // Collapse whitespace to a single space.
+      StripDupCharacters(p, ' ', 0);
+      // Strip the leading and trailing whitespace.
+      StripWhiteSpace(p);
+      // Collapse '-'s to a single '-' so that different width columns
+      // don't change the width of the header line.
+      StripDupCharacters(p, '-', 0);
+    }
+
+    EXPECT_EQ(stdout, expected);
+  }
+  {
+    string stdout;
     NO_FATALS(RunActionStdoutString(Substitute("local_replica list $0",
                                                fs_paths), &stdout));
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/c19b8f4a/src/kudu/tools/tool_action_local_replica.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_action_local_replica.cc b/src/kudu/tools/tool_action_local_replica.cc
index 0cb438c..0c92eeb 100644
--- a/src/kudu/tools/tool_action_local_replica.cc
+++ b/src/kudu/tools/tool_action_local_replica.cc
@@ -131,6 +131,11 @@ const char* const kSeparatorLine =
 
 const char* const kTermArg = "term";
 
+const char* const kTabletIdGlobArg = "tablet_id_pattern";
+const char* const kTabletIdGlobArgDesc = "Tablet identifier pattern. "
+    "This argument supports basic glob syntax: '*' matches 0 or more wildcard "
+    "characters.";
+
 string Indent(int indent) {
   return string(indent, ' ');
 }
@@ -371,6 +376,126 @@ Status DeleteLocalReplica(const RunnerContext& context) {
   return Status::OK();
 }
 
+Status SummarizeSize(FsManager* fs,
+                     const vector<BlockId>& blocks,
+                     StringPiece block_type,
+                     int64_t* running_sum) {
+  int64_t local_sum = 0;
+  for (const auto& b : blocks) {
+    unique_ptr<fs::ReadableBlock> rb;
+    RETURN_NOT_OK_PREPEND(fs->OpenBlock(b, &rb),
+                          Substitute("could not open block $0", b.ToString()));
+    uint64_t size = 0;
+    RETURN_NOT_OK_PREPEND(rb->Size(&size),
+                          Substitute("could not get size for block $0", b.ToString()));
+    local_sum += size;
+    if (VLOG_IS_ON(1)) {
+      cout << Substitute("$0 block $1: $2 bytes $3",
+                         block_type, b.ToString(),
+                         size, HumanReadableNumBytes::ToString(size)) << endl;
+    }
+  }
+  *running_sum += local_sum;
+  return Status::OK();
+}
+
+namespace {
+struct TabletSizeStats {
+  int64_t redo_bytes = 0;
+  int64_t undo_bytes = 0;
+  int64_t bloom_bytes = 0;
+  int64_t pk_index_bytes = 0;
+  map<string, int64_t, autodigit_less> column_bytes;
+
+  void Add(const TabletSizeStats& other) {
+    redo_bytes += other.redo_bytes;
+    undo_bytes += other.undo_bytes;
+    bloom_bytes += other.bloom_bytes;
+    pk_index_bytes += other.pk_index_bytes;
+    for (const auto& p : other.column_bytes) {
+      column_bytes[p.first] += p.second;
+    }
+  }
+
+  void AddToTable(const string& table_id,
+                  const string& tablet_id,
+                  const string& rowset_id,
+                  DataTable* table) const {
+    vector<pair<string, int64_t>> to_print(column_bytes.begin(), column_bytes.end());
+    to_print.emplace_back("REDO", redo_bytes);
+    to_print.emplace_back("UNDO", undo_bytes);
+    to_print.emplace_back("BLOOM", bloom_bytes);
+    to_print.emplace_back("PK", pk_index_bytes);
+
+    int64_t total = 0;
+    for (const auto& e : to_print) {
+      table->AddRow({table_id, tablet_id, rowset_id, e.first,
+              HumanReadableNumBytes::ToString(e.second)});
+      total += e.second;
+    }
+    table->AddRow({table_id, tablet_id, rowset_id, "*", HumanReadableNumBytes::ToString(total)});
+  }
+};
+} // anonymous namespace
+
+Status SummarizeDataSize(const RunnerContext& context) {
+  const string& tablet_id_pattern = FindOrDie(context.required_args, kTabletIdGlobArg);
+  unique_ptr<FsManager> fs;
+  RETURN_NOT_OK(FsInit(&fs));
+
+  vector<string> tablets;
+  RETURN_NOT_OK(fs->ListTabletIds(&tablets));
+
+  unordered_map<string, TabletSizeStats> size_stats_by_table_id;
+
+  DataTable output_table({ "table id", "tablet id", "rowset id", "block type", "size" });
+
+  for (const string& tablet_id : tablets) {
+    TabletSizeStats tablet_stats;
+    if (!MatchPattern(tablet_id, tablet_id_pattern)) continue;
+    scoped_refptr<TabletMetadata> meta;
+    RETURN_NOT_OK_PREPEND(TabletMetadata::Load(fs.get(), tablet_id, &meta),
+                          Substitute("could not load tablet metadata for $0", tablet_id));
+    const string& table_id = meta->table_id();
+    for (const shared_ptr<RowSetMetadata>& rs_meta : meta->rowsets()) {
+      TabletSizeStats rowset_stats;
+      RETURN_NOT_OK(SummarizeSize(fs.get(), rs_meta->redo_delta_blocks(),
+                                  "REDO", &rowset_stats.redo_bytes));
+      RETURN_NOT_OK(SummarizeSize(fs.get(), rs_meta->undo_delta_blocks(),
+                                  "UNDO", &rowset_stats.undo_bytes));
+      RETURN_NOT_OK(SummarizeSize(fs.get(), { rs_meta->bloom_block() },
+                                  "Bloom", &rowset_stats.bloom_bytes));
+      if (rs_meta->has_adhoc_index_block()) {
+        RETURN_NOT_OK(SummarizeSize(fs.get(), { rs_meta->adhoc_index_block() },
+                                    "PK index", &rowset_stats.pk_index_bytes));
+      }
+      const auto& column_blocks_by_id = rs_meta->GetColumnBlocksById();
+      for (const auto& e : column_blocks_by_id) {
+        const auto& col_id = e.first;
+        const auto& block = e.second;
+        const auto& col_idx = meta->schema().find_column_by_id(col_id);
+        string col_key = Substitute(
+            "c$0 ($1)", col_id,
+            (col_idx != Schema::kColumnNotFound) ?
+                meta->schema().column(col_idx).name() : "?");
+        RETURN_NOT_OK(SummarizeSize(
+            fs.get(), { block }, col_key, &rowset_stats.column_bytes[col_key]));
+      }
+      rowset_stats.AddToTable(table_id, tablet_id, std::to_string(rs_meta->id()), &output_table);
+      tablet_stats.Add(rowset_stats);
+    }
+    tablet_stats.AddToTable(table_id, tablet_id, "*", &output_table);
+    size_stats_by_table_id[table_id].Add(tablet_stats);
+  }
+  for (const auto& e : size_stats_by_table_id) {
+    const auto& table_id = e.first;
+    const auto& stats = e.second;
+    stats.AddToTable(table_id, "*", "*", &output_table);
+  }
+  RETURN_NOT_OK(output_table.PrintTo(cout));
+  return Status::OK();
+}
+
 Status DumpWals(const RunnerContext& context) {
   unique_ptr<FsManager> fs_manager;
   RETURN_NOT_OK(FsInit(&fs_manager));
@@ -847,10 +972,20 @@ unique_ptr<Mode> BuildLocalReplicaMode() {
       .AddOptionalParameter("clean_unsafe")
       .Build();
 
+  unique_ptr<Action> data_size =
+      ActionBuilder("data_size", &SummarizeDataSize)
+      .Description("Summarize the data size/space usage of the given local replica(s).")
+      .AddRequiredParameter({ kTabletIdGlobArg, kTabletIdGlobArgDesc })
+      .AddOptionalParameter("fs_wal_dir")
+      .AddOptionalParameter("fs_data_dirs")
+      .AddOptionalParameter("format")
+      .Build();
+
   return ModeBuilder("local_replica")
       .Description("Operate on local tablet replicas via the local filesystem")
       .AddMode(std::move(cmeta))
       .AddAction(std::move(copy_from_remote))
+      .AddAction(std::move(data_size))
       .AddAction(std::move(delete_local_replica))
       .AddAction(std::move(list))
       .AddMode(BuildDumpMode())


[3/3] kudu git commit: tools: add a tool to extract an arbitrary block to stdout

Posted by to...@apache.org.
tools: add a tool to extract an arbitrary block to stdout

This allows a block to be dumped to stdout. I found this useful when
trying to see how compressible an index block was. We might also find
this useful for debugging block corruption issues on production
clusters -- it's much easier to extract a corrupted block and copy it
around than to try to copy an entire tablet or filesystem.

Change-Id: I04cf75169b1adb721791ca15b2d5ab5067025f06
Reviewed-on: http://gerrit.cloudera.org:8080/7260
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <to...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/78ef92ec
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/78ef92ec
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/78ef92ec

Branch: refs/heads/master
Commit: 78ef92ec807b8c2f5e91c0ddb70dbba0ebe542b9
Parents: c19b8f4
Author: Todd Lipcon <to...@cloudera.com>
Authored: Wed Jun 21 22:34:11 2017 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Tue Jun 27 00:28:51 2017 +0000

----------------------------------------------------------------------
 src/kudu/tools/kudu-tool-test.cc | 22 +++++++++++++++
 src/kudu/tools/tool_action_fs.cc | 53 +++++++++++++++++++++++++++++++++--
 2 files changed, 73 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/78ef92ec/src/kudu/tools/kudu-tool-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 1dd63a1..cc2687c 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -377,6 +377,7 @@ TEST_F(ToolTest, TestModeHelp) {
   }
   {
     const vector<string> kFsDumpModeRegexes = {
+        "block.*binary contents of a data block",
         "cfile.*contents of a CFile",
         "tree.*tree of a Kudu filesystem",
         "uuid.*UUID of a Kudu filesystem"
@@ -818,6 +819,27 @@ TEST_F(ToolTest, TestFsDumpCFile) {
   }
 }
 
+TEST_F(ToolTest, TestFsDumpBlock) {
+  const string kTestDir = GetTestPath("test");
+  FsManager fs(env_, kTestDir);
+  ASSERT_OK(fs.CreateInitialFileSystemLayout());
+  ASSERT_OK(fs.Open());
+
+  unique_ptr<WritableBlock> block;
+  ASSERT_OK(fs.CreateNewBlock({}, &block));
+  ASSERT_OK(block->Append("hello world"));
+  ASSERT_OK(block->Close());
+  BlockId block_id = block->id();
+
+  {
+    string stdout;
+    NO_FATALS(RunActionStdoutString(Substitute(
+        "fs dump block --fs_wal_dir=$0 $1",
+        kTestDir, block_id.ToString()), &stdout));
+    ASSERT_EQ("hello world", stdout);
+  }
+}
+
 TEST_F(ToolTest, TestWalDump) {
   const string kTestDir = GetTestPath("test");
   const string kTestTablet = "test-tablet";

http://git-wip-us.apache.org/repos/asf/kudu/blob/78ef92ec/src/kudu/tools/tool_action_fs.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_action_fs.cc b/src/kudu/tools/tool_action_fs.cc
index fc4ef59..a0c9dfe 100644
--- a/src/kudu/tools/tool_action_fs.cc
+++ b/src/kudu/tools/tool_action_fs.cc
@@ -173,14 +173,21 @@ Status DumpUuid(const RunnerContext& /*context*/) {
   return Status::OK();
 }
 
-Status DumpCFile(const RunnerContext& context) {
+Status ParseBlockIdArg(const RunnerContext& context,
+                       BlockId* id) {
   const string& block_id_str = FindOrDie(context.required_args, "block_id");
   uint64_t numeric_id;
   if (!safe_strtou64(block_id_str, &numeric_id)) {
     return Status::InvalidArgument(Substitute(
         "Could not parse $0 as numeric block ID", block_id_str));
   }
-  BlockId block_id(numeric_id);
+  *id = BlockId(numeric_id);
+  return Status::OK();
+}
+
+Status DumpCFile(const RunnerContext& context) {
+  BlockId block_id;
+  RETURN_NOT_OK(ParseBlockIdArg(context, &block_id));
 
   FsManagerOpts fs_opts;
   fs_opts.read_only = true;
@@ -209,6 +216,35 @@ Status DumpCFile(const RunnerContext& context) {
   return Status::OK();
 }
 
+Status DumpBlock(const RunnerContext& context) {
+  BlockId block_id;
+  RETURN_NOT_OK(ParseBlockIdArg(context, &block_id));
+
+  FsManagerOpts fs_opts;
+  fs_opts.read_only = true;
+  FsManager fs_manager(Env::Default(), fs_opts);
+  RETURN_NOT_OK(fs_manager.Open());
+
+  unique_ptr<fs::ReadableBlock> block;
+  RETURN_NOT_OK(fs_manager.OpenBlock(block_id, &block));
+
+  uint64_t size = 0;
+  RETURN_NOT_OK_PREPEND(block->Size(&size), "couldn't get block size");
+
+  faststring buf;
+  uint64_t offset = 0;
+  while (offset < size) {
+    int64_t chunk = std::min<int64_t>(size - offset, 64 * 1024);
+    buf.resize(chunk);
+    Slice s(buf);
+    RETURN_NOT_OK(block->Read(offset, &s));
+    offset += s.size();
+    cout << s.ToString();
+  }
+
+  return Status::OK();
+}
+
 Status DumpFsTree(const RunnerContext& /*context*/) {
   FsManagerOpts fs_opts;
   fs_opts.read_only = true;
@@ -225,6 +261,8 @@ static unique_ptr<Mode> BuildFsDumpMode() {
   unique_ptr<Action> dump_cfile =
       ActionBuilder("cfile", &DumpCFile)
       .Description("Dump the contents of a CFile (column file)")
+      .ExtraDescription("This interprets the contents of a CFile-formatted block "
+                        "and outputs the decoded row data.")
       .AddRequiredParameter({ "block_id", "block identifier" })
       .AddOptionalParameter("fs_wal_dir")
       .AddOptionalParameter("fs_data_dirs")
@@ -232,6 +270,16 @@ static unique_ptr<Mode> BuildFsDumpMode() {
       .AddOptionalParameter("print_rows")
       .Build();
 
+  unique_ptr<Action> dump_block =
+      ActionBuilder("block", &DumpBlock)
+      .Description("Dump the binary contents of a data block")
+      .ExtraDescription("This performs no parsing or interpretation of the data stored "
+                        "in the block but rather outputs its binary contents directly.")
+      .AddRequiredParameter({ "block_id", "block identifier" })
+      .AddOptionalParameter("fs_wal_dir")
+      .AddOptionalParameter("fs_data_dirs")
+      .Build();
+
   unique_ptr<Action> dump_tree =
       ActionBuilder("tree", &DumpFsTree)
       .Description("Dump the tree of a Kudu filesystem")
@@ -248,6 +296,7 @@ static unique_ptr<Mode> BuildFsDumpMode() {
 
   return ModeBuilder("dump")
       .Description("Dump a Kudu filesystem")
+      .AddAction(std::move(dump_block))
       .AddAction(std::move(dump_cfile))
       .AddAction(std::move(dump_tree))
       .AddAction(std::move(dump_uuid))