You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by la...@apache.org on 2022/12/28 15:06:16 UTC

[kudu] branch master updated: [tools] Support to dump rowset primary key bounds and in readable format

This is an automated email from the ASF dual-hosted git repository.

laiyingchun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new f855dcb6d [tools] Support to dump rowset primary key bounds and in readable format
f855dcb6d is described below

commit f855dcb6d9afa21db79d19e1429005270e828f37
Author: Yingchun Lai <la...@apache.org>
AuthorDate: Sat Dec 17 19:26:07 2022 +0800

    [tools] Support to dump rowset primary key bounds and in readable format
    
    This patch adds two features to the CLI tool
    'kudu local_replica dump rowset':
    1. Dump rowsets' primary keys in a human-readable format by
       setting the --use_readable_format flag.
    2. Dump only the rowsets' primary key bounds, instead of all
       rows, by setting the --dump_primary_key_bounds_only flag.
    Both flags are only valid together with --nodump_all_columns.
    
    Change-Id: I9757104f96648c3c83b931369f0e377d8dc2079a
    Reviewed-on: http://gerrit.cloudera.org:8080/19370
    Tested-by: Alexey Serbin <al...@apache.org>
    Reviewed-by: Alexey Serbin <al...@apache.org>
---
 src/kudu/tools/kudu-tool-test.cc            | 86 ++++++++++++++++++++++++-----
 src/kudu/tools/tool_action_local_replica.cc | 52 ++++++++++++++++-
 2 files changed, 120 insertions(+), 18 deletions(-)

diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 63e0a4942..c98811f65 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -2799,8 +2799,8 @@ TEST_F(ToolTest, TestLocalReplicaOps) {
   {
     string stdout;
     NO_FATALS(RunActionStdoutString(
-        Substitute("local_replica dump rowset $0 $1 $2",
-                   kTestTablet, fs_paths, encryption_args), &stdout));
+        Substitute("local_replica dump rowset $0 $1 $2", kTestTablet, fs_paths, encryption_args),
+        &stdout));
 
     SCOPED_TRACE(stdout);
     ASSERT_STR_CONTAINS(stdout, "Dumping rowset 0");
@@ -2815,26 +2815,82 @@ TEST_F(ToolTest, TestLocalReplicaOps) {
     ASSERT_STR_CONTAINS(stdout, "undo_deltas {");
 
     ASSERT_STR_CONTAINS(stdout,
-                       "RowIdxInBlock: 0; Base: (int32 key=0, int32 int_val=0,"
-                       " string string_val=\"HelloWorld\"); "
-                       "Undo Mutations: [@1(DELETE)]; Redo Mutations: [];");
+                        "RowIdxInBlock: 0; Base: (int32 key=0, int32 int_val=0,"
+                        " string string_val=\"HelloWorld\"); "
+                        "Undo Mutations: [@1(DELETE)]; Redo Mutations: [];");
     ASSERT_STR_MATCHES(stdout, ".*---------------------.*");
-
+  }
+  {
     // This is expected to fail with Invalid argument for kRowId.
+    string stdout;
     string stderr;
-    Status s = RunTool(
-        Substitute("local_replica dump rowset $0 $1 --rowset_index=$2 $3",
-                   kTestTablet, fs_paths, kRowId, encryption_args),
-                   &stdout, &stderr, nullptr, nullptr);
+    Status s = RunTool(Substitute("local_replica dump rowset $0 $1 --rowset_index=$2 $3",
+                                  kTestTablet,
+                                  fs_paths,
+                                  kRowId,
+                                  encryption_args),
+                       &stdout,
+                       &stderr,
+                       nullptr,
+                       nullptr);
     ASSERT_TRUE(s.IsRuntimeError());
     SCOPED_TRACE(stderr);
-    string expected = "Could not find rowset " + SimpleItoa(kRowId) +
-        " in tablet id " + kTestTablet;
+    string expected =
+        "Could not find rowset " + SimpleItoa(kRowId) + " in tablet id " + kTestTablet;
     ASSERT_STR_CONTAINS(stderr, expected);
+  }
+  {
+    // Dump rowsets' primary keys in comparable format.
+    string stdout;
+    NO_FATALS(RunActionStdoutString(Substitute("local_replica dump rowset --nodump_all_columns "
+                                               "--nodump_metadata --nrows=15 $0 $1 $2",
+                                               kTestTablet,
+                                               fs_paths,
+                                               encryption_args),
+                                    &stdout));
 
+    SCOPED_TRACE(stdout);
+    ASSERT_STR_CONTAINS(stdout, "Dumping rowset 0");
+    ASSERT_STR_CONTAINS(stdout, "Dumping rowset 1");
+    ASSERT_STR_CONTAINS(stdout, "Dumping rowset 2");
+    ASSERT_STR_NOT_CONTAINS(stdout, "RowSet metadata");
+    for (int row_idx = 0; row_idx < 30; row_idx++) {
+      string row_key = StringPrintf("800000%02x", row_idx);
+      if (row_idx < 15) {
+        ASSERT_STR_CONTAINS(stdout, row_key);
+      } else {
+        ASSERT_STR_NOT_CONTAINS(stdout, row_key);
+      }
+    }
+  }
+
+  {
+    // Dump rowsets' primary keys in human readable format.
+    string stdout;
+    NO_FATALS(
+        RunActionStdoutString(Substitute("local_replica dump rowset --nodump_all_columns "
+                                         "--nodump_metadata --use_readable_format $0 $1 $2",
+                                         kTestTablet,
+                                         fs_paths,
+                                         encryption_args),
+                              &stdout));
+
+    SCOPED_TRACE(stdout);
+    ASSERT_STR_CONTAINS(stdout, "Dumping rowset 0");
+    ASSERT_STR_CONTAINS(stdout, "Dumping rowset 1");
+    ASSERT_STR_CONTAINS(stdout, "Dumping rowset 2");
+    ASSERT_STR_NOT_CONTAINS(stdout, "RowSet metadata");
+    for (int row_idx = 0; row_idx < 30; row_idx++) {
+      ASSERT_STR_CONTAINS(stdout, Substitute("(int32 key=$0)", row_idx));
+    }
+  }
+  {
+    // Dump rowsets' primary key bounds only.
+    string stdout;
     NO_FATALS(RunActionStdoutString(
         Substitute("local_replica dump rowset --nodump_all_columns "
-                   "--nodump_metadata --nrows=15 $0 $1 $2",
+                   "--nodump_metadata --use_readable_format "
+                   "--dump_primary_key_bounds_only $0 $1 $2",
                    kTestTablet, fs_paths, encryption_args), &stdout));
 
     SCOPED_TRACE(stdout);
@@ -2843,8 +2899,8 @@ TEST_F(ToolTest, TestLocalReplicaOps) {
     ASSERT_STR_CONTAINS(stdout, "Dumping rowset 2");
     ASSERT_STR_NOT_CONTAINS(stdout, "RowSet metadata");
     for (int row_idx = 0; row_idx < 30; row_idx++) {
-      string row_key = StringPrintf("800000%02x", row_idx);
-      if (row_idx < 15) {
+      string row_key = Substitute("(int32 key=$0)", row_idx);
+      if (row_idx % 10 == 0 || row_idx % 10 == 9) {
         ASSERT_STR_CONTAINS(stdout, row_key);
       } else {
         ASSERT_STR_NOT_CONTAINS(stdout, row_key);
diff --git a/src/kudu/tools/tool_action_local_replica.cc b/src/kudu/tools/tool_action_local_replica.cc
index 1f730b314..a764a56cf 100644
--- a/src/kudu/tools/tool_action_local_replica.cc
+++ b/src/kudu/tools/tool_action_local_replica.cc
@@ -84,6 +84,7 @@
 #include "kudu/util/env.h"
 #include "kudu/util/env_util.h"
 #include "kudu/util/faststring.h"
+#include "kudu/util/flag_validators.h"
 #include "kudu/util/locks.h"
 #include "kudu/util/metrics.h"
 #include "kudu/util/monotime.h"
@@ -103,6 +104,11 @@ class Messenger;
 DEFINE_bool(dump_all_columns, true,
             "If true, dumped rows include all of the columns in the rowset. If "
             "false, dumped rows include just the key columns (in a comparable format).");
+DEFINE_bool(use_readable_format, false,
+            "Whether to dump primary key in human readable format, otherwise, dump primary "
+            "key in comparable format.");
+DEFINE_bool(dump_primary_key_bounds_only, false,
+            "Whether to dump rowset primary key bounds only, otherwise, dump all rows.");
 DEFINE_bool(dump_metadata, true,
             "If true, dumps rowset metadata before dumping data. If false, "
             "only dumps the data.");
@@ -188,6 +194,24 @@ using strings::Substitute;
 namespace kudu {
 namespace tools {
 
+// Group flag validator: the key-only dump flags conflict with --dump_all_columns.
+bool ValidateDumpRowset() {
+  if (!FLAGS_dump_all_columns) return true;
+  if (FLAGS_use_readable_format) {
+    LOG(ERROR) << "Flag --use_readable_format is meaningless "
+                  "when --dump_all_columns is enabled.";
+    return false;
+  }
+  if (FLAGS_dump_primary_key_bounds_only) {
+    LOG(ERROR) << "Flag --dump_primary_key_bounds_only is meaningless "
+                  "when --dump_all_columns is enabled.";
+    return false;
+  }
+  // No conflicting flag combination was found.
+  return true;
+}
+GROUP_FLAG_VALIDATOR(validate_dump_rowset, ValidateDumpRowset);
+
 namespace {
 
 constexpr const char* const kSeparatorLine =
@@ -764,6 +788,15 @@ struct TabletSizeStats {
     table->AddRow({table_id, tablet_id, rowset_id, "*", HumanReadableNumBytes::ToString(total)});
   }
 };
+
+// Returns the key of 'row' via DebugRowKey() if --use_readable_format, else as comparable hex.
+string DumpRow(const Schema& key_proj, const RowBlockRow& row, faststring* key) {
+  if (!FLAGS_use_readable_format) {
+    key_proj.EncodeComparableKey(row, key);
+    return strings::b2a_hex(key->ToString());
+  }
+  return key_proj.DebugRowKey(row);
+}
 } // anonymous namespace
 
 Status SummarizeDataSize(const RunnerContext& context) {
@@ -980,14 +1013,27 @@ Status DumpRowSetInternal(const IOContext& ctx,
     RowBlockMemory mem(1024);
     RowBlock block(&key_proj, 100, &mem);
     faststring key;
+    string lower_bound;
+    string current_upper_bound;
     while (it->HasNext()) {
       mem.Reset();
       RETURN_NOT_OK(it->NextBlock(&block));
-      for (int i = 0; i < block.nrows(); i++) {
-        key_proj.EncodeComparableKey(block.row(i), &key);
-        lines.emplace_back(strings::b2a_hex(key.ToString()));
+      if (FLAGS_dump_primary_key_bounds_only) {
+        if (lower_bound.empty()) {
+          lower_bound = DumpRow(key_proj, block.row(0), &key);
+        }
+        CHECK_GT(block.nrows(), 0);
+        current_upper_bound = DumpRow(key_proj, block.row(block.nrows() - 1), &key);
+      } else {
+        for (int i = 0; i < block.nrows(); i++) {
+          lines.emplace_back(DumpRow(key_proj, block.row(i), &key));
+        }
       }
     }
+    if (FLAGS_dump_primary_key_bounds_only) {
+      lines.emplace_back(lower_bound);
+      lines.emplace_back(current_upper_bound);
+    }
   }
 
   // Respect 'rows_left' when dumping the output.