You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by la...@apache.org on 2022/12/28 15:06:16 UTC
[kudu] branch master updated: [tools] Support to dump rowset primary key bounds and in readable format
This is an automated email from the ASF dual-hosted git repository.
laiyingchun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new f855dcb6d [tools] Support to dump rowset primary key bounds and in readable format
f855dcb6d is described below
commit f855dcb6d9afa21db79d19e1429005270e828f37
Author: Yingchun Lai <la...@apache.org>
AuthorDate: Sat Dec 17 19:26:07 2022 +0800
[tools] Support to dump rowset primary key bounds and in readable format
This patch adds two features to the CLI tool 'kudu local_replica dump rowset':
1. Dump rowsets' primary keys in human-readable format by setting
the flag --use_readable_format.
2. Dump only the rowsets' primary key bounds, instead of all rows,
by setting the flag --dump_primary_key_bounds_only.
Both flags are rejected unless --nodump_all_columns is also specified.
Change-Id: I9757104f96648c3c83b931369f0e377d8dc2079a
Reviewed-on: http://gerrit.cloudera.org:8080/19370
Tested-by: Alexey Serbin <al...@apache.org>
Reviewed-by: Alexey Serbin <al...@apache.org>
---
src/kudu/tools/kudu-tool-test.cc | 86 ++++++++++++++++++++++++-----
src/kudu/tools/tool_action_local_replica.cc | 52 ++++++++++++++++-
2 files changed, 120 insertions(+), 18 deletions(-)
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 63e0a4942..c98811f65 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -2799,8 +2799,8 @@ TEST_F(ToolTest, TestLocalReplicaOps) {
{
string stdout;
NO_FATALS(RunActionStdoutString(
- Substitute("local_replica dump rowset $0 $1 $2",
- kTestTablet, fs_paths, encryption_args), &stdout));
+ Substitute("local_replica dump rowset $0 $1 $2", kTestTablet, fs_paths, encryption_args),
+ &stdout));
SCOPED_TRACE(stdout);
ASSERT_STR_CONTAINS(stdout, "Dumping rowset 0");
@@ -2815,26 +2815,82 @@ TEST_F(ToolTest, TestLocalReplicaOps) {
ASSERT_STR_CONTAINS(stdout, "undo_deltas {");
ASSERT_STR_CONTAINS(stdout,
- "RowIdxInBlock: 0; Base: (int32 key=0, int32 int_val=0,"
- " string string_val=\"HelloWorld\"); "
- "Undo Mutations: [@1(DELETE)]; Redo Mutations: [];");
+ "RowIdxInBlock: 0; Base: (int32 key=0, int32 int_val=0,"
+ " string string_val=\"HelloWorld\"); "
+ "Undo Mutations: [@1(DELETE)]; Redo Mutations: [];");
ASSERT_STR_MATCHES(stdout, ".*---------------------.*");
-
+ }
+ {
// This is expected to fail with Invalid argument for kRowId.
+ string stdout;
string stderr;
- Status s = RunTool(
- Substitute("local_replica dump rowset $0 $1 --rowset_index=$2 $3",
- kTestTablet, fs_paths, kRowId, encryption_args),
- &stdout, &stderr, nullptr, nullptr);
+ Status s = RunTool(Substitute("local_replica dump rowset $0 $1 --rowset_index=$2 $3",
+ kTestTablet,
+ fs_paths,
+ kRowId,
+ encryption_args),
+ &stdout,
+ &stderr,
+ nullptr,
+ nullptr);
ASSERT_TRUE(s.IsRuntimeError());
SCOPED_TRACE(stderr);
- string expected = "Could not find rowset " + SimpleItoa(kRowId) +
- " in tablet id " + kTestTablet;
+ string expected =
+ "Could not find rowset " + SimpleItoa(kRowId) + " in tablet id " + kTestTablet;
ASSERT_STR_CONTAINS(stderr, expected);
+ }
+ {
+ // Dump rowsets' primary keys in comparable format.
+ string stdout;
+ NO_FATALS(RunActionStdoutString(Substitute("local_replica dump rowset --nodump_all_columns "
+ "--nodump_metadata --nrows=15 $0 $1 $2",
+ kTestTablet,
+ fs_paths,
+ encryption_args),
+ &stdout));
+ SCOPED_TRACE(stdout);
+ ASSERT_STR_CONTAINS(stdout, "Dumping rowset 0");
+ ASSERT_STR_CONTAINS(stdout, "Dumping rowset 1");
+ ASSERT_STR_CONTAINS(stdout, "Dumping rowset 2");
+ ASSERT_STR_NOT_CONTAINS(stdout, "RowSet metadata");
+ for (int row_idx = 0; row_idx < 30; row_idx++) {
+ string row_key = StringPrintf("800000%02x", row_idx);
+ if (row_idx < 15) {
+ ASSERT_STR_CONTAINS(stdout, row_key);
+ } else {
+ ASSERT_STR_NOT_CONTAINS(stdout, row_key);
+ }
+ }
+ }
+
+ {
+ // Dump rowsets' primary keys in human readable format.
+ string stdout;
+ NO_FATALS(
+ RunActionStdoutString(Substitute("local_replica dump rowset --nodump_all_columns "
+ "--nodump_metadata --use_readable_format $0 $1 $2",
+ kTestTablet,
+ fs_paths,
+ encryption_args),
+ &stdout));
+
+ SCOPED_TRACE(stdout);
+ ASSERT_STR_CONTAINS(stdout, "Dumping rowset 0");
+ ASSERT_STR_CONTAINS(stdout, "Dumping rowset 1");
+ ASSERT_STR_CONTAINS(stdout, "Dumping rowset 2");
+ ASSERT_STR_NOT_CONTAINS(stdout, "RowSet metadata");
+ for (int row_idx = 0; row_idx < 30; row_idx++) {
+ ASSERT_STR_CONTAINS(stdout, Substitute("(int32 key=$0)", row_idx));
+ }
+ }
+ {
+ // Dump rowsets' primary key bounds only.
+ string stdout;
NO_FATALS(RunActionStdoutString(
Substitute("local_replica dump rowset --nodump_all_columns "
- "--nodump_metadata --nrows=15 $0 $1 $2",
+ "--nodump_metadata --use_readable_format "
+ "--dump_primary_key_bounds_only $0 $1 $2",
kTestTablet, fs_paths, encryption_args), &stdout));
SCOPED_TRACE(stdout);
@@ -2843,8 +2899,8 @@ TEST_F(ToolTest, TestLocalReplicaOps) {
ASSERT_STR_CONTAINS(stdout, "Dumping rowset 2");
ASSERT_STR_NOT_CONTAINS(stdout, "RowSet metadata");
for (int row_idx = 0; row_idx < 30; row_idx++) {
- string row_key = StringPrintf("800000%02x", row_idx);
- if (row_idx < 15) {
+ string row_key = Substitute("(int32 key=$0)", row_idx);
+ if (row_idx % 10 == 0 || row_idx % 10 == 9) {
ASSERT_STR_CONTAINS(stdout, row_key);
} else {
ASSERT_STR_NOT_CONTAINS(stdout, row_key);
diff --git a/src/kudu/tools/tool_action_local_replica.cc b/src/kudu/tools/tool_action_local_replica.cc
index 1f730b314..a764a56cf 100644
--- a/src/kudu/tools/tool_action_local_replica.cc
+++ b/src/kudu/tools/tool_action_local_replica.cc
@@ -84,6 +84,7 @@
#include "kudu/util/env.h"
#include "kudu/util/env_util.h"
#include "kudu/util/faststring.h"
+#include "kudu/util/flag_validators.h"
#include "kudu/util/locks.h"
#include "kudu/util/metrics.h"
#include "kudu/util/monotime.h"
@@ -103,6 +104,11 @@ class Messenger;
DEFINE_bool(dump_all_columns, true,
"If true, dumped rows include all of the columns in the rowset. If "
"false, dumped rows include just the key columns (in a comparable format).");
+DEFINE_bool(use_readable_format, false,
+ "Whether to dump primary key in human readable format, otherwise, dump primary "
+ "key in comparable format.");
+DEFINE_bool(dump_primary_key_bounds_only, false,
+ "Whether to dump rowset primary key bounds only, otherwise, dump all rows.");
DEFINE_bool(dump_metadata, true,
"If true, dumps rowset metadata before dumping data. If false, "
"only dumps the data.");
@@ -188,6 +194,24 @@ using strings::Substitute;
namespace kudu {
namespace tools {
+// Group validator for the 'dump rowset' flags. --use_readable_format and
+// --dump_primary_key_bounds_only are reported as meaningless while
+// --dump_all_columns is enabled: the first offending flag found is logged
+// and validation fails. Any other combination is accepted.
+bool ValidateDumpRowset() {
+  // Nothing to cross-check when only the key columns are being dumped.
+  if (!FLAGS_dump_all_columns) {
+    return true;
+  }
+
+  if (FLAGS_use_readable_format) {
+    LOG(ERROR) << "Flag --use_readable_format is meaningless "
+                  "when --dump_all_columns is enabled.";
+    return false;
+  }
+  if (FLAGS_dump_primary_key_bounds_only) {
+    LOG(ERROR) << "Flag --dump_primary_key_bounds_only is meaningless "
+                  "when --dump_all_columns is enabled.";
+    return false;
+  }
+  return true;
+}
+GROUP_FLAG_VALIDATOR(validate_dump_rowset, ValidateDumpRowset);
+
namespace {
constexpr const char* const kSeparatorLine =
@@ -764,6 +788,15 @@ struct TabletSizeStats {
table->AddRow({table_id, tablet_id, rowset_id, "*", HumanReadableNumBytes::ToString(total)});
}
};
+
+// Renders the primary key of 'row' (a row of the key projection 'key_proj')
+// as a single output string. With --use_readable_format set, the text is
+// whatever Schema::DebugRowKey() produces; otherwise the key is encoded into
+// the caller-supplied scratch buffer 'key' with EncodeComparableKey() and
+// returned as a hex string.
+string DumpRow(const Schema& key_proj, const RowBlockRow& row, faststring* key) {
+  if (!FLAGS_use_readable_format) {
+    key_proj.EncodeComparableKey(row, key);
+    return strings::b2a_hex(key->ToString());
+  }
+  return key_proj.DebugRowKey(row);
+}
} // anonymous namespace
Status SummarizeDataSize(const RunnerContext& context) {
@@ -980,14 +1013,27 @@ Status DumpRowSetInternal(const IOContext& ctx,
RowBlockMemory mem(1024);
RowBlock block(&key_proj, 100, &mem);
faststring key;
+ string lower_bound;
+ string current_upper_bound;
while (it->HasNext()) {
mem.Reset();
RETURN_NOT_OK(it->NextBlock(&block));
- for (int i = 0; i < block.nrows(); i++) {
- key_proj.EncodeComparableKey(block.row(i), &key);
- lines.emplace_back(strings::b2a_hex(key.ToString()));
+ if (FLAGS_dump_primary_key_bounds_only) {
+ if (lower_bound.empty()) {
+ lower_bound = DumpRow(key_proj, block.row(0), &key);
+ }
+ CHECK_GT(block.nrows(), 0);
+ current_upper_bound = DumpRow(key_proj, block.row(block.nrows() - 1), &key);
+ } else {
+ for (int i = 0; i < block.nrows(); i++) {
+ lines.emplace_back(DumpRow(key_proj, block.row(i), &key));
+ }
}
}
+ if (FLAGS_dump_primary_key_bounds_only) {
+ lines.emplace_back(lower_bound);
+ lines.emplace_back(current_upper_bound);
+ }
}
// Respect 'rows_left' when dumping the output.