You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2023/01/06 17:44:52 UTC

[kudu] branch master updated: [tools] Add 'kudu local_replica tmeta delete_rowsets' to delete rowsets from tablet

This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 27072d338 [tools] Add 'kudu local_replica tmeta delete_rowsets' to delete rowsets from tablet
27072d338 is described below

commit 27072d3382889b1852f4fef58010115585685bd3
Author: Yingchun Lai <la...@apache.org>
AuthorDate: Mon Dec 5 02:01:52 2022 +0800

    [tools] Add 'kudu local_replica tmeta delete_rowsets' to delete rowsets from tablet
    
    There are some use cases where we need to delete rowsets from a tablet.
    For example:
    1. Some blocks are corrupted in a single-node cluster and the server cannot
       be started. Note: some data will be lost in this case.
    2. Some rowsets are fully deleted but their blocks cannot be GCed (KUDU-3367).
       Note: no data will be lost in this case.
    
    The 'kudu pbc edit' CLI tool can be used to achieve that, but it is
    error-prone and hard to operate when working with a large amount of data.
    
    This patch introduces a new CLI tool 'kudu local_replica tmeta delete_rowsets'
    which makes removing rowsets from a tablet much easier.
    
    Change-Id: If2cf9035babf4c3af4c238cebe8dcecd2c65848f
    Reviewed-on: http://gerrit.cloudera.org:8080/19357
    Tested-by: Kudu Jenkins
    Reviewed-by: Alexey Serbin <al...@apache.org>
---
 src/kudu/tablet/tablet_metadata.cc          |   3 +-
 src/kudu/tools/kudu-tool-test.cc            | 155 ++++++++++++++++++++++++++++
 src/kudu/tools/tool_action_common.cc        |   3 +
 src/kudu/tools/tool_action_common.h         |   2 +
 src/kudu/tools/tool_action_local_replica.cc |  90 +++++++++++++++-
 5 files changed, 251 insertions(+), 2 deletions(-)

diff --git a/src/kudu/tablet/tablet_metadata.cc b/src/kudu/tablet/tablet_metadata.cc
index 7ef28cad0..d50002c92 100644
--- a/src/kudu/tablet/tablet_metadata.cc
+++ b/src/kudu/tablet/tablet_metadata.cc
@@ -60,7 +60,8 @@
 
 DEFINE_bool(enable_tablet_orphaned_block_deletion, true,
             "Whether to enable deletion of orphaned blocks from disk. "
-            "Note: This is only exposed for debugging purposes!");
+            "Note: This is only exposed for debugging purposes and used "
+            "in CLI tools.");
 TAG_FLAG(enable_tablet_orphaned_block_deletion, advanced);
 TAG_FLAG(enable_tablet_orphaned_block_deletion, hidden);
 TAG_FLAG(enable_tablet_orphaned_block_deletion, runtime);
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index c98811f65..bd19626ce 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -123,6 +123,7 @@
 #include "kudu/tserver/tserver_admin.proxy.h"
 #include "kudu/util/async_util.h"
 #include "kudu/util/env.h"
+#include "kudu/util/jsonreader.h"
 #include "kudu/util/metrics.h"
 #include "kudu/util/monotime.h"
 #include "kudu/util/net/net_util.h"
@@ -140,12 +141,15 @@
 #include "kudu/util/test_util.h"
 #include "kudu/util/url-coding.h"
 
+DECLARE_bool(enable_tablet_orphaned_block_deletion);
 DECLARE_bool(encrypt_data_at_rest);
 DECLARE_bool(fs_data_dirs_consider_available_space);
 DECLARE_bool(hive_metastore_sasl_enabled);
 DECLARE_bool(show_values);
 DECLARE_bool(show_attributes);
 DECLARE_int32(catalog_manager_inject_latency_load_ca_info_ms);
+DECLARE_int32(flush_threshold_mb);
+DECLARE_int32(flush_threshold_secs);
 DECLARE_int32(heartbeat_interval_ms);
 DECLARE_int32(tserver_unresponsive_timeout_ms);
 DECLARE_int32(rpc_negotiation_inject_delay_ms);
@@ -741,6 +745,40 @@ class ToolTest : public KuduTest {
     return JoinStrings(master_addrs, ",");
   }
 
+  // Dumps the metadata file of 'tablet_id' located under 'metadata_path' with
+  // 'kudu pbc dump --json' and stores the ids of all rowsets listed in it into
+  // 'rowset_ids'. 'encryption_args' is appended verbatim to the tool's command line.
+  //
+  // 'rowset_ids' is cleared first, so it is left empty on failure and when the dump has
+  // no "rowsets" field. Returns a non-OK status if the tool fails, if its output cannot
+  // be parsed as JSON, or if a rowset id is not a valid integer.
+  Status ListTabletRowsetIds(const string& metadata_path,
+                             const string& tablet_id,
+                             const string& encryption_args,
+                             set<int64_t>* rowset_ids) {
+    rowset_ids->clear();
+
+    // Run the tool through the Status-returning helper so that a failure of the tool is
+    // propagated to the caller together with the tool's stderr.
+    string stdout;
+    string stderr;
+    const Status run_status = RunActionStdoutStderrString(
+        Substitute("pbc dump $0 $1 --json",
+                   JoinPathSegments(metadata_path, tablet_id), encryption_args),
+        &stdout, &stderr);
+    if (!run_status.ok()) {
+      return run_status.CloneAndPrepend(
+          Substitute("could not dump tablet metadata, stderr: $0", stderr));
+    }
+
+    JsonReader r(stdout);
+    RETURN_NOT_OK(r.Init());
+    vector<const rapidjson::Value*> results;
+    const Status s = r.ExtractObjectArray(r.root(), "rowsets", &results);
+    if (s.IsNotFound()) {
+      // No "rowsets" field in the dump: report an empty set.
+      return Status::OK();
+    }
+    RETURN_NOT_OK(s);
+
+    for (const auto& result : results) {
+      string rowset_id_str;
+      RETURN_NOT_OK(r.ExtractString(result, "id", &rowset_id_str));
+      int64_t rowset_id;
+      if (!safe_strto64(rowset_id_str, &rowset_id)) {
+        return Status::InvalidArgument(Substitute("invalid rowset id: $0", rowset_id_str));
+      }
+      InsertOrDie(rowset_ids, rowset_id);
+    }
+
+    return Status::OK();
+  }
+
  protected:
   // Note: Each test case must have a single invocation of RunLoadgen() otherwise it leads to
   //       memory leaks.
@@ -1280,6 +1318,7 @@ TEST_F(ToolTest, TestModeHelp) {
   {
     const vector<string> kLocalReplicaModeRegexes = {
         "cmeta.*Operate on a local tablet replica's consensus",
+        "tmeta.*Edit a local tablet metadata",
         "data_size.*Summarize the data size",
         "dump.*Dump a Kudu filesystem",
         "copy_from_remote.*Copy tablet replicas from a remote server",
@@ -1329,6 +1368,12 @@ TEST_F(ToolTest, TestModeHelp) {
     NO_FATALS(RunTestHelp("local-replica copy-from-local --help",
                           kLocalReplicaCopyFromRemoteRegexes));
   }
+  {
+    const vector<string> kLocalReplicaTmetaRegexes = {
+        "delete_rowsets.*Delete rowsets from a local replica.",
+    };
+    NO_FATALS(RunTestHelp("local_replica tmeta", kLocalReplicaTmetaRegexes));
+  }
   {
     const string kCmd = "master";
     const vector<string> kMasterModeRegexes = {
@@ -8754,6 +8799,116 @@ TEST_F(ToolTest, TestRebuildTserverByLocalReplicaCopy) {
   });
 }
 
+// Test 'local_replica tmeta delete_rowsets': delete a rowset, then roll back using the backup.
+TEST_F(ToolTest, TestLocalReplicaTmeta) {
+  constexpr const char* const kTableName = "kudu.local_replica.tmeta";
+  const int kNumTablets = 4;
+  const int kNumTabletServers = 1;
+
+  FLAGS_flush_threshold_mb = 1;
+  FLAGS_flush_threshold_secs = 1;
+  InternalMiniClusterOptions opts;
+  opts.num_tablet_servers = kNumTabletServers;
+  NO_FATALS(StartMiniCluster(std::move(opts)));
+
+  TestWorkload workload(mini_cluster_.get());
+  workload.set_num_tablets(kNumTablets);
+  workload.set_num_replicas(1);
+  workload.set_table_name(kTableName);
+  workload.Setup();
+  workload.Start();
+  // Wait longer than the flush thresholds set above so that some rowsets get flushed.
+  SleepFor(MonoDelta::FromSeconds(2));
+  workload.StopAndJoin();
+  int64_t total_row_count = workload.rows_inserted();
+
+  string encryption_args;
+  if (env_->IsEncryptionEnabled()) {
+    encryption_args =
+        GetEncryptionArgs() + " --instance_file=" +
+        JoinPathSegments(mini_cluster_->mini_tablet_server(0)->options()->fs_opts.wal_root,
+                         "instance");
+  }
+  const string& flags =
+      Substitute("-fs_wal_dir=$0 --fs_data_dirs=$1 $2",
+                 mini_cluster_->mini_tablet_server(0)->options()->fs_opts.wal_root,
+                 JoinStrings(mini_cluster_->mini_tablet_server(0)->options()->fs_opts.data_roots,
+                             ","),
+                 encryption_args);
+
+  // Find the tablet to edit.
+  vector<string> tablet_ids;
+  NO_FATALS(RunActionStdoutLines(Substitute("local_replica list $0", flags), &tablet_ids));
+  ASSERT_EQ(kNumTablets, tablet_ids.size());
+  const string& tablet_id = tablet_ids[0];
+  const string& metadata_path = mini_cluster_->mini_tablet_server(0)->server()
+                                    ->fs_manager()->GetTabletMetadataDir();
+  const string& original_file = JoinPathSegments(metadata_path, tablet_id);
+
+  // The server must be shut down before editing: the tool fails while it is still running.
+  string stdout;
+  string stderr;
+  Status s = RunActionStdoutStderrString(
+      Substitute("local_replica tmeta delete_rowsets $0 $1 $2", tablet_id, 0, flags),
+      &stdout, &stderr);
+  ASSERT_TRUE(s.IsRuntimeError()) << s.ToString();
+  ASSERT_STR_CONTAINS(stderr, "failed to load instance files");
+
+  // List the rowsets after shutting the server down. The set of rowsets does not change
+  // unless the tablet metadata is edited.
+  mini_cluster_->Shutdown();
+  set<int64_t> rowset_ids;
+  ASSERT_OK(ListTabletRowsetIds(metadata_path, tablet_id, encryption_args, &rowset_ids));
+  ASSERT_FALSE(rowset_ids.empty());
+  int64_t rowset_id_to_delete = *rowset_ids.begin();
+
+  // Remove one rowset from the tablet.
+  ASSERT_OK(RunActionStdoutStderrString(
+      Substitute("local_replica tmeta delete_rowsets $0 $1 $2 --backup_metadata=true "
+                 "--enable_tablet_orphaned_block_deletion=false",
+                 tablet_id, rowset_id_to_delete, flags),
+      &stdout, &stderr));
+
+  // Check the rowset has been deleted successfully.
+  set<int64_t> new_rowset_ids;
+  ASSERT_OK(ListTabletRowsetIds(metadata_path, tablet_id, encryption_args, &new_rowset_ids));
+  ASSERT_EQ(rowset_ids.size() - 1, new_rowset_ids.size());
+  ASSERT_FALSE(ContainsKey(new_rowset_ids, rowset_id_to_delete));
+
+  // The server can be started successfully.
+  // Disable orphaned blocks deletion, because we will roll back to check the original data.
+  FLAGS_enable_tablet_orphaned_block_deletion = false;
+  ASSERT_OK(mini_cluster_->Start());
+  ASSERT_EVENTUALLY([&] {
+    ClusterVerifier v(mini_cluster_.get());
+    NO_FATALS(v.CheckRowCount(kTableName, ClusterVerifier::AT_LEAST, 1));
+  });
+
+  // Roll back the metadata and restart server.
+  mini_cluster_->Shutdown();
+  string backup_file;
+  vector<string> metadata_files;
+  ASSERT_OK(env_->GetChildren(metadata_path, &metadata_files));
+  for (const auto& metadata_file : metadata_files) {
+    if (MatchPattern(metadata_file, Substitute("*$0.bak.*", tablet_id))) {
+      backup_file = metadata_file;
+      break;
+    }
+  }
+  ASSERT_FALSE(backup_file.empty());
+  ASSERT_OK(env_->RenameFile(JoinPathSegments(metadata_path, backup_file), original_file));
+  // Now it's safe to enable orphaned block deletion, because there are no orphaned blocks in
+  // the original tablet metadata file.
+  FLAGS_enable_tablet_orphaned_block_deletion = true;
+  ASSERT_OK(mini_cluster_->Start());
+
+  // Check there isn't any data loss.
+  ASSERT_EVENTUALLY([&] {
+    ClusterVerifier v(mini_cluster_.get());
+    NO_FATALS(v.CheckRowCount(kTableName, ClusterVerifier::EXACTLY, total_row_count));
+  });
+}
+
 class SetFlagForAllTest :
     public ToolTest,
     public ::testing::WithParamInterface<bool> {
diff --git a/src/kudu/tools/tool_action_common.cc b/src/kudu/tools/tool_action_common.cc
index ca28e04a2..02828345a 100644
--- a/src/kudu/tools/tool_action_common.cc
+++ b/src/kudu/tools/tool_action_common.cc
@@ -276,6 +276,9 @@ const char* const kTabletIdArgDesc = "Tablet Identifier";
 const char* const kTabletIdsCsvArg = "tablet_ids";
 const char* const kTabletIdsCsvArgDesc =
     "Comma-separated list of Tablet Identifiers";
+const char* const kRowsetIdsCsvArg = "rowset_ids";
+const char* const kRowsetIdsCsvArgDesc =
+    "Comma-separated list of Rowset Identifiers";
 
 const char* const kMasterAddressArg = "master_address";
 const char* const kMasterAddressDesc = "Address of a Kudu Master of form "
diff --git a/src/kudu/tools/tool_action_common.h b/src/kudu/tools/tool_action_common.h
index c6e5162d3..157c4aa86 100644
--- a/src/kudu/tools/tool_action_common.h
+++ b/src/kudu/tools/tool_action_common.h
@@ -73,6 +73,8 @@ extern const char* const kTabletIdArg;
 extern const char* const kTabletIdArgDesc;
 extern const char* const kTabletIdsCsvArg;
 extern const char* const kTabletIdsCsvArgDesc;
+extern const char* const kRowsetIdsCsvArg;
+extern const char* const kRowsetIdsCsvArgDesc;
 
 extern const char* const kMasterAddressArg;
 extern const char* const kMasterAddressDesc;
diff --git a/src/kudu/tools/tool_action_local_replica.cc b/src/kudu/tools/tool_action_local_replica.cc
index a764a56cf..e8ec825af 100644
--- a/src/kudu/tools/tool_action_local_replica.cc
+++ b/src/kudu/tools/tool_action_local_replica.cc
@@ -68,6 +68,7 @@
 #include "kudu/gutil/strings/stringpiece.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/gutil/strings/util.h"
+#include "kudu/gutil/walltime.h"
 #include "kudu/master/sys_catalog.h"
 #include "kudu/tablet/diskrowset.h"
 #include "kudu/tablet/metadata.pb.h"
@@ -101,6 +102,8 @@ class Messenger;
 }  // namespace rpc
 }  // namespace kudu
 
+DEFINE_bool(backup_metadata, true,
+            "Whether to backup tablet metadata file when editing it.");
 DEFINE_bool(dump_all_columns, true,
             "If true, dumped rows include all of the columns in the rowset. If "
             "false, dumped rows include just the key columns (in a comparable format).");
@@ -171,6 +174,7 @@ using kudu::rpc::Messenger;
 using kudu::tablet::DiskRowSet;
 using kudu::tablet::RowIteratorOptions;
 using kudu::tablet::RowSetMetadata;
+using kudu::tablet::RowSetMetadataIds;
 using kudu::tablet::TabletDataState;
 using kudu::tablet::TabletMetadata;
 using kudu::tablet::TabletReplica;
@@ -194,7 +198,7 @@ using strings::Substitute;
 namespace kudu {
 namespace tools {
 
-bool ValidateDumpRowset()  {
+bool ValidateDumpRowset() {
   if (FLAGS_dump_all_columns) {
     if (FLAGS_use_readable_format) {
       LOG(ERROR) << "Flag --use_readable_format is meaningless "
@@ -603,6 +607,52 @@ Status SetRaftTerm(const RunnerContext& context) {
   return cmeta->Flush();
 }
 
+// Removes the rowsets given by the comma-separated 'rowset_ids' argument from the
+// metadata of the local replica of the tablet given by the 'tablet_id' argument, and
+// flushes the updated metadata to disk.
+//
+// Every requested identifier is validated before anything on disk is modified: it must
+// parse as an integer and match a rowset currently present in the tablet metadata.
+// If --backup_metadata is set, the original metadata file is moved to
+// '<metadata path>.bak.<current time in microseconds>' before the update is flushed.
+//
+// Returns InvalidArgument if no identifier is provided or one cannot be parsed, NotFound
+// if an identifier does not match any rowset of the tablet, or the error hit while
+// opening the filesystem or while loading, backing up or flushing the metadata.
+Status DeleteRowsets(const RunnerContext& context) {
+  const string& tablet_id = FindOrDie(context.required_args, kTabletIdArg);
+  const string& rowset_ids_str = FindOrDie(context.required_args, kRowsetIdsCsvArg);
+  vector<string> rowset_ids_vec = strings::Split(rowset_ids_str, ",", strings::SkipEmpty());
+  if (rowset_ids_vec.empty()) {
+    return Status::InvalidArgument("no rowset identifiers provided");
+  }
+
+  RowSetMetadataIds to_remove;
+  for (const auto& rowset_id_str : rowset_ids_vec) {
+    int64_t rowset_id;
+    if (!safe_strto64(rowset_id_str.c_str(), &rowset_id)) {
+      return Status::InvalidArgument(Substitute("$0 is not a valid rowset id.", rowset_id_str));
+    }
+    to_remove.insert(rowset_id);
+  }
+
+  FsManagerOpts fs_opts;
+  fs_opts.read_only = false;
+  fs_opts.skip_block_manager = false;
+  fs_opts.update_instances = fs::UpdateInstanceBehavior::DONT_UPDATE;
+  FsManager fs_manager(Env::Default(), std::move(fs_opts));
+  RETURN_NOT_OK(fs_manager.Open());
+
+  scoped_refptr<TabletMetadata> meta;
+  RETURN_NOT_OK_PREPEND(TabletMetadata::Load(&fs_manager, tablet_id, &meta),
+                        Substitute("could not load tablet metadata for $0", tablet_id));
+
+  // Reject identifiers the tablet does not have before touching any file, rather than
+  // relying on UpdateAndFlush() to complain about them: otherwise a mistyped id could
+  // end up reported as a successful removal.
+  RowSetMetadataIds missing(to_remove);
+  for (const auto& rs_meta : meta->rowsets()) {
+    missing.erase(rs_meta->id());
+  }
+  if (!missing.empty()) {
+    return Status::NotFound(Substitute("tablet $0 has no rowsets with identifiers: $1",
+                                       tablet_id, JoinElements(missing, ",")));
+  }
+
+  // Remember where the original file went, so a failed flush can point the operator at it.
+  string backup_note;
+  if (FLAGS_backup_metadata) {
+    // Move the old tablet metadata file to a backup location.
+    const string original_path = fs_manager.GetTabletMetadataPath(tablet_id);
+    const string backup_path = Substitute("$0.bak.$1", original_path, GetCurrentTimeMicros());
+    RETURN_NOT_OK_PREPEND(Env::Default()->RenameFile(original_path, backup_path),
+                          "couldn't back up original file");
+    LOG(INFO) << "Moved original file to " << backup_path;
+    backup_note = Substitute("; the original metadata file was moved to $0", backup_path);
+  }
+
+  RETURN_NOT_OK_PREPEND(
+      meta->UpdateAndFlush(to_remove, /* to_add */ {}, tablet::TabletMetadata::kNoMrsFlushed),
+      Substitute("could not update metadata of tablet $0$1", tablet_id, backup_note));
+
+  LOG(INFO) << "Successfully removed rowsets with identifiers: " << JoinElements(to_remove, ",");
+
+  return Status::OK();
+}
+
 Status CopyFromRemote(const RunnerContext& context) {
   // Parse the tablet ID and source arguments.
   const string& tablet_ids_str = FindOrDie(context.required_args, kTabletIdsCsvArg);
@@ -1209,6 +1259,37 @@ unique_ptr<Mode> BuildLocalReplicaMode() {
       .AddOptionalParameter("fs_wal_dir")
       .Build();
 
+  unique_ptr<Action> delete_rowsets =
+      ActionBuilder("delete_rowsets", &DeleteRowsets)
+          .Description("Delete rowsets from a local replica.")
+          .ExtraDescription("The common usage pattern of this tool is described below.\n"
+              "That involves checking the result by a dry run of the tablet server with the "
+              "modified tablet's data after running the tool. It's crucial to customize tablet "
+              "server's --enable_tablet_orphaned_block_deletion flag for the dry run to avoid "
+              "deleting orphaned blocks, so it's possible to roll back to the original state of "
+              "the tablet's data if something goes wrong. First, run the tool with default "
+              "settings for  --backup_metadata and --enable_tablet_orphaned_block_deletion to (a) "
+              "create a backup of the original metadata file and (b) keep the orphaned blocks on "
+              "the file system. Second, start the tablet server with "
+              "--enable_tablet_orphaned_block_deletion=false to check whether the change worked as "
+              "expected and the tablet server works fine with the new state of the tablet's data. "
+              "If it doesn't work as expected, stop the tablet server (if still running), rollback "
+              "the change by replacing the updated metadata file with the backup created earlier, "
+              "and retry the  procedure again, specifying proper rowset identifiers to the tool. "
+              "If the change works as expected and the tablet server runs fine after with the "
+              "updated tablet's data, remove the customization for the "
+              "--enable_tablet_orphaned_block_deletion flag and restart the tablet server.")
+          .AddRequiredParameter({ kTabletIdArg, kTabletIdArgDesc })
+          .AddRequiredParameter({ kRowsetIdsCsvArg, kRowsetIdsCsvArgDesc })
+          .AddOptionalParameter("backup_metadata")
+          // Set --enable_tablet_orphaned_block_deletion to false to promote safer usage
+          // of this tool.
+          .AddOptionalParameter("enable_tablet_orphaned_block_deletion", string("false"))
+          .AddOptionalParameter("fs_data_dirs")
+          .AddOptionalParameter("fs_metadata_dir")
+          .AddOptionalParameter("fs_wal_dir")
+          .Build();
+
   unique_ptr<Mode> cmeta =
       ModeBuilder("cmeta")
       .Description("Operate on a local tablet replica's consensus "
@@ -1218,6 +1299,12 @@ unique_ptr<Mode> BuildLocalReplicaMode() {
       .AddAction(std::move(set_term))
       .Build();
 
+  unique_ptr<Mode> tmeta =
+      ModeBuilder("tmeta")
+      .Description("Edit a local tablet metadata")
+      .AddAction(std::move(delete_rowsets))
+      .Build();
+
   unique_ptr<Action> copy_from_remote =
       ActionBuilder("copy_from_remote", &CopyFromRemote)
       .Description("Copy tablet replicas from a remote server")
@@ -1280,6 +1367,7 @@ unique_ptr<Mode> BuildLocalReplicaMode() {
   return ModeBuilder("local_replica")
       .Description("Operate on local tablet replicas via the local filesystem")
       .AddMode(std::move(cmeta))
+      .AddMode(std::move(tmeta))
       .AddAction(std::move(copy_from_local))
       .AddAction(std::move(copy_from_remote))
       .AddAction(std::move(data_size))