You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by to...@apache.org on 2017/06/08 22:56:20 UTC

[5/5] kudu git commit: tool: add a 'local-replica cmeta set-term' tool

tool: add a 'local-replica cmeta set-term' tool

Through abuse of some other tools (and restoring from a backup of a
cmeta file) I ended up with a situation where a tablet's WAL contained
operations from a term higher than the term listed in its cmeta file.
This caused the tablet to fail to start due to seeing this inconsistency
(the highest term in the WAL should always be <= the term in the cmeta).

This patch adds a tool that I wrote in order to recover from the
situation. The tool allows the operator to override the term stored in
the cmeta file. It's restricted to only bumping the term upwards, since
doing so is always "safe" whereas reducing it backwards could have
really bad consequences.

I also took the opportunity to add some new tests for the 'cmeta' tool
functionality.

Change-Id: I7525ffbe772f214e0972a6b450f3f1609109ca05
Reviewed-on: http://gerrit.cloudera.org:8080/7049
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Reviewed-by: Mike Percy <mp...@apache.org>
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin <as...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/9ae11e6d
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/9ae11e6d
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/9ae11e6d

Branch: refs/heads/master
Commit: 9ae11e6d86eb33321fbb219927b27424e58f6e2c
Parents: e77538b
Author: Todd Lipcon <to...@cloudera.com>
Authored: Thu Jun 1 14:15:49 2017 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Thu Jun 8 22:54:59 2017 +0000

----------------------------------------------------------------------
 src/kudu/tools/kudu-tool-test.cc            | 61 +++++++++++++++++++-
 src/kudu/tools/tool_action_local_replica.cc | 72 ++++++++++++++++++++----
 2 files changed, 122 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/9ae11e6d/src/kudu/tools/kudu-tool-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 615ef1e..7bc23a7 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -405,7 +405,8 @@ TEST_F(ToolTest, TestModeHelp) {
   {
     const vector<string> kLocalReplicaCMetaRegexes = {
         "print_replica_uuids.*Print all tablet replica peer UUIDs",
-        "rewrite_raft_config.*Rewrite a tablet replica"
+        "rewrite_raft_config.*Rewrite a tablet replica",
+        "set_term.*Bump the current term"
     };
     NO_FATALS(RunTestHelp("local_replica cmeta", kLocalReplicaCMetaRegexes));
     // Try with a hyphen instead of an underscore.
@@ -1480,6 +1481,64 @@ TEST_F(ToolTest, TestLocalReplicaTombstoneDelete) {
   }
 }
 
+// Test for 'local_replica cmeta' functionality.
+TEST_F(ToolTest, TestLocalReplicaCMetaOps) {
+  NO_FATALS(StartMiniCluster());
+
+  // TestWorkLoad.Setup() internally generates a table.
+  TestWorkload workload(mini_cluster_.get());
+  workload.set_num_replicas(1);
+  workload.Setup();
+  MiniTabletServer* ts = mini_cluster_->mini_tablet_server(0);
+  const string ts_uuid = ts->uuid();
+  const string& flags = Substitute("-fs-wal-dir $0", ts->options()->fs_opts.wal_path);
+  string tablet_id;
+  {
+    vector<string> tablets;
+    NO_FATALS(RunActionStdoutLines(Substitute("local_replica list $0", flags), &tablets));
+    ASSERT_EQ(1, tablets.size());
+    tablet_id = tablets[0];
+  }
+  const auto& cmeta_path = ts->server()->fs_manager()->GetConsensusMetadataPath(tablet_id);
+
+  ts->Shutdown();
+
+  // Test print_replica_uuids.
+  // We only have a single replica, so we expect one line, with our server's UUID.
+  {
+    vector<string> uuids;
+    NO_FATALS(RunActionStdoutLines(Substitute("local_replica cmeta print_replica_uuids $0 $1",
+                                               flags, tablet_id), &uuids));
+    ASSERT_EQ(1, uuids.size());
+    EXPECT_EQ(ts_uuid, uuids[0]);
+  }
+
+  // Test using set-term to bump the term to 123.
+  {
+    NO_FATALS(RunActionStdoutNone(Substitute("local_replica cmeta set-term $0 $1 123",
+                                             flags, tablet_id)));
+
+    string stdout;
+    NO_FATALS(RunActionStdoutString(Substitute("pbc dump $0", cmeta_path),
+                                    &stdout));
+    ASSERT_STR_CONTAINS(stdout, "current_term: 123");
+  }
+
+  // Test that set-term refuses to decrease the term.
+  {
+    string stdout, stderr;
+    Status s = RunTool(Substitute("local_replica cmeta set-term $0 $1 10",
+                                  flags, tablet_id),
+                       &stdout, &stderr,
+                       /* stdout_lines = */ nullptr,
+                       /* stderr_lines = */ nullptr);
+    EXPECT_FALSE(s.ok());
+    EXPECT_EQ("", stdout);
+    EXPECT_THAT(stderr, testing::HasSubstr(
+        "specified term 10 must be higher than current term 123"));
+  }
+}
+
 TEST_F(ToolTest, TestTserverList) {
   NO_FATALS(StartExternalMiniCluster({}, {}, 1));
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/9ae11e6d/src/kudu/tools/tool_action_local_replica.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_action_local_replica.cc b/src/kudu/tools/tool_action_local_replica.cc
index 0736a66..0cb438c 100644
--- a/src/kudu/tools/tool_action_local_replica.cc
+++ b/src/kudu/tools/tool_action_local_replica.cc
@@ -129,6 +129,8 @@ namespace {
 const char* const kSeparatorLine =
     "----------------------------------------------------------------------\n";
 
+const char* const kTermArg = "term";
+
 string Indent(int indent) {
   return string(indent, ' ');
 }
@@ -224,6 +226,19 @@ Status PrintReplicaUuids(const RunnerContext& context) {
   return Status::OK();
 }
 
+Status BackupConsensusMetadata(FsManager* fs_manager,
+                               const string& tablet_id) {
+  Env* env = fs_manager->env();
+  string cmeta_filename = fs_manager->GetConsensusMetadataPath(tablet_id);
+  string backup_filename = Substitute("$0.pre_rewrite.$1", cmeta_filename, env->NowMicros());
+  WritableFileOptions opts;
+  opts.mode = Env::CREATE_NON_EXISTING;
+  opts.sync_on_close = true;
+  RETURN_NOT_OK(env_util::CopyFile(env, cmeta_filename, backup_filename, opts));
+  LOG(INFO) << "Backed up old consensus metadata to " << backup_filename;
+  return Status::OK();
+}
+
 Status RewriteRaftConfig(const RunnerContext& context) {
   // Parse tablet ID argument.
   const string& tablet_id = FindOrDie(context.required_args, kTabletIdArg);
@@ -246,15 +261,7 @@ Status RewriteRaftConfig(const RunnerContext& context) {
   Env* env = Env::Default();
   FsManager fs_manager(env, FsManagerOpts());
   RETURN_NOT_OK(fs_manager.Open());
-  string cmeta_filename = fs_manager.GetConsensusMetadataPath(tablet_id);
-  string backup_filename = Substitute("$0.pre_rewrite.$1",
-                                      cmeta_filename, env->NowMicros());
-  WritableFileOptions opts;
-  opts.mode = Env::CREATE_NON_EXISTING;
-  opts.sync_on_close = true;
-  RETURN_NOT_OK(env_util::CopyFile(env, cmeta_filename,
-                                   backup_filename, opts));
-  LOG(INFO) << "Backed up current config to " << backup_filename;
+  RETURN_NOT_OK(BackupConsensusMetadata(&fs_manager, tablet_id));
 
   // Load the cmeta file and rewrite the raft config.
   unique_ptr<ConsensusMetadata> cmeta;
@@ -276,6 +283,41 @@ Status RewriteRaftConfig(const RunnerContext& context) {
   return cmeta->Flush();
 }
 
+Status SetRaftTerm(const RunnerContext& context) {
+  // Parse tablet ID argument.
+  const string& tablet_id = FindOrDie(context.required_args, kTabletIdArg);
+  const string& new_term_str = FindOrDie(context.required_args, kTermArg);
+  int64_t new_term;
+  if (!safe_strto64(new_term_str, &new_term) || new_term <= 0) {
+    return Status::InvalidArgument("invalid term");
+  }
+
+  // Load the current metadata from disk and verify that the intended operation is safe.
+  Env* env = Env::Default();
+  FsManager fs_manager(env, FsManagerOpts());
+  RETURN_NOT_OK(fs_manager.Open());
+  // Load the cmeta file and rewrite the raft config.
+  unique_ptr<ConsensusMetadata> cmeta;
+  RETURN_NOT_OK(ConsensusMetadata::Load(&fs_manager, tablet_id,
+                                        fs_manager.uuid(), &cmeta));
+  if (new_term <= cmeta->current_term()) {
+    return Status::InvalidArgument(Substitute(
+        "specified term $0 must be higher than current term $1",
+        new_term, cmeta->current_term()));
+  }
+
+  // Make a copy of the old file before rewriting it.
+  RETURN_NOT_OK(BackupConsensusMetadata(&fs_manager, tablet_id));
+
+  // Update and flush.
+  cmeta->set_current_term(new_term);
+  // The 'voted_for' field is relative to the term stored in 'current_term'. So, if we
+  // have changed to a new term, we need to also clear any previous vote record that was
+  // associated with the old term.
+  cmeta->clear_voted_for();
+  return cmeta->Flush();
+}
+
 Status CopyFromRemote(const RunnerContext& context) {
   // Parse the tablet ID and source arguments.
   const string& tablet_id = FindOrDie(context.required_args, kTabletIdArg);
@@ -758,12 +800,23 @@ unique_ptr<Mode> BuildLocalReplicaMode() {
       .AddOptionalParameter("fs_data_dirs")
       .Build();
 
+  unique_ptr<Action> set_term =
+      ActionBuilder("set_term", &SetRaftTerm)
+      .Description("Bump the current term stored in consensus metadata")
+      .AddRequiredParameter({ kTabletIdArg, kTabletIdArgDesc })
+      .AddRequiredParameter({ kTermArg, "the new raft term (must be greater "
+        "than the current term)" })
+      .AddOptionalParameter("fs_wal_dir")
+      .AddOptionalParameter("fs_data_dirs")
+      .Build();
+
   unique_ptr<Mode> cmeta =
       ModeBuilder("cmeta")
       .Description("Operate on a local tablet replica's consensus "
         "metadata file")
       .AddAction(std::move(print_replica_uuids))
       .AddAction(std::move(rewrite_raft_config))
+      .AddAction(std::move(set_term))
       .Build();
 
   unique_ptr<Action> copy_from_remote =
@@ -806,4 +859,3 @@ unique_ptr<Mode> BuildLocalReplicaMode() {
 
 } // namespace tools
 } // namespace kudu
-