You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2018/02/24 20:28:31 UTC

[2/2] kudu git commit: KUDU-1613: evict replicas with wrong server UUIDs

KUDU-1613: evict replicas with wrong server UUIDs

Currently, if a tablet server is wiped and restarted at the same host
and port with a new UUID assigned to it before the replicas on its
previous incarnation are evicted, those replicas won't ever be evicted.
With this patch, upon receiving a response with WRONG_SERVER_UUID,
leaders will treat the follower as failed and handle it appropriately,
re-replicating them elsewhere.

A new test case is added to raft_consensus-itest that is set up to
prompt eviction (i.e. low timeouts, ample servers to replicate to), and
without the change, this test fails.

Change-Id: If0c7ae9dfbd99c2c9a38e8b1f7748c70882e06b4
Reviewed-on: http://gerrit.cloudera.org:8080/9136
Reviewed-by: David Ribeiro Alves <da...@gmail.com>
Tested-by: Kudu Jenkins
Reviewed-on: http://gerrit.cloudera.org:8080/9410


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/1042e416
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/1042e416
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/1042e416

Branch: refs/heads/branch-1.6.x
Commit: 1042e4164629155976ca1b7a0856a1a262876693
Parents: 2bf90b4
Author: Andrew Wong <aw...@cloudera.com>
Authored: Tue Jan 23 21:16:54 2018 -0800
Committer: Andrew Wong <aw...@cloudera.com>
Committed: Sat Feb 24 02:44:27 2018 +0000

----------------------------------------------------------------------
 src/kudu/consensus/consensus_peers.cc           | 20 ++++--
 .../integration-tests/raft_consensus-itest.cc   | 72 ++++++++++++++++++++
 src/kudu/mini-cluster/external_mini_cluster.h   |  3 +
 3 files changed, 88 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/1042e416/src/kudu/consensus/consensus_peers.cc
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/consensus_peers.cc b/src/kudu/consensus/consensus_peers.cc
index ffa373d..52e6b3f 100644
--- a/src/kudu/consensus/consensus_peers.cc
+++ b/src/kudu/consensus/consensus_peers.cc
@@ -315,13 +315,19 @@ void Peer::ProcessResponse() {
   if (response_.has_error()) {
     Status response_status = StatusFromPB(response_.error().status());
     PeerStatus ps;
-    if (response_.error().code() == TabletServerErrorPB::TABLET_FAILED) {
-      ps = PeerStatus::TABLET_FAILED;
-    } else if (response_.error().code() == TabletServerErrorPB::TABLET_NOT_FOUND) {
-      ps = PeerStatus::TABLET_NOT_FOUND;
-    } else {
-      // Unknown kind of error.
-      ps = PeerStatus::REMOTE_ERROR;
+    TabletServerErrorPB resp_error = response_.error();
+    switch (response_.error().code()) {
+      // We treat WRONG_SERVER_UUID as failed.
+      case TabletServerErrorPB::WRONG_SERVER_UUID: FALLTHROUGH_INTENDED;
+      case TabletServerErrorPB::TABLET_FAILED:
+        ps = PeerStatus::TABLET_FAILED;
+        break;
+      case TabletServerErrorPB::TABLET_NOT_FOUND:
+        ps = PeerStatus::TABLET_NOT_FOUND;
+        break;
+      default:
+        // Unknown kind of error.
+        ps = PeerStatus::REMOTE_ERROR;
     }
     queue_->UpdatePeerStatus(peer_pb_.permanent_uuid(), ps, response_status);
     ProcessResponseError(response_status);

http://git-wip-us.apache.org/repos/asf/kudu/blob/1042e416/src/kudu/integration-tests/raft_consensus-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/raft_consensus-itest.cc b/src/kudu/integration-tests/raft_consensus-itest.cc
index 72690ce..cdd164a 100644
--- a/src/kudu/integration-tests/raft_consensus-itest.cc
+++ b/src/kudu/integration-tests/raft_consensus-itest.cc
@@ -70,8 +70,12 @@
 #include "kudu/tserver/tserver_service.proxy.h"
 #include "kudu/util/atomic.h"
 #include "kudu/util/countdown_latch.h"
+#include "kudu/util/env.h"
+#include "kudu/util/env_util.h"
 #include "kudu/util/metrics.h"
 #include "kudu/util/monotime.h"
+#include "kudu/util/net/net_util.h"
+#include "kudu/util/path_util.h"
 #include "kudu/util/pb_util.h"
 #include "kudu/util/random.h"
 #include "kudu/util/status.h"
@@ -94,7 +98,10 @@ using kudu::client::KuduInsert;
 using kudu::client::KuduSession;
 using kudu::client::KuduTable;
 using kudu::client::sp::shared_ptr;
+using kudu::cluster::ExternalDaemonOptions;
 using kudu::cluster::ExternalTabletServer;
+using kudu::cluster::ExternalMiniCluster;
+using kudu::cluster::ExternalMiniClusterOptions;
 using kudu::consensus::ConsensusRequestPB;
 using kudu::consensus::ConsensusResponsePB;
 using kudu::consensus::ConsensusServiceProxy;
@@ -119,6 +126,7 @@ using kudu::itest::WaitUntilLeader;
 using kudu::itest::WriteSimpleTestRow;
 using kudu::master::TabletLocationsPB;
 using kudu::master::VOTER_REPLICA;
+using kudu::env_util::ListFilesInDir;
 using kudu::pb_util::SecureDebugString;
 using kudu::pb_util::SecureShortDebugString;
 using kudu::rpc::RpcController;
@@ -2577,5 +2585,69 @@ TEST_F(RaftConsensusITest, TestLogIOErrorIsFatal) {
   ASSERT_OK(ext_tservers[0]->WaitForFatal(MonoDelta::FromSeconds(10)));
 }
 
+// KUDU-1613: Test that when we reset and restart a tablet server, with a new
+// uuid but with the same host and port, replicas that were hosted by the
+// previous incarnation are correctly detected as failed and eventually
+// re-replicated.
+TEST_F(RaftConsensusITest, TestRestartWithDifferentUUID) {
+  // Start a cluster and insert data.
+  ExternalMiniClusterOptions opts;
+  opts.num_tablet_servers = 5;
+  opts.extra_tserver_flags = {
+    // Set a low timeout. If we can't re-replicate in a reasonable amount of
+    // time, it means we're not evicting at all.
+    "--follower_unavailable_considered_failed_sec=10",
+  };
+  cluster_.reset(new ExternalMiniCluster(std::move(opts)));
+  ASSERT_OK(cluster_->Start());
+
+  // Write some data. In writing many tablets, we're making it more likely that
+  // all tablet servers will have some tablets on them.
+  TestWorkload writes(cluster_.get());
+  writes.set_num_tablets(25);
+  writes.set_timeout_allowed(true);
+  writes.Setup();
+  writes.Start();
+  const auto wait_for_some_inserts = [&] {
+    const auto rows_init = writes.rows_inserted();
+    ASSERT_EVENTUALLY([&] {
+      ASSERT_GT(writes.rows_inserted() - rows_init, 1000);
+    });
+  };
+  wait_for_some_inserts();
+
+  // Completely shut down one of the tablet servers, keeping its ports so we
+  // can take over with another tablet server.
+  ExternalTabletServer* ts = cluster_->tablet_server(0);
+  vector<HostPort> master_hostports;
+  for (int i = 0; i < cluster_->num_masters(); i++) {
+    master_hostports.push_back(cluster_->master(i)->bound_rpc_hostport());
+  }
+  ExternalDaemonOptions ts_opts = ts->opts();
+  LOG(INFO) << Substitute("Storing port $0 for use by new tablet server",
+                          ts->bound_rpc_hostport().ToString());
+  ts_opts.rpc_bind_address = ts->bound_rpc_hostport();
+  ts->Shutdown();
+
+  // With one server down, we should be able to accept writes.
+  wait_for_some_inserts();
+  writes.StopAndJoin();
+
+  // Start up a new server with a new UUID bound to the old RPC port.
+  ts_opts.wal_dir = Substitute("$0-new", ts_opts.wal_dir);
+  ts_opts.data_dirs = { Substitute("$0-new", ts_opts.data_dirs[0]) };
+  ASSERT_OK(env_->CreateDir(ts_opts.wal_dir));
+  ASSERT_OK(env_->CreateDir(ts_opts.data_dirs[0]));
+  scoped_refptr<ExternalTabletServer> new_ts =
+    new ExternalTabletServer(ts_opts, master_hostports);
+  ASSERT_OK(new_ts->Start());
+
+  // Eventually the new server should be copied to.
+  ASSERT_EVENTUALLY([&] {
+    vector<string> files_in_wal_dir;
+    ASSERT_OK(ListFilesInDir(env_, JoinPathSegments(ts_opts.wal_dir, "wals"), &files_in_wal_dir));
+    ASSERT_FALSE(files_in_wal_dir.empty());
+  });
+}
 }  // namespace tserver
 }  // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/1042e416/src/kudu/mini-cluster/external_mini_cluster.h
----------------------------------------------------------------------
diff --git a/src/kudu/mini-cluster/external_mini_cluster.h b/src/kudu/mini-cluster/external_mini_cluster.h
index d969527..5bd6e59 100644
--- a/src/kudu/mini-cluster/external_mini_cluster.h
+++ b/src/kudu/mini-cluster/external_mini_cluster.h
@@ -466,6 +466,9 @@ class ExternalDaemon : public RefCountedThreadSafe<ExternalDaemon> {
   // Modifying these flags will only take effect on the next restart.
   std::vector<std::string>* mutable_flags() { return &opts_.extra_flags; }
 
+  // Return the options used to create the daemon.
+  ExternalDaemonOptions opts() const { return opts_; }
+
  protected:
   friend class RefCountedThreadSafe<ExternalDaemon>;
   virtual ~ExternalDaemon();