You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2018/02/24 20:28:31 UTC
[2/2] kudu git commit: KUDU-1613: evict replicas with wrong server
UUIDs
KUDU-1613: evict replicas with wrong server UUIDs
Currently, if a tablet server is wiped and restarted at the same host
and port with a new UUID assigned to it before the replicas on its
previous incarnation are evicted, those replicas won't ever be evicted.
With this patch, upon receiving a response with WRONG_SERVER_UUID,
leaders will treat the follower as failed and handle it appropriately,
re-replicating them elsewhere.
A new test case is added to raft_consensus-itest that is set up to
prompt eviction (i.e. low timeouts, ample servers to replicate to), and
without the change, this test fails.
Change-Id: If0c7ae9dfbd99c2c9a38e8b1f7748c70882e06b4
Reviewed-on: http://gerrit.cloudera.org:8080/9136
Reviewed-by: David Ribeiro Alves <da...@gmail.com>
Tested-by: Kudu Jenkins
Reviewed-on: http://gerrit.cloudera.org:8080/9410
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/1042e416
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/1042e416
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/1042e416
Branch: refs/heads/branch-1.6.x
Commit: 1042e4164629155976ca1b7a0856a1a262876693
Parents: 2bf90b4
Author: Andrew Wong <aw...@cloudera.com>
Authored: Tue Jan 23 21:16:54 2018 -0800
Committer: Andrew Wong <aw...@cloudera.com>
Committed: Sat Feb 24 02:44:27 2018 +0000
----------------------------------------------------------------------
src/kudu/consensus/consensus_peers.cc | 20 ++++--
.../integration-tests/raft_consensus-itest.cc | 72 ++++++++++++++++++++
src/kudu/mini-cluster/external_mini_cluster.h | 3 +
3 files changed, 88 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/1042e416/src/kudu/consensus/consensus_peers.cc
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/consensus_peers.cc b/src/kudu/consensus/consensus_peers.cc
index ffa373d..52e6b3f 100644
--- a/src/kudu/consensus/consensus_peers.cc
+++ b/src/kudu/consensus/consensus_peers.cc
@@ -315,13 +315,19 @@ void Peer::ProcessResponse() {
if (response_.has_error()) {
Status response_status = StatusFromPB(response_.error().status());
PeerStatus ps;
- if (response_.error().code() == TabletServerErrorPB::TABLET_FAILED) {
- ps = PeerStatus::TABLET_FAILED;
- } else if (response_.error().code() == TabletServerErrorPB::TABLET_NOT_FOUND) {
- ps = PeerStatus::TABLET_NOT_FOUND;
- } else {
- // Unknown kind of error.
- ps = PeerStatus::REMOTE_ERROR;
+ TabletServerErrorPB resp_error = response_.error();
+ switch (response_.error().code()) {
+ // We treat WRONG_SERVER_UUID as failed.
+ case TabletServerErrorPB::WRONG_SERVER_UUID: FALLTHROUGH_INTENDED;
+ case TabletServerErrorPB::TABLET_FAILED:
+ ps = PeerStatus::TABLET_FAILED;
+ break;
+ case TabletServerErrorPB::TABLET_NOT_FOUND:
+ ps = PeerStatus::TABLET_NOT_FOUND;
+ break;
+ default:
+ // Unknown kind of error.
+ ps = PeerStatus::REMOTE_ERROR;
}
queue_->UpdatePeerStatus(peer_pb_.permanent_uuid(), ps, response_status);
ProcessResponseError(response_status);
http://git-wip-us.apache.org/repos/asf/kudu/blob/1042e416/src/kudu/integration-tests/raft_consensus-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/raft_consensus-itest.cc b/src/kudu/integration-tests/raft_consensus-itest.cc
index 72690ce..cdd164a 100644
--- a/src/kudu/integration-tests/raft_consensus-itest.cc
+++ b/src/kudu/integration-tests/raft_consensus-itest.cc
@@ -70,8 +70,12 @@
#include "kudu/tserver/tserver_service.proxy.h"
#include "kudu/util/atomic.h"
#include "kudu/util/countdown_latch.h"
+#include "kudu/util/env.h"
+#include "kudu/util/env_util.h"
#include "kudu/util/metrics.h"
#include "kudu/util/monotime.h"
+#include "kudu/util/net/net_util.h"
+#include "kudu/util/path_util.h"
#include "kudu/util/pb_util.h"
#include "kudu/util/random.h"
#include "kudu/util/status.h"
@@ -94,7 +98,10 @@ using kudu::client::KuduInsert;
using kudu::client::KuduSession;
using kudu::client::KuduTable;
using kudu::client::sp::shared_ptr;
+using kudu::cluster::ExternalDaemonOptions;
using kudu::cluster::ExternalTabletServer;
+using kudu::cluster::ExternalMiniCluster;
+using kudu::cluster::ExternalMiniClusterOptions;
using kudu::consensus::ConsensusRequestPB;
using kudu::consensus::ConsensusResponsePB;
using kudu::consensus::ConsensusServiceProxy;
@@ -119,6 +126,7 @@ using kudu::itest::WaitUntilLeader;
using kudu::itest::WriteSimpleTestRow;
using kudu::master::TabletLocationsPB;
using kudu::master::VOTER_REPLICA;
+using kudu::env_util::ListFilesInDir;
using kudu::pb_util::SecureDebugString;
using kudu::pb_util::SecureShortDebugString;
using kudu::rpc::RpcController;
@@ -2577,5 +2585,69 @@ TEST_F(RaftConsensusITest, TestLogIOErrorIsFatal) {
ASSERT_OK(ext_tservers[0]->WaitForFatal(MonoDelta::FromSeconds(10)));
}
+// KUDU-1613: Test that when we reset and restart a tablet server, with a new
+// uuid but with the same host and port, replicas that were hosted by the
+// previous incarnation are correctly detected as failed and eventually
+// re-replicated.
+TEST_F(RaftConsensusITest, TestRestartWithDifferentUUID) {
+ // Start a cluster and insert data.
+ ExternalMiniClusterOptions opts;
+ opts.num_tablet_servers = 5;
+ opts.extra_tserver_flags = {
+ // Set a low timeout. If we can't re-replicate in a reasonable amount of
+ // time, it means we're not evicting at all.
+ "--follower_unavailable_considered_failed_sec=10",
+ };
+ cluster_.reset(new ExternalMiniCluster(std::move(opts)));
+ ASSERT_OK(cluster_->Start());
+
+ // Write some data. In writing many tablets, we're making it more likely that
+ // all tablet servers will have some tablets on them.
+ TestWorkload writes(cluster_.get());
+ writes.set_num_tablets(25);
+ writes.set_timeout_allowed(true);
+ writes.Setup();
+ writes.Start();
+ const auto wait_for_some_inserts = [&] {
+ const auto rows_init = writes.rows_inserted();
+ ASSERT_EVENTUALLY([&] {
+ ASSERT_GT(writes.rows_inserted() - rows_init, 1000);
+ });
+ };
+ wait_for_some_inserts();
+
+ // Completely shut down one of the tablet servers, keeping its ports so we
+ // can take over with another tablet server.
+ ExternalTabletServer* ts = cluster_->tablet_server(0);
+ vector<HostPort> master_hostports;
+ for (int i = 0; i < cluster_->num_masters(); i++) {
+ master_hostports.push_back(cluster_->master(i)->bound_rpc_hostport());
+ }
+ ExternalDaemonOptions ts_opts = ts->opts();
+ LOG(INFO) << Substitute("Storing port $0 for use by new tablet server",
+ ts->bound_rpc_hostport().ToString());
+ ts_opts.rpc_bind_address = ts->bound_rpc_hostport();
+ ts->Shutdown();
+
+ // With one server down, we should be able to accept writes.
+ wait_for_some_inserts();
+ writes.StopAndJoin();
+
+ // Start up a new server with a new UUID bound to the old RPC port.
+ ts_opts.wal_dir = Substitute("$0-new", ts_opts.wal_dir);
+ ts_opts.data_dirs = { Substitute("$0-new", ts_opts.data_dirs[0]) };
+ ASSERT_OK(env_->CreateDir(ts_opts.wal_dir));
+ ASSERT_OK(env_->CreateDir(ts_opts.data_dirs[0]));
+ scoped_refptr<ExternalTabletServer> new_ts =
+ new ExternalTabletServer(ts_opts, master_hostports);
+ ASSERT_OK(new_ts->Start());
+
+ // Eventually the new server should be copied to.
+ ASSERT_EVENTUALLY([&] {
+ vector<string> files_in_wal_dir;
+ ASSERT_OK(ListFilesInDir(env_, JoinPathSegments(ts_opts.wal_dir, "wals"), &files_in_wal_dir));
+ ASSERT_FALSE(files_in_wal_dir.empty());
+ });
+}
} // namespace tserver
} // namespace kudu
http://git-wip-us.apache.org/repos/asf/kudu/blob/1042e416/src/kudu/mini-cluster/external_mini_cluster.h
----------------------------------------------------------------------
diff --git a/src/kudu/mini-cluster/external_mini_cluster.h b/src/kudu/mini-cluster/external_mini_cluster.h
index d969527..5bd6e59 100644
--- a/src/kudu/mini-cluster/external_mini_cluster.h
+++ b/src/kudu/mini-cluster/external_mini_cluster.h
@@ -466,6 +466,9 @@ class ExternalDaemon : public RefCountedThreadSafe<ExternalDaemon> {
// Modifying these flags will only take effect on the next restart.
std::vector<std::string>* mutable_flags() { return &opts_.extra_flags; }
+ // Return the options used to create the daemon.
+ ExternalDaemonOptions opts() const { return opts_; }
+
protected:
friend class RefCountedThreadSafe<ExternalDaemon>;
virtual ~ExternalDaemon();