You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by mp...@apache.org on 2017/12/02 05:28:41 UTC
[3/3] kudu git commit: KUDU-2229. consensus: Leader should not start
FD
KUDU-2229. consensus: Leader should not start FD
This patch fixes a log spam issue caused by commit
28a671365d6d38da966481daf937b3776e3d4852 that erroneously enables the
failure detector on the leader when a configuration takes place. That
bug had no consensus safety implications, but it did generate a large
volume of confusing log messages in a neverending loop.
This patch also includes some minor updates to InternalMiniCluster for
convenience purposes.
Change-Id: Ie2ec9c5499e8e4c1659333bd53dd2d7f6dae81f5
Reviewed-on: http://gerrit.cloudera.org:8080/8711
Reviewed-by: Alexey Serbin <as...@cloudera.com>
Tested-by: Kudu Jenkins
Reviewed-on: http://gerrit.cloudera.org:8080/8732
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/d9eb7d4c
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/d9eb7d4c
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/d9eb7d4c
Branch: refs/heads/branch-1.6.x
Commit: d9eb7d4cb696cb460a988cda8d36e5719f4c283c
Parents: 01876d0
Author: Mike Percy <mp...@apache.org>
Authored: Thu Nov 30 19:40:23 2017 -0800
Committer: Mike Percy <mp...@apache.org>
Committed: Sat Dec 2 05:28:19 2017 +0000
----------------------------------------------------------------------
src/kudu/consensus/raft_consensus.cc | 21 ++-
src/kudu/consensus/raft_consensus.h | 19 ++-
src/kudu/integration-tests/CMakeLists.txt | 3 +-
...raft_consensus_failure_detector-imc-itest.cc | 144 +++++++++++++++++++
src/kudu/mini-cluster/external_mini_cluster.h | 4 +-
src/kudu/mini-cluster/internal_mini_cluster.cc | 18 +++
src/kudu/mini-cluster/internal_mini_cluster.h | 5 +
src/kudu/mini-cluster/mini_cluster.h | 4 +
src/kudu/rpc/periodic.cc | 5 +
src/kudu/rpc/periodic.h | 5 +-
10 files changed, 206 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/d9eb7d4c/src/kudu/consensus/raft_consensus.cc
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/raft_consensus.cc b/src/kudu/consensus/raft_consensus.cc
index 291c146..cb3118e 100644
--- a/src/kudu/consensus/raft_consensus.cc
+++ b/src/kudu/consensus/raft_consensus.cc
@@ -599,7 +599,7 @@ Status RaftConsensus::BecomeReplicaUnlocked(boost::optional<MonoDelta> fd_delta)
// Enable/disable leader failure detection if becoming VOTER/NON_VOTER replica
// correspondingly.
- ToggleFailureDetector(std::move(fd_delta));
+ UpdateFailureDetectorState(std::move(fd_delta));
// Now that we're a replica, we can allow voting for other nodes.
withhold_votes_until_ = MonoTime::Min();
@@ -2610,7 +2610,7 @@ void RaftConsensus::CompleteConfigChangeRoundUnlocked(ConsensusRound* round, con
// Disable leader failure detection if transitioning from VOTER to
// NON_VOTER and vice versa.
- ToggleFailureDetector();
+ UpdateFailureDetectorState();
} else {
LOG_WITH_PREFIX_UNLOCKED(INFO)
<< "Skipping abort of non-pending config change with OpId "
@@ -2670,14 +2670,15 @@ void RaftConsensus::DisableFailureDetector() {
}
}
-void RaftConsensus::ToggleFailureDetector(boost::optional<MonoDelta> delta) {
- if (IsRaftConfigVoter(peer_uuid(), cmeta_->ActiveConfig())) {
- // A non-leader voter replica should run failure detector.
+void RaftConsensus::UpdateFailureDetectorState(boost::optional<MonoDelta> delta) {
+ const auto& uuid = peer_uuid();
+ if (uuid != cmeta_->leader_uuid() &&
+ cmeta_->IsVoterInConfig(uuid, ACTIVE_CONFIG)) {
+ // A voter that is not the leader should run the failure detector.
EnableFailureDetector(std::move(delta));
} else {
- // A non-voter should not start leader elections. The leader failure
- // detector should be re-enabled once the non-voter replica is promoted
- // to voter replica.
+ // Otherwise, the local peer should not start leader elections
+ // (e.g. if it is the leader, a non-voter, a non-participant, etc).
DisableFailureDetector();
}
}
@@ -2805,9 +2806,7 @@ Status RaftConsensus::SetPendingConfigUnlocked(const RaftConfigPB& new_config) {
}
cmeta_->set_pending_config(new_config);
- // Disable leader failure detection if transitioning from VOTER to NON_VOTER
- // and vice versa.
- ToggleFailureDetector();
+ UpdateFailureDetectorState();
return Status::OK();
}
http://git-wip-us.apache.org/repos/asf/kudu/blob/d9eb7d4c/src/kudu/consensus/raft_consensus.h
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/raft_consensus.h b/src/kudu/consensus/raft_consensus.h
index 2c438d1..5ace791 100644
--- a/src/kudu/consensus/raft_consensus.h
+++ b/src/kudu/consensus/raft_consensus.h
@@ -158,6 +158,11 @@ class RaftConsensus : public std::enable_shared_from_this<RaftConsensus>,
// Returns Status::TimedOut if the role is not LEADER within 'timeout'.
Status WaitUntilLeaderForTests(const MonoDelta& timeout);
+ // Return a copy of the failure detector instance. Only for use in tests.
+ std::shared_ptr<rpc::PeriodicTimer> GetFailureDetectorForTests() const {
+ return failure_detector_;
+ }
+
// Implement a LeaderStepDown() request.
Status StepDown(LeaderStepDownResponsePB* resp);
@@ -582,13 +587,15 @@ class RaftConsensus : public std::enable_shared_from_this<RaftConsensus>,
// If the failure detector is already disabled, has no effect.
void DisableFailureDetector();
- // Disable leader failure detector if transitioning from VOTER to NON_VOTER,
- // and vice versa. The decision is based on the type of membership of the
- // peer in the active Raft configuration.
+ // Enables or disables the failure detector based on the role of the local
+ // peer in the active config. If the local peer a VOTER, but not the leader,
+ // then failure detection will be enabled. If the local peer is the leader,
+ // or a NON_VOTER, then failure detection will be disabled.
//
- // If it's time to enable the leader failure detection, the specified
- // 'delta' value is used as described in EnableFailureDetector()'s comment.
- void ToggleFailureDetector(boost::optional<MonoDelta> delta = boost::none);
+ // See EnableFailureDetector() for an explanation of the 'delta' parameter,
+ // which is used if it is determined that the failure detector should be
+ // enabled.
+ void UpdateFailureDetectorState(boost::optional<MonoDelta> delta = boost::none);
// "Reset" the failure detector to indicate leader activity.
//
http://git-wip-us.apache.org/repos/asf/kudu/blob/d9eb7d4c/src/kudu/integration-tests/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/CMakeLists.txt b/src/kudu/integration-tests/CMakeLists.txt
index df5853b..a3f2294 100644
--- a/src/kudu/integration-tests/CMakeLists.txt
+++ b/src/kudu/integration-tests/CMakeLists.txt
@@ -86,9 +86,10 @@ ADD_KUDU_TEST(master-stress-test RESOURCE_LOCK "master-rpc-ports")
ADD_KUDU_TEST(multidir_cluster-itest)
ADD_KUDU_TEST(open-readonly-fs-itest)
ADD_KUDU_TEST(raft_config_change-itest)
-ADD_KUDU_TEST(raft_consensus-itest RUN_SERIAL true)
ADD_KUDU_TEST(raft_consensus_election-itest)
+ADD_KUDU_TEST(raft_consensus_failure_detector-imc-itest)
ADD_KUDU_TEST(raft_consensus_nonvoter-itest)
+ADD_KUDU_TEST(raft_consensus-itest RUN_SERIAL true)
ADD_KUDU_TEST(registration-test RESOURCE_LOCK "master-web-port")
ADD_KUDU_TEST(security-faults-itest)
ADD_KUDU_TEST(security-itest)
http://git-wip-us.apache.org/repos/asf/kudu/blob/d9eb7d4c/src/kudu/integration-tests/raft_consensus_failure_detector-imc-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/raft_consensus_failure_detector-imc-itest.cc b/src/kudu/integration-tests/raft_consensus_failure_detector-imc-itest.cc
new file mode 100644
index 0000000..32acff3
--- /dev/null
+++ b/src/kudu/integration-tests/raft_consensus_failure_detector-imc-itest.cc
@@ -0,0 +1,144 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <memory>
+#include <ostream>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "kudu/consensus/metadata.pb.h"
+#include "kudu/consensus/raft_consensus.h"
+#include "kudu/gutil/ref_counted.h"
+#include "kudu/integration-tests/cluster_itest_util.h"
+#include "kudu/integration-tests/internal_mini_cluster-itest-base.h"
+#include "kudu/integration-tests/test_workload.h"
+#include "kudu/mini-cluster/internal_mini_cluster.h"
+#include "kudu/rpc/periodic.h"
+#include "kudu/tablet/tablet_replica.h"
+#include "kudu/tserver/mini_tablet_server.h"
+#include "kudu/tserver/tablet_server.h"
+#include "kudu/tserver/ts_tablet_manager.h"
+#include "kudu/util/monotime.h"
+#include "kudu/util/status.h"
+#include "kudu/util/test_macros.h"
+#include "kudu/util/test_util.h"
+
+using kudu::consensus::RaftPeerPB;
+using kudu::itest::WaitForServersToAgree;
+using kudu::tablet::TabletReplica;
+using std::string;
+using std::vector;
+
+namespace kudu {
+
+class RaftConsensusFailureDetectorIMCTest : public MiniClusterITestBase {
+};
+
+// Ensure that the failure detector is enabled for non-leader voters, and
+// disabled for leaders and non-voters. Ensure this persists after a
+// configuration change.
+// Regression test for KUDU-2229.
+TEST_F(RaftConsensusFailureDetectorIMCTest, TestFailureDetectorActivation) {
+ if (!AllowSlowTests()) {
+ LOG(WARNING) << "Skipping test in fast-test mode.";
+ return;
+ }
+
+ const MonoDelta kTimeout = MonoDelta::FromSeconds(30);
+
+ const int kNumReplicas = 3;
+ NO_FATALS(StartCluster(/*num_tablet_servers=*/ kNumReplicas + 1));
+ TestWorkload workload(cluster_.get());
+ workload.Setup();
+
+ // Identify the tablet id and locate the replicas.
+ string tablet_id;
+ itest::TabletReplicaMap replica_map_;
+ string missing_replica_uuid;
+ ASSERT_EVENTUALLY([&] {
+ replica_map_.clear();
+ missing_replica_uuid.clear();
+ for (int i = 0; i < cluster_->num_tablet_servers(); i++) {
+ auto* ts = cluster_->mini_tablet_server(i);
+ auto tablet_ids = ts->ListTablets();
+ if (tablet_ids.empty()) {
+ missing_replica_uuid = ts->uuid();
+ continue;
+ }
+ ASSERT_EQ(1, tablet_ids.size());
+ if (!tablet_id.empty()) {
+ ASSERT_EQ(tablet_id, tablet_ids[0]);
+ }
+ tablet_id = tablet_ids[0];
+ replica_map_.emplace(tablet_id, ts_map_[ts->uuid()]);
+ }
+ ASSERT_EQ(kNumReplicas, replica_map_.count(tablet_id));
+ ASSERT_FALSE(tablet_id.empty());
+ });
+
+ // Wait until tablets are running.
+ auto range = replica_map_.equal_range(tablet_id);
+ for (auto it = range.first; it != range.second; ++it) {
+ auto ts = it->second;
+ ASSERT_OK(WaitUntilTabletRunning(ts, tablet_id, kTimeout));
+ }
+ // Elect a leader.
+ string leader_uuid = range.first->second->uuid();
+ ASSERT_OK(StartElection(ts_map_[leader_uuid], tablet_id, kTimeout));
+ ASSERT_OK(WaitUntilLeader(ts_map_[leader_uuid], tablet_id, kTimeout));
+
+ auto active_ts_map = ts_map_;
+ ASSERT_EQ(1, active_ts_map.erase(missing_replica_uuid));
+
+ // Ensure all servers are up to date.
+ ASSERT_OK(WaitForServersToAgree(kTimeout, active_ts_map, tablet_id,
+ /*minimum_index=*/ 1));
+
+ // Ensure that the failure detector activation state is consistent with the
+ // rule that only non-leader VOTER replicas should have failure detection
+ // enabled. Leaders and NON_VOTER replicas should not enable the FD.
+ auto validate_failure_detector_status = [&](const itest::TabletServerMap& ts_map) {
+ for (const auto& entry : ts_map) {
+ const auto& uuid = entry.first;
+ auto mini_ts = cluster_->mini_tablet_server_by_uuid(uuid);
+ vector<scoped_refptr<TabletReplica>> replicas;
+ mini_ts->server()->tablet_manager()->GetTabletReplicas(&replicas);
+ ASSERT_EQ(1, replicas.size());
+ const auto& replica = replicas[0];
+ ASSERT_EQ(replica->consensus()->role() == RaftPeerPB::FOLLOWER,
+ replica->consensus()->GetFailureDetectorForTests()->started());
+ }
+ };
+
+ NO_FATALS(validate_failure_detector_status(active_ts_map));
+
+ // Add a new non-voter.
+ ASSERT_OK(AddServer(ts_map_[leader_uuid], tablet_id,
+ ts_map_[missing_replica_uuid],
+ RaftPeerPB::NON_VOTER, kTimeout));
+ ASSERT_OK(WaitForServersToAgree(kTimeout, ts_map_, tablet_id,
+ /*minimum_index=*/ 2));
+
+ NO_FATALS(validate_failure_detector_status(ts_map_));
+}
+
+} // namespace kudu
http://git-wip-us.apache.org/repos/asf/kudu/blob/d9eb7d4c/src/kudu/mini-cluster/external_mini_cluster.h
----------------------------------------------------------------------
diff --git a/src/kudu/mini-cluster/external_mini_cluster.h b/src/kudu/mini-cluster/external_mini_cluster.h
index f879949..d969527 100644
--- a/src/kudu/mini-cluster/external_mini_cluster.h
+++ b/src/kudu/mini-cluster/external_mini_cluster.h
@@ -232,9 +232,7 @@ class ExternalMiniCluster : public MiniCluster {
// Return ExternalTabletServer given its UUID. If not found, returns NULL.
ExternalTabletServer* tablet_server_by_uuid(const std::string& uuid) const;
- // Return the index of the ExternalTabletServer that has the given 'uuid', or
- // -1 if no such UUID can be found.
- int tablet_server_index_by_uuid(const std::string& uuid) const;
+ int tablet_server_index_by_uuid(const std::string& uuid) const override;
// Return all tablet servers and masters.
std::vector<ExternalDaemon*> daemons() const;
http://git-wip-us.apache.org/repos/asf/kudu/blob/d9eb7d4c/src/kudu/mini-cluster/internal_mini_cluster.cc
----------------------------------------------------------------------
diff --git a/src/kudu/mini-cluster/internal_mini_cluster.cc b/src/kudu/mini-cluster/internal_mini_cluster.cc
index dbf7f6a..cfd0474 100644
--- a/src/kudu/mini-cluster/internal_mini_cluster.cc
+++ b/src/kudu/mini-cluster/internal_mini_cluster.cc
@@ -230,6 +230,24 @@ MiniTabletServer* InternalMiniCluster::mini_tablet_server(int idx) const {
return mini_tablet_servers_[idx].get();
}
+MiniTabletServer* InternalMiniCluster::mini_tablet_server_by_uuid(const string& uuid) const {
+ for (const auto& ts : mini_tablet_servers_) {
+ if (ts->uuid() == uuid) {
+ return ts.get();
+ }
+ }
+ return nullptr;
+}
+
+int InternalMiniCluster::tablet_server_index_by_uuid(const std::string& uuid) const {
+ for (int i = 0; i < mini_tablet_servers_.size(); i++) {
+ if (mini_tablet_servers_[i]->uuid() == uuid) {
+ return i;
+ }
+ }
+ return -1;
+}
+
vector<HostPort> InternalMiniCluster::master_rpc_addrs() const {
if (opts_.num_masters == 1) {
const auto& addr = CHECK_NOTNULL(mini_master(0))->bound_rpc_addr();
http://git-wip-us.apache.org/repos/asf/kudu/blob/d9eb7d4c/src/kudu/mini-cluster/internal_mini_cluster.h
----------------------------------------------------------------------
diff --git a/src/kudu/mini-cluster/internal_mini_cluster.h b/src/kudu/mini-cluster/internal_mini_cluster.h
index b44717c..41eacdb 100644
--- a/src/kudu/mini-cluster/internal_mini_cluster.h
+++ b/src/kudu/mini-cluster/internal_mini_cluster.h
@@ -141,6 +141,11 @@ class InternalMiniCluster : public MiniCluster {
// 'idx' must be between 0 and 'num_tablet_servers' -1.
tserver::MiniTabletServer* mini_tablet_server(int idx) const;
+ // Returns the TabletServer with uuid 'uuid', or nullptr if not found.
+ tserver::MiniTabletServer* mini_tablet_server_by_uuid(const std::string& uuid) const;
+
+ int tablet_server_index_by_uuid(const std::string& uuid) const override;
+
int num_tablet_servers() const override {
return mini_tablet_servers_.size();
}
http://git-wip-us.apache.org/repos/asf/kudu/blob/d9eb7d4c/src/kudu/mini-cluster/mini_cluster.h
----------------------------------------------------------------------
diff --git a/src/kudu/mini-cluster/mini_cluster.h b/src/kudu/mini-cluster/mini_cluster.h
index 8a1440c..f802c08 100644
--- a/src/kudu/mini-cluster/mini_cluster.h
+++ b/src/kudu/mini-cluster/mini_cluster.h
@@ -129,6 +129,10 @@ class MiniCluster {
virtual std::vector<HostPort> master_rpc_addrs() const = 0;
+ // Return the index of the tablet server that has the given 'uuid', or
+ // -1 if no such UUID can be found.
+ virtual int tablet_server_index_by_uuid(const std::string& uuid) const = 0;
+
// Create a client configured to talk to this cluster. 'builder' may contain
// override options for the client. The master address will be overridden to
// talk to the running master(s). If 'builder' is a nullptr, default options
http://git-wip-us.apache.org/repos/asf/kudu/blob/d9eb7d4c/src/kudu/rpc/periodic.cc
----------------------------------------------------------------------
diff --git a/src/kudu/rpc/periodic.cc b/src/kudu/rpc/periodic.cc
index 07674de..3b9412d 100644
--- a/src/kudu/rpc/periodic.cc
+++ b/src/kudu/rpc/periodic.cc
@@ -117,6 +117,11 @@ void PeriodicTimer::SnoozeUnlocked(boost::optional<MonoDelta> next_task_delta) {
next_task_time_ = MonoTime::Now() + *next_task_delta;
}
+bool PeriodicTimer::started() const {
+ std::lock_guard<simple_spinlock> l(lock_);
+ return started_;
+}
+
MonoDelta PeriodicTimer::GetMinimumPeriod() {
// Given jitter percentage J and period P, this returns (1-J)*P, which is
// the lowest possible jittered value.
http://git-wip-us.apache.org/repos/asf/kudu/blob/d9eb7d4c/src/kudu/rpc/periodic.h
----------------------------------------------------------------------
diff --git a/src/kudu/rpc/periodic.h b/src/kudu/rpc/periodic.h
index f65ed85..058e3ce 100644
--- a/src/kudu/rpc/periodic.h
+++ b/src/kudu/rpc/periodic.h
@@ -127,7 +127,7 @@ class PeriodicTimer : public std::enable_shared_from_this<PeriodicTimer> {
// otherwise the task is not guaranteed to run in a timely manner.
//
// Note: Snooze() is not additive. That is, if called at time X and again at
- // time X + P/2, the timer is snoozed until X+P/2, not X+2P.
+ // time X + P/2, the timer is snoozed until X+P/2+P, not X+2P.
//
// Does nothing if the timer is stopped.
void Snooze(boost::optional<MonoDelta> next_task_delta = boost::none);
@@ -141,6 +141,9 @@ class PeriodicTimer : public std::enable_shared_from_this<PeriodicTimer> {
// Does nothing if the timer is already stopped.
void Stop();
+ // Returns true iff the failure detected has been started.
+ bool started() const;
+
private:
FRIEND_TEST(PeriodicTimerTest, TestCallbackRestartsTimer);