You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by gr...@apache.org on 2019/01/10 23:05:17 UTC
[kudu] 01/09: Reduce election-related logging
This is an automated email from the ASF dual-hosted git repository.
granthenke pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 8fb302289b7fe8eefa9aa3cd9fb1ae97ac4ae5ec
Author: Will Berkeley <wd...@gmail.org>
AuthorDate: Tue Jan 8 16:03:14 2019 -0800
Reduce election-related logging
Frequent simultaneous pre-elections on lots of tablets are a common
symptom of underlying problems on Kudu clusters. This results in a ton
of log spam because a [pre-]election results in multiple log messages.
This patch makes a few conservative changes to reduce the amount of
election-related logging, especially on candidates.
1. No log message per successful VoteRequest response. The candidate
previously logged a message for every granted or denied vote from a
voter, e.g.
I1221 20:16:10.479441 1852 leader_election.cc:387] T cdd0e560bc5c451c978e38c0d7620e52 P dd658299c5194f4b88ae408cd3130103 [CANDIDATE]: Term 1 pre-election: Vote granted by peer 1a228ec03dbc4bb79ef66cd036a99bca (127.1.92.67:38265)
This seems like overkill. Instead, these messages have been moved to
VLOG(1) and the candidate will print a more detailed summary of the
election once it is decided:
I0109 12:39:42.203627 158707712 leader_election.cc:304] T 00000000000000000000000000000000 P 99b33389ebdc47b1ae18947dea49a5b7 [CANDIDATE]: Term 2 pre-election: Election decided. Result: candidate won. Election summary: received 2 responses out of 3 voters: 2 yes votes; 0 no votes. yes voters: 465a338e21d84ad1a4e03867eb608623, 99b33389ebdc47b1ae18947dea49a5b7; no voters:
Compared to the current state:
I1221 20:16:10.479472 1852 leader_election.cc:278] T cdd0e560bc5c451c978e38c0d7620e52 P dd658299c5194f4b88ae408cd3130103 [CANDIDATE]: Term 1 pre-election: Election decided. Result: candidate won.
Note that it is still possible to find a record of each vote in each
voter's INFO logs, and error condition logging has not been changed.
2. No log message per vote request. The candidate previously logged a
message for each voter it was requesting a vote from. e.g.
I1221 20:16:10.474704 1993 leader_election.cc:251] T cdd0e560bc5c451c978e38c0d7620e52 P 1a228ec03dbc4bb79ef66cd036a99bca [CANDIDATE]: Term 1 pre-election: Requesting pre-vote from peer dd658299c5194f4b88ae408cd3130103 (127.1.92.66:39093)
I1221 20:16:10.475266 1993 leader_election.cc:251] T cdd0e560bc5c451c978e38c0d7620e52 P 1a228ec03dbc4bb79ef66cd036a99bca [CANDIDATE]: Term 1 pre-election: Requesting pre-vote from peer c624ece2977e486fbab958afb000f122 (127.1.92.65:45651)
Instead, the candidate will summarize all the voters it sent requests
to:
I0109 12:39:42.206408 156561408 leader_election.cc:291] T 00000000000000000000000000000000 P 99b33389ebdc47b1ae18947dea49a5b7 [CANDIDATE]: Term 2 election: Requested vote from peers 465a338e21d84ad1a4e03867eb608623 (127.0.0.1:7053), 08b8de9c7a7c4ec5b72c9eb275d29cfd (127.0.0.1:7052)
3. No logging of failure detector sleeps. It seems like it might be
useful to log when the FD sleeps, and why, e.g.
I1221 20:16:12.399258 2803 raft_consensus.cc:2834] T cdd0e560bc5c451c978e38c0d7620e52 P c624ece2977e486fbab958afb000f122: Snoozing failure detection for 0.224s (vote granted)
But frankly in practice it's never been relevant or useful. Since a
message like this is produced multiple times on the candidate and
possibly on the voters during an election, I moved it to VLOG(1)
level to reduce the total amount of logging from elections.
Change-Id: Ie0ab1a37c2b2e9a3f37baf74772b694b907c6adf
Reviewed-on: http://gerrit.cloudera.org:8080/12200
Tested-by: Kudu Jenkins
Reviewed-by: Andrew Wong <aw...@cloudera.com>
---
src/kudu/consensus/leader_election.cc | 50 ++++++++++++++++++++++++++++-------
src/kudu/consensus/leader_election.h | 3 +++
src/kudu/consensus/raft_consensus.cc | 8 +++---
3 files changed, 48 insertions(+), 13 deletions(-)
diff --git a/src/kudu/consensus/leader_election.cc b/src/kudu/consensus/leader_election.cc
index bd2c8a9..931e5ef 100644
--- a/src/kudu/consensus/leader_election.cc
+++ b/src/kudu/consensus/leader_election.cc
@@ -21,6 +21,7 @@
#include <mutex>
#include <ostream>
#include <type_traits>
+#include <utility>
#include <vector>
#include <boost/bind.hpp> // IWYU pragma: keep
@@ -132,6 +133,29 @@ bool VoteCounter::AreAllVotesIn() const {
return GetTotalVotesCounted() == num_voters_;
}
+string VoteCounter::GetElectionSummary() const {
+ vector<string> yes_voter_uuids;
+ vector<string> no_voter_uuids;
+ for (const auto& entry : votes_) {
+ switch (entry.second) {
+ case VOTE_GRANTED:
+ yes_voter_uuids.push_back(entry.first);
+ break;
+ case VOTE_DENIED:
+ no_voter_uuids.push_back(entry.first);
+ break;
+ }
+ }
+ return Substitute("received $0 responses out of $1 voters: $2 yes votes; "
+ "$3 no votes. yes voters: $4; no voters: $5",
+ yes_votes_ + no_votes_,
+ num_voters_,
+ yes_votes_,
+ no_votes_,
+ JoinStrings(yes_voter_uuids, ", "),
+ JoinStrings(no_voter_uuids, ", "));
+}
+
///////////////////////////////////////////////////
// ElectionResult
///////////////////////////////////////////////////
@@ -224,6 +248,8 @@ void LeaderElection::Run() {
CheckForDecision();
// The rest of the code below is for a typical multi-node configuration.
+ vector<string> other_voter_info;
+ other_voter_info.reserve(other_voter_uuids.size());
for (const auto& voter_uuid : other_voter_uuids) {
VoterState* state = nullptr;
{
@@ -232,6 +258,7 @@ void LeaderElection::Run() {
// Safe to drop the lock because voter_state_ is not mutated outside of
// the constructor / destructor. We do this to avoid deadlocks below.
}
+ other_voter_info.push_back(state->PeerInfo());
// If we failed to construct the proxy, just record a 'NO' vote with the status
// that indicates why it failed.
@@ -248,9 +275,6 @@ void LeaderElection::Run() {
}
// Send the RPC request.
- LOG_WITH_PREFIX(INFO) << "Requesting "
- << (request_.is_pre_election() ? "pre-" : "")
- << "vote from peer " << state->PeerInfo();
state->rpc.set_timeout(timeout_);
state->request = request_;
@@ -265,6 +289,9 @@ void LeaderElection::Run() {
boost::bind(&Closure::Run,
Bind(&LeaderElection::VoteResponseRpcCallback, this, voter_uuid)));
}
+ LOG_WITH_PREFIX(INFO) << Substitute("Requested $0vote from peers $1",
+ request_.is_pre_election() ? "pre-" : "",
+ JoinStrings(other_voter_info, ", "));
}
void LeaderElection::CheckForDecision() {
@@ -275,9 +302,12 @@ void LeaderElection::CheckForDecision() {
if (!result_ && vote_counter_->IsDecided()) {
ElectionVote decision;
CHECK_OK(vote_counter_->GetDecision(&decision));
- LOG_WITH_PREFIX(INFO) << "Election decided. Result: candidate "
- << ((decision == VOTE_GRANTED) ? "won." : "lost.");
- string msg = (decision == VOTE_GRANTED) ?
+ const auto election_won = decision == VOTE_GRANTED;
+ LOG_WITH_PREFIX(INFO) << Substitute("Election decided. Result: candidate $0. "
+ "Election summary: $1",
+ election_won ? "won" : "lost",
+ vote_counter_->GetElectionSummary());
+ string msg = election_won ?
"achieved majority votes" : "could not achieve majority";
result_.reset(new ElectionResult(request_, decision, highest_voter_term_, msg));
}
@@ -384,7 +414,7 @@ void LeaderElection::HandleVoteGrantedUnlocked(const VoterState& state) {
}
DCHECK(state.response.vote_granted());
- LOG_WITH_PREFIX(INFO) << "Vote granted by peer " << state.PeerInfo();
+ VLOG_WITH_PREFIX(1) << "Vote granted by peer " << state.PeerInfo();
RecordVoteUnlocked(state, VOTE_GRANTED);
}
@@ -398,8 +428,10 @@ void LeaderElection::HandleVoteDeniedUnlocked(const VoterState& state) {
return HandleHigherTermUnlocked(state);
}
- LOG_WITH_PREFIX(INFO) << "Vote denied by peer " << state.PeerInfo() << ". Message: "
- << StatusFromPB(state.response.consensus_error().status()).ToString();
+ VLOG_WITH_PREFIX(1) << Substitute(
+ "Vote denied by peer $0. Message: $1",
+ state.PeerInfo(),
+ StatusFromPB(state.response.consensus_error().status()).ToString());
RecordVoteUnlocked(state, VOTE_DENIED);
}
diff --git a/src/kudu/consensus/leader_election.h b/src/kudu/consensus/leader_election.h
index 5e145e2..8770906 100644
--- a/src/kudu/consensus/leader_election.h
+++ b/src/kudu/consensus/leader_election.h
@@ -78,6 +78,9 @@ class VoteCounter {
// Return true iff GetTotalVotesCounted() == num_voters_;
bool AreAllVotesIn() const;
+ // Return a summary of the election so far, suitable for logging.
+ std::string GetElectionSummary() const;
+
private:
friend class VoteCounterTest;
diff --git a/src/kudu/consensus/raft_consensus.cc b/src/kudu/consensus/raft_consensus.cc
index d71f8e9..7428fe0 100644
--- a/src/kudu/consensus/raft_consensus.cc
+++ b/src/kudu/consensus/raft_consensus.cc
@@ -2831,10 +2831,10 @@ void RaftConsensus::SnoozeFailureDetector(boost::optional<string> reason_for_log
boost::optional<MonoDelta> delta) {
if (PREDICT_TRUE(failure_detector_ && FLAGS_enable_leader_failure_detection)) {
if (reason_for_log) {
- LOG(INFO) << LogPrefixThreadSafe()
- << Substitute("Snoozing failure detection for $0 ($1)",
- delta ? delta->ToString() : "election timeout",
- *reason_for_log);
+ VLOG(1) << LogPrefixThreadSafe()
+ << Substitute("Snoozing failure detection for $0 ($1)",
+ delta ? delta->ToString() : "election timeout",
+ *reason_for_log);
}
if (!delta) {