You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by mp...@apache.org on 2017/11/27 06:16:30 UTC
[6/6] kudu git commit: KUDU-1097 (patch 1): Make leader report config
member health to master
KUDU-1097 (patch 1): Make leader report config member health to master
Passes existing tests (when the feature is disabled).
Tests will be added in a follow-up patch.
Change-Id: Ia5081cbe0c0d81733a781d4729211dd0c530cdfa
Reviewed-on: http://gerrit.cloudera.org:8080/8630
Reviewed-by: Alexey Serbin <as...@cloudera.com>
Tested-by: Kudu Jenkins
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/88e39bad
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/88e39bad
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/88e39bad
Branch: refs/heads/master
Commit: 88e39bad14cfab17882d72b69d3382c219b93c23
Parents: b436845
Author: Mike Percy <mp...@apache.org>
Authored: Tue Nov 21 20:44:18 2017 -0800
Committer: Mike Percy <mp...@apache.org>
Committed: Mon Nov 27 06:13:55 2017 +0000
----------------------------------------------------------------------
src/kudu/consensus/consensus_queue.cc | 175 +++++++++++++++----
src/kudu/consensus/consensus_queue.h | 28 ++-
src/kudu/consensus/raft_consensus.cc | 34 +++-
src/kudu/consensus/raft_consensus.h | 18 +-
.../integration-tests/raft_consensus-itest.cc | 3 +-
src/kudu/tserver/ts_tablet_manager.cc | 8 +-
6 files changed, 224 insertions(+), 42 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/88e39bad/src/kudu/consensus/consensus_queue.cc
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/consensus_queue.cc b/src/kudu/consensus/consensus_queue.cc
index 0bf75a7..b3897f7 100644
--- a/src/kudu/consensus/consensus_queue.cc
+++ b/src/kudu/consensus/consensus_queue.cc
@@ -48,6 +48,7 @@
#include "kudu/util/logging.h"
#include "kudu/util/metrics.h"
#include "kudu/util/pb_util.h"
+#include "kudu/util/scoped_cleanup.h"
#include "kudu/util/threadpool.h"
#include "kudu/util/url-coding.h"
@@ -70,12 +71,14 @@ TAG_FLAG(consensus_inject_latency_ms_in_notifications, unsafe);
DECLARE_int32(consensus_rpc_timeout_ms);
DECLARE_bool(safe_time_advancement_without_writes);
+DECLARE_bool(raft_prepare_replacement_before_eviction);
using kudu::log::Log;
using kudu::pb_util::SecureDebugString;
using kudu::pb_util::SecureShortDebugString;
using std::string;
using std::unique_ptr;
+using std::unordered_map;
using std::vector;
using strings::Substitute;
@@ -244,6 +247,25 @@ void PeerMessageQueue::UntrackPeer(const string& uuid) {
}
}
+unordered_map<string, HealthReportPB> PeerMessageQueue::ReportHealthOfPeers() const {
+ unordered_map<string, HealthReportPB> reports;
+ std::lock_guard<simple_spinlock> lock(queue_lock_);
+ for (const auto& entry : peers_map_) {
+ const string& peer_uuid = entry.first;
+ const TrackedPeer* peer = entry.second;
+ HealthReportPB report;
+ auto overall_health = peer->last_overall_health_status;
+ // We always consider the local peer (ourselves) to be healthy.
+ // TODO(mpercy): Is this always a safe assumption?
+ if (peer_uuid == local_peer_pb_.permanent_uuid()) {
+ overall_health = HealthReportPB::HEALTHY;
+ }
+ report.set_overall_health(overall_health);
+ reports.emplace(peer_uuid, std::move(report));
+ }
+ return reports;
+}
+
void PeerMessageQueue::CheckPeersInActiveConfigIfLeaderUnlocked() const {
DCHECK(queue_lock_.is_locked());
if (queue_state_.mode != LEADER) return;
@@ -369,20 +391,20 @@ OpId PeerMessageQueue::GetNextOpId() const {
queue_state_.last_appended.index() + 1);
}
-bool PeerMessageQueue::SafeToEvict(const string& evict_uuid) {
+bool PeerMessageQueue::SafeToEvictUnlocked(const string& evict_uuid) const {
+ DCHECK(queue_lock_.is_locked());
auto now = MonoTime::Now();
- std::lock_guard<simple_spinlock> lock(queue_lock_);
int remaining_voters = 0;
int remaining_viable_voters = 0;
for (const auto& e : peers_map_) {
const auto& uuid = e.first;
const auto& peer = e.second;
- if (!IsRaftConfigVoter(uuid, *queue_state_.active_config)) {
+ if (uuid == evict_uuid) {
continue;
}
- if (uuid == evict_uuid) {
+ if (!IsRaftConfigVoter(uuid, *queue_state_.active_config)) {
continue;
}
remaining_voters++;
@@ -429,6 +451,76 @@ bool PeerMessageQueue::SafeToEvict(const string& evict_uuid) {
return true;
}
+void PeerMessageQueue::UpdatePeerHealthUnlocked(TrackedPeer* peer) {
+ DCHECK(queue_lock_.is_locked());
+
+ auto overall_health_status = PeerHealthStatus(*peer);
+
+ // Prepare error messages for different conditions.
+ string error_msg;
+ if (overall_health_status == HealthReportPB::FAILED) {
+ if (peer->last_exchange_status == PeerStatus::TABLET_FAILED) {
+ error_msg = Substitute("The tablet replica hosted on peer $0 has failed", peer->uuid);
+ } else if (!peer->wal_catchup_possible) {
+ error_msg = Substitute("The logs necessary to catch up peer $0 have been "
+ "garbage collected. The replica will never be able "
+ "to catch up", peer->uuid);
+ } else {
+ error_msg = Substitute("Leader has been unable to successfully communicate "
+ "with peer $0 for more than $1 seconds ($2)",
+ peer->uuid,
+ FLAGS_follower_unavailable_considered_failed_sec,
+ (MonoTime::Now() - peer->last_communication_time).ToString());
+ }
+ }
+
+ bool changed = overall_health_status != peer->last_overall_health_status;
+ peer->last_overall_health_status = overall_health_status;
+
+ if (FLAGS_raft_prepare_replacement_before_eviction) {
+ if (changed) {
+ if (overall_health_status == HealthReportPB::FAILED) {
+ // Only log when the status changes to FAILED.
+ LOG_WITH_PREFIX_UNLOCKED(INFO) << error_msg;
+ }
+ // Only notify when there is a change.
+ NotifyObserversOfPeerHealthChange();
+ }
+ } else {
+ if (overall_health_status == HealthReportPB::FAILED &&
+ SafeToEvictUnlocked(peer->uuid)) {
+ NotifyObserversOfFailedFollower(peer->uuid, queue_state_.current_term, error_msg);
+ }
+ }
+}
+
+HealthReportPB::HealthStatus PeerMessageQueue::PeerHealthStatus(const TrackedPeer& peer) {
+ // Unreachable peers are considered failed.
+ auto max_unreachable = MonoDelta::FromSeconds(FLAGS_follower_unavailable_considered_failed_sec);
+ if (MonoTime::Now() - peer.last_communication_time > max_unreachable) {
+ return HealthReportPB::FAILED;
+ }
+
+ // Replicas returning TABLET_FAILED status are considered to have FAILED health.
+ if (peer.last_exchange_status == PeerStatus::TABLET_FAILED) {
+ return HealthReportPB::FAILED;
+ }
+
+ // If we have never connected to this peer before, and we have not exceeded
+ // the unreachable timeout, its health is unknown.
+ if (peer.last_exchange_status == PeerStatus::NEW) {
+ return HealthReportPB::UNKNOWN;
+ }
+
+ // Tablets that have fallen behind the leader's retained WAL are considered failed.
+ if (!peer.wal_catchup_possible) {
+ return HealthReportPB::FAILED;
+ }
+
+ // All other cases are considered healthy.
+ return HealthReportPB::HEALTHY;
+}
+
Status PeerMessageQueue::RequestForPeer(const string& uuid,
ConsensusRequestPB* request,
vector<ReplicateRefPtr>* msg_refs,
@@ -436,18 +528,18 @@ Status PeerMessageQueue::RequestForPeer(const string& uuid,
// Maintain a thread-safe copy of necessary members.
OpId preceding_id;
int64_t current_term;
- TrackedPeer peer;
+ TrackedPeer peer_copy;
MonoDelta unreachable_time;
{
std::lock_guard<simple_spinlock> lock(queue_lock_);
DCHECK_EQ(queue_state_.state, kQueueOpen);
DCHECK_NE(uuid, local_peer_pb_.permanent_uuid());
- TrackedPeer* peer_ptr = FindPtrOrNull(peers_map_, uuid);
- if (PREDICT_FALSE(peer_ptr == nullptr || queue_state_.mode == NON_LEADER)) {
+ TrackedPeer* peer = FindPtrOrNull(peers_map_, uuid);
+ if (PREDICT_FALSE(peer == nullptr || queue_state_.mode == NON_LEADER)) {
return Status::NotFound("Peer not tracked or queue not in leader mode.");
}
- peer = *peer_ptr;
+ peer_copy = *peer;
// Clear the requests without deleting the entries, as they may be in use by other peers.
request->mutable_ops()->ExtractSubrange(0, request->ops_size(), nullptr);
@@ -461,19 +553,22 @@ Status PeerMessageQueue::RequestForPeer(const string& uuid,
request->set_all_replicated_index(queue_state_.all_replicated_index);
request->set_last_idx_appended_to_leader(queue_state_.last_appended.index());
request->set_caller_term(current_term);
- unreachable_time = MonoTime::Now() - peer.last_communication_time;
- }
- if (unreachable_time.ToSeconds() > FLAGS_follower_unavailable_considered_failed_sec) {
- if (SafeToEvict(uuid)) {
- string msg = Substitute("Leader has been unable to successfully communicate "
- "with Peer $0 for more than $1 seconds ($2)",
- uuid,
- FLAGS_follower_unavailable_considered_failed_sec,
- unreachable_time.ToString());
- NotifyObserversOfFailedFollower(uuid, current_term, msg);
- }
+ unreachable_time = MonoTime::Now() - peer_copy.last_communication_time;
}
- if (peer.last_exchange_status == PeerStatus::TABLET_NOT_FOUND) {
+
+ // Always trigger a health status update check at the end of this function.
+ bool wal_catchup_progress = false;
+ bool wal_catchup_failure = false;
+ SCOPED_CLEANUP({
+ std::lock_guard<simple_spinlock> lock(queue_lock_);
+ TrackedPeer* peer = FindPtrOrNull(peers_map_, uuid);
+ if (!peer) return;
+ if (wal_catchup_progress) peer->wal_catchup_possible = true;
+ if (wal_catchup_failure) peer->wal_catchup_possible = false;
+ UpdatePeerHealthUnlocked(peer);
+ });
+
+ if (peer_copy.last_exchange_status == PeerStatus::TABLET_NOT_FOUND) {
VLOG(3) << LogPrefixUnlocked() << "Peer " << uuid << " needs tablet copy" << THROTTLE_MSG;
*needs_tablet_copy = true;
return Status::OK();
@@ -483,14 +578,14 @@ Status PeerMessageQueue::RequestForPeer(const string& uuid,
// If we've never communicated with the peer, we don't know what messages to
// send, so we'll send a status-only request. Otherwise, we grab requests
// from the log starting at the last_received point.
- if (peer.last_exchange_status != PeerStatus::NEW) {
+ if (peer_copy.last_exchange_status != PeerStatus::NEW) {
// The batch of messages to send to the peer.
vector<ReplicateRefPtr> messages;
int max_batch_size = FLAGS_consensus_max_batch_size_bytes - request->ByteSize();
// We try to get the follower's next_index from our log.
- Status s = log_cache_.ReadOps(peer.next_index - 1,
+ Status s = log_cache_.ReadOps(peer_copy.next_index - 1,
max_batch_size,
&messages,
&preceding_id);
@@ -501,7 +596,7 @@ Status PeerMessageQueue::RequestForPeer(const string& uuid,
string msg = Substitute("The logs necessary to catch up peer $0 have been "
"garbage collected. The follower will never be able "
"to catch up ($1)", uuid, s.ToString());
- NotifyObserversOfFailedFollower(uuid, current_term, msg);
+ wal_catchup_failure = true;
return s;
// IsIncomplete() means that we tried to read beyond the head of the log
// (in the future). See KUDU-1078.
@@ -510,14 +605,18 @@ Status PeerMessageQueue::RequestForPeer(const string& uuid,
LOG_WITH_PREFIX_UNLOCKED(ERROR) << "Error trying to read ahead of the log "
<< "while preparing peer request: "
<< s.ToString() << ". Destination peer: "
- << peer.ToString();
+ << peer_copy.ToString();
return s;
}
LOG_WITH_PREFIX_UNLOCKED(FATAL) << "Error reading the log while preparing peer request: "
<< s.ToString() << ". Destination peer: "
- << peer.ToString();
+ << peer_copy.ToString();
}
+ // Since we were able to read ops through the log cache, we know that
+ // catchup is possible.
+ wal_catchup_progress = true;
+
// We use AddAllocated rather than copy, because we pin the log cache at the
// "all replicated" point. At some point we may want to allow partially loading
// (and not pinning) earlier messages. At that point we'll need to do something
@@ -537,7 +636,7 @@ Status PeerMessageQueue::RequestForPeer(const string& uuid,
if (request->ops_size() > 0) {
int64_t last_op_sent = request->ops(request->ops_size() - 1).id().index();
if (last_op_sent < request->committed_index()) {
- KLOG_EVERY_N_SECS_THROTTLER(INFO, 3, peer.status_log_throttler, "lagging")
+ KLOG_EVERY_N_SECS_THROTTLER(INFO, 3, peer_copy.status_log_throttler, "lagging")
<< LogPrefixUnlocked() << "Peer " << uuid << " is lagging by at least "
<< (request->committed_index() - last_op_sent)
<< " ops behind the committed index " << THROTTLE_MSG;
@@ -719,11 +818,7 @@ void PeerMessageQueue::UpdatePeerStatus(const string& peer_uuid,
break;
case PeerStatus::TABLET_FAILED: {
- // Use the current term to ensure the peer will be evicted, otherwise this
- // notification may be ignored.
- int64_t current_term = queue_state_.current_term;
- l.unlock();
- NotifyObserversOfFailedFollower(peer_uuid, current_term, status.ToString());
+ UpdatePeerHealthUnlocked(peer);
return;
}
@@ -1150,6 +1245,24 @@ void PeerMessageQueue::NotifyObserversOfFailedFollowerTask(const string& uuid,
}
}
+void PeerMessageQueue::NotifyObserversOfPeerHealthChange() {
+ WARN_NOT_OK(raft_pool_observers_token_->SubmitClosure(
+ Bind(&PeerMessageQueue::NotifyObserversOfPeerHealthChangeTask, Unretained(this))),
+ LogPrefixUnlocked() + "Unable to notify RaftConsensus peer health change.");
+}
+
+void PeerMessageQueue::NotifyObserversOfPeerHealthChangeTask() {
+ MAYBE_INJECT_RANDOM_LATENCY(FLAGS_consensus_inject_latency_ms_in_notifications);
+ std::vector<PeerMessageQueueObserver*> observers_copy;
+ {
+ std::lock_guard<simple_spinlock> lock(queue_lock_);
+ observers_copy = observers_;
+ }
+ for (PeerMessageQueueObserver* observer : observers_copy) {
+ observer->NotifyPeerHealthChange();
+ }
+}
+
PeerMessageQueue::~PeerMessageQueue() {
Close();
}
http://git-wip-us.apache.org/repos/asf/kudu/blob/88e39bad/src/kudu/consensus/consensus_queue.h
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/consensus_queue.h b/src/kudu/consensus/consensus_queue.h
index c3cf620..871089b 100644
--- a/src/kudu/consensus/consensus_queue.h
+++ b/src/kudu/consensus/consensus_queue.h
@@ -120,6 +120,8 @@ class PeerMessageQueue {
last_known_committed_index(MinimumOpId().index()),
last_exchange_status(PeerStatus::NEW),
last_communication_time(MonoTime::Now()),
+ wal_catchup_possible(true),
+ last_overall_health_status(HealthReportPB::UNKNOWN),
last_seen_term_(0) {}
TrackedPeer() = default;
@@ -165,6 +167,13 @@ class PeerMessageQueue {
// successful communication ever took place.
MonoTime last_communication_time;
+ // Set to false if it is determined that the remote peer has fallen behind
+ // the local peer's WAL.
+ bool wal_catchup_possible;
+
+ // The peer's latest overall health status.
+ HealthReportPB::HealthStatus last_overall_health_status;
+
// Throttler for how often we will log status messages pertaining to this
// peer (eg when it is lagging, etc).
logging::LogThrottler status_log_throttler;
@@ -211,6 +220,10 @@ class PeerMessageQueue {
// Makes the queue untrack this peer.
void UntrackPeer(const std::string& uuid);
+ // Returns a health report for every peer currently tracked by the queue,
+ // keyed by peer UUID. The local peer is always reported as HEALTHY.
+ std::unordered_map<std::string, HealthReportPB> ReportHealthOfPeers() const;
+
// Appends a single message to be replicated to the peers.
// Returns OK unless the message could not be added to the queue for some
// reason (e.g. the queue reached max size).
@@ -419,7 +432,14 @@ class PeerMessageQueue {
// Return true if it would be safe to evict the peer 'evict_uuid' at this
// point in time.
- bool SafeToEvict(const std::string& evict_uuid);
+ bool SafeToEvictUnlocked(const std::string& evict_uuid) const;
+
+ // Update a peer's last_overall_health_status field and trigger the
+ // appropriate notifications.
+ void UpdatePeerHealthUnlocked(TrackedPeer* peer);
+
+ // Calculate a peer's up-to-date health status based on internal fields.
+ static HealthReportPB::HealthStatus PeerHealthStatus(const TrackedPeer& peer);
void NotifyObserversOfCommitIndexChange(int64_t new_commit_index);
void NotifyObserversOfCommitIndexChangeTask(int64_t new_commit_index);
@@ -434,6 +454,9 @@ class PeerMessageQueue {
int64_t term,
const std::string& reason);
+ void NotifyObserversOfPeerHealthChange();
+ void NotifyObserversOfPeerHealthChangeTask();
+
typedef std::unordered_map<std::string, TrackedPeer*> PeersMap;
std::string ToStringUnlocked() const;
@@ -520,6 +543,9 @@ class PeerMessageQueueObserver {
int64_t term,
const std::string& reason) = 0;
+ // Notify the observer that the health of one of the peers has changed.
+ virtual void NotifyPeerHealthChange() = 0;
+
virtual ~PeerMessageQueueObserver() {}
};
http://git-wip-us.apache.org/repos/asf/kudu/blob/88e39bad/src/kudu/consensus/raft_consensus.cc
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/raft_consensus.cc b/src/kudu/consensus/raft_consensus.cc
index 58cc087..45066bb 100644
--- a/src/kudu/consensus/raft_consensus.cc
+++ b/src/kudu/consensus/raft_consensus.cc
@@ -24,8 +24,9 @@
#include <memory>
#include <mutex>
#include <ostream>
-#include <unordered_set>
#include <type_traits>
+#include <unordered_map>
+#include <unordered_set>
#include <boost/optional/optional.hpp>
#include <gflags/gflags.h>
@@ -778,6 +779,10 @@ void RaftConsensus::NotifyFailedFollower(const string& uuid,
LogPrefixThreadSafe() + "Unable to start RemoteFollowerTask");
}
+void RaftConsensus::NotifyPeerHealthChange() {
+ MarkDirty("Peer health change");
+}
+
void RaftConsensus::TryRemoveFollowerTask(const string& uuid,
const RaftConfigPB& committed_config,
const std::string& reason) {
@@ -2166,10 +2171,31 @@ const string& RaftConsensus::tablet_id() const {
return options_.tablet_id;
}
-ConsensusStatePB RaftConsensus::ConsensusState() const {
+ConsensusStatePB RaftConsensus::ConsensusState(IncludeHealthReport report_health) const {
ThreadRestrictions::AssertWaitAllowed();
- LockGuard l(lock_);
- return cmeta_->ToConsensusStatePB();
+ UniqueLock l(lock_);
+ ConsensusStatePB cstate = cmeta_->ToConsensusStatePB();
+
+ // If we need to include the health report, merge it into the committed
+ // config iff we believe we are the current leader of the config.
+ if (report_health == INCLUDE_HEALTH_REPORT &&
+ cmeta_->active_role() == RaftPeerPB::LEADER) {
+ auto reports = queue_->ReportHealthOfPeers();
+
+ // We don't need to access the queue anymore, so drop the consensus lock.
+ l.unlock();
+
+ // Iterate through each peer in the committed config and attach the health
+ // report to it.
+ RaftConfigPB* committed_raft_config = cstate.mutable_committed_config();
+ for (int i = 0; i < committed_raft_config->peers_size(); i++) {
+ RaftPeerPB* peer = committed_raft_config->mutable_peers(i);
+ const HealthReportPB* report = FindOrNull(reports, peer->permanent_uuid());
+ if (!report) continue; // Only attach details if we know about the peer.
+ *peer->mutable_health_report() = *report;
+ }
+ }
+ return cstate;
}
RaftConfigPB RaftConsensus::CommittedConfig() const {
http://git-wip-us.apache.org/repos/asf/kudu/blob/88e39bad/src/kudu/consensus/raft_consensus.h
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/raft_consensus.h b/src/kudu/consensus/raft_consensus.h
index 7229bb7..fb43e10 100644
--- a/src/kudu/consensus/raft_consensus.h
+++ b/src/kudu/consensus/raft_consensus.h
@@ -277,8 +277,16 @@ class RaftConsensus : public std::enable_shared_from_this<RaftConsensus>,
scoped_refptr<TimeManager> time_manager() const { return time_manager_; }
+ enum IncludeHealthReport {
+ EXCLUDE_HEALTH_REPORT,
+ INCLUDE_HEALTH_REPORT
+ };
+
// Returns a copy of the state of the consensus system.
- ConsensusStatePB ConsensusState() const;
+ // If 'report_health' is set to 'INCLUDE_HEALTH_REPORT', and if the
+ // local replica believes it is the leader of the config, it will include a
+ // health report about each active peer in the committed config.
+ ConsensusStatePB ConsensusState(IncludeHealthReport report_health = EXCLUDE_HEALTH_REPORT) const;
// Returns a copy of the current committed Raft configuration.
RaftConfigPB CommittedConfig() const;
@@ -309,13 +317,15 @@ class RaftConsensus : public std::enable_shared_from_this<RaftConsensus>,
// Updates the committed_index and triggers the Apply()s for whatever
// transactions were pending.
// This is idempotent.
- void NotifyCommitIndex(int64_t commit_index);
+ void NotifyCommitIndex(int64_t commit_index) override;
- void NotifyTermChange(int64_t term);
+ void NotifyTermChange(int64_t term) override;
void NotifyFailedFollower(const std::string& uuid,
int64_t term,
- const std::string& reason);
+ const std::string& reason) override;
+
+ void NotifyPeerHealthChange() override;
// Return the log indexes which the consensus implementation would like to retain.
//
http://git-wip-us.apache.org/repos/asf/kudu/blob/88e39bad/src/kudu/integration-tests/raft_consensus-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/raft_consensus-itest.cc b/src/kudu/integration-tests/raft_consensus-itest.cc
index e41a331..c31d7eb 100644
--- a/src/kudu/integration-tests/raft_consensus-itest.cc
+++ b/src/kudu/integration-tests/raft_consensus-itest.cc
@@ -2424,7 +2424,8 @@ TEST_F(RaftConsensusITest, TestUpdateConsensusErrorNonePrepared) {
TEST_F(RaftConsensusITest, TestCorruptReplicaMetadata) {
// Start cluster and wait until we have a stable leader.
// Switch off tombstoning of evicted replicas to observe the failed tablet state.
- NO_FATALS(BuildAndStart({}, { "--master_tombstone_evicted_tablet_replicas=false" }));
+ NO_FATALS(BuildAndStart({ "--consensus_rpc_timeout_ms=10000" }, // Ensure we are safe to evict.
+ { "--master_tombstone_evicted_tablet_replicas=false" }));
ASSERT_OK(WaitForServersToAgree(MonoDelta::FromSeconds(10), tablet_servers_,
tablet_id_, 1));
http://git-wip-us.apache.org/repos/asf/kudu/blob/88e39bad/src/kudu/tserver/ts_tablet_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tserver/ts_tablet_manager.cc b/src/kudu/tserver/ts_tablet_manager.cc
index 9e234ec..bef2d9d 100644
--- a/src/kudu/tserver/ts_tablet_manager.cc
+++ b/src/kudu/tserver/ts_tablet_manager.cc
@@ -29,6 +29,7 @@
#include <boost/bind.hpp> // IWYU pragma: keep
#include <boost/optional/optional.hpp>
#include <gflags/gflags.h>
+#include <gflags/gflags_declare.h>
#include <glog/logging.h>
#include "kudu/clock/clock.h"
@@ -118,6 +119,8 @@ DEFINE_int32(tablet_state_walk_min_period_ms, 1000,
"tablet map to update tablet state counts.");
TAG_FLAG(tablet_state_walk_min_period_ms, advanced);
+DECLARE_bool(raft_prepare_replacement_before_eviction);
+
METRIC_DEFINE_gauge_int32(server, tablets_num_not_initialized,
"Number of Not Initialized Tablets",
kudu::MetricUnit::kTablets,
@@ -1153,7 +1156,10 @@ void TSTabletManager::CreateReportedTabletPB(const scoped_refptr<TabletReplica>&
// We cannot get consensus state information unless the TabletReplica is running.
shared_ptr<consensus::RaftConsensus> consensus = replica->shared_consensus();
if (consensus) {
- *reported_tablet->mutable_consensus_state() = consensus->ConsensusState();
+ auto include_health = FLAGS_raft_prepare_replacement_before_eviction ?
+ RaftConsensus::INCLUDE_HEALTH_REPORT :
+ RaftConsensus::EXCLUDE_HEALTH_REPORT;
+ *reported_tablet->mutable_consensus_state() = consensus->ConsensusState(include_health);
}
}