You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by jh...@apache.org on 2016/08/22 21:35:31 UTC
hadoop git commit: HDFS-10761: libhdfs++: Fix broken logic in HA
retry policy. Contributed by James Clampffer
Repository: hadoop
Updated Branches:
refs/heads/HDFS-8707 c64f61285 -> 4a74bc4fc
HDFS-10761: libhdfs++: Fix broken logic in HA retry policy. Contributed by James Clampffer
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/4a74bc4f
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/4a74bc4f
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/4a74bc4f
Branch: refs/heads/HDFS-8707
Commit: 4a74bc4fcbfd5144410cfc58e8926ef4fbbf6efd
Parents: c64f612
Author: James <jh...@apache.org>
Authored: Mon Aug 22 17:34:59 2016 -0400
Committer: James <jh...@apache.org>
Committed: Mon Aug 22 17:34:59 2016 -0400
----------------------------------------------------------------------
.../native/libhdfspp/lib/common/retry_policy.cc | 11 +++--
.../main/native/libhdfspp/lib/rpc/rpc_engine.cc | 48 +++++++++-----------
2 files changed, 27 insertions(+), 32 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/4a74bc4f/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/retry_policy.cc
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/retry_policy.cc b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/retry_policy.cc
index a885d53..ef78aca 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/retry_policy.cc
+++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/retry_policy.cc
@@ -56,20 +56,21 @@ RetryAction FixedDelayWithFailover::ShouldRetry(const Status &s, uint64_t retrie
(void)isIdempotentOrAtMostOnce;
LOG_TRACE(kRPC, << "FixedDelayWithFailover::ShouldRetry(retries=" << retries << ", failovers=" << failovers << ")");
- if(s.code() == ::asio::error::timed_out && failovers < max_failover_retries_) {
+ if(failovers < max_failover_retries_ && (s.code() == ::asio::error::timed_out || s.get_server_exception_type() == Status::kStandbyException) )
+ {
// Try connecting to another NN in case this one keeps timing out
// Can add the backoff wait specified by dfs.client.failover.sleep.base.millis here
return RetryAction::failover(delay_);
}
- if(retries < max_retries_) {
- LOG_TRACE(kRPC, << "FixedDelayWithFailover::ShouldRetry: retries < max_retries_");
+ if(retries < max_retries_ && failovers < max_failover_retries_) {
+ LOG_TRACE(kRPC, << "FixedDelayWithFailover::ShouldRetry: retries < max_retries_ && failovers < max_failover_retries_");
return RetryAction::retry(delay_);
} else if (retries >= max_retries_ && failovers < max_failover_retries_) {
LOG_TRACE(kRPC, << "FixedDelayWithFailover::ShouldRetry: retries >= max_retries_ && failovers < max_failover_retries_");
return RetryAction::failover(delay_);
- } else if (retries >= max_retries_ && failovers == max_failover_retries_) {
- LOG_TRACE(kRPC, << "FixedDelayWithFailover::ShouldRetry: retries >= max_retries_ && failovers == max_failover_retries_");
+ } else if (retries <= max_retries_ && failovers == max_failover_retries_) {
+ LOG_TRACE(kRPC, << "FixedDelayWithFailover::ShouldRetry: retries <= max_retries_ && failovers == max_failover_retries_");
// 1 last retry on new connection
return RetryAction::retry(delay_);
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/4a74bc4f/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/rpc/rpc_engine.cc
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/rpc/rpc_engine.cc b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/rpc/rpc_engine.cc
index be69d95..89d6a67 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/rpc/rpc_engine.cc
+++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/rpc/rpc_engine.cc
@@ -291,39 +291,33 @@ void RpcEngine::RpcCommsError(
optional<RetryAction> head_action = optional<RetryAction>();
- //We are talking to the Standby NN, let's talk to the active one instead.
- if(ha_persisted_info_ && status.get_server_exception_type() == Status::kStandbyException) {
- LOG_INFO(kRPC, << "Received StandbyException. Failing over.");
- head_action = RetryAction::failover(std::max(0,options_.rpc_retry_delay_ms));
- } else {
- // Filter out anything with too many retries already
- for (auto it = pendingRequests.begin(); it < pendingRequests.end();) {
- auto req = *it;
+ // Filter out anything with too many retries already
+ for (auto it = pendingRequests.begin(); it < pendingRequests.end();) {
+ auto req = *it;
- LOG_DEBUG(kRPC, << req->GetDebugString());
+ LOG_DEBUG(kRPC, << req->GetDebugString());
- RetryAction retry = RetryAction::fail(""); // Default to fail
+ RetryAction retry = RetryAction::fail(""); // Default to fail
- if (retry_policy()) {
- retry = retry_policy()->ShouldRetry(status, req->IncrementRetryCount(), req->get_failover_count(), true);
- }
+ if (retry_policy()) {
+ retry = retry_policy()->ShouldRetry(status, req->IncrementRetryCount(), req->get_failover_count(), true);
+ }
- if (retry.action == RetryAction::FAIL) {
- // If we've exceeded the maximum retry, take the latest error and pass it
- // on. There might be a good argument for caching the first error
- // rather than the last one, that gets messy
+ if (retry.action == RetryAction::FAIL) {
+ // If we've exceeded the maximum retry, take the latest error and pass it
+ // on. There might be a good argument for caching the first error
+ // rather than the last one, that gets messy
- io_service().post([req, status]() {
- req->OnResponseArrived(nullptr, status); // Never call back while holding a lock
- });
- it = pendingRequests.erase(it);
- } else {
- if (!head_action) {
- head_action = retry;
- }
-
- ++it;
+ io_service().post([req, status]() {
+ req->OnResponseArrived(nullptr, status); // Never call back while holding a lock
+ });
+ it = pendingRequests.erase(it);
+ } else {
+ if (!head_action) {
+ head_action = retry;
}
+
+ ++it;
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org