You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ep...@apache.org on 2020/11/05 22:32:22 UTC
[hadoop] branch branch-3.2 updated: YARN-10479. RMProxy should retry on SocketTimeout Exceptions. Contributed by Jim Brennan (Jim_Brennan)
This is an automated email from the ASF dual-hosted git repository.
epayne pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.2 by this push:
new d6a55ca YARN-10479. RMProxy should retry on SocketTimeout Exceptions. Contributed by Jim Brennan (Jim_Brennan)
d6a55ca is described below
commit d6a55caa9acb0dc85aaf04f4c78dd0e89fd6c26f
Author: Eric E Payne <ep...@apache.org>
AuthorDate: Thu Nov 5 21:50:46 2020 +0000
YARN-10479. RMProxy should retry on SocketTimeout Exceptions. Contributed by Jim Brennan (Jim_Brennan)
(cherry picked from commit 55339c2bddeeb98e63f09882420505e3cfaedad1)
---
.../org/apache/hadoop/yarn/client/RMProxy.java | 2 ++
.../server/nodemanager/TestNodeStatusUpdater.java | 40 +++++++++++++++++-----
2 files changed, 33 insertions(+), 9 deletions(-)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/RMProxy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/RMProxy.java
index f7cb47a..c0b0c6e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/RMProxy.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/RMProxy.java
@@ -24,6 +24,7 @@ import java.net.ConnectException;
import java.net.InetSocketAddress;
import java.net.NoRouteToHostException;
import java.net.SocketException;
+import java.net.SocketTimeoutException;
import java.net.UnknownHostException;
import java.security.PrivilegedAction;
import java.util.HashMap;
@@ -275,6 +276,7 @@ public class RMProxy<T> {
exceptionToPolicyMap.put(ConnectTimeoutException.class, retryPolicy);
exceptionToPolicyMap.put(RetriableException.class, retryPolicy);
exceptionToPolicyMap.put(SocketException.class, retryPolicy);
+ exceptionToPolicyMap.put(SocketTimeoutException.class, retryPolicy);
exceptionToPolicyMap.put(StandbyException.class, retryPolicy);
// YARN-4288: local IOException is also possible.
exceptionToPolicyMap.put(IOException.class, retryPolicy);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
index 6bc2c8b..43a8d3b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
@@ -371,12 +371,15 @@ public class TestNodeStatusUpdater extends NodeManagerTestBase {
private final long rmStartIntervalMS;
private final boolean rmNeverStart;
public ResourceTracker resourceTracker;
+ private final boolean useSocketTimeoutEx;
public MyNodeStatusUpdater4(Context context, Dispatcher dispatcher,
NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics,
- long rmStartIntervalMS, boolean rmNeverStart) {
+ long rmStartIntervalMS, boolean rmNeverStart,
+ boolean useSocketTimeoutEx) {
super(context, dispatcher, healthChecker, metrics);
this.rmStartIntervalMS = rmStartIntervalMS;
this.rmNeverStart = rmNeverStart;
+ this.useSocketTimeoutEx = useSocketTimeoutEx;
}
@Override
@@ -391,7 +394,8 @@ public class TestNodeStatusUpdater extends NodeManagerTestBase {
HAUtil.isHAEnabled(conf));
resourceTracker =
(ResourceTracker) RetryProxy.create(ResourceTracker.class,
- new MyResourceTracker6(rmStartIntervalMS, rmNeverStart),
+ new MyResourceTracker6(rmStartIntervalMS, rmNeverStart,
+ useSocketTimeoutEx),
retryPolicy);
return resourceTracker;
}
@@ -824,11 +828,14 @@ public class TestNodeStatusUpdater extends NodeManagerTestBase {
private long rmStartIntervalMS;
private boolean rmNeverStart;
private final long waitStartTime;
+ private final boolean useSocketTimeoutEx;
- public MyResourceTracker6(long rmStartIntervalMS, boolean rmNeverStart) {
+ MyResourceTracker6(long rmStartIntervalMS, boolean rmNeverStart,
+ boolean useSocketTimeoutEx) {
this.rmStartIntervalMS = rmStartIntervalMS;
this.rmNeverStart = rmNeverStart;
this.waitStartTime = System.currentTimeMillis();
+ this.useSocketTimeoutEx = useSocketTimeoutEx;
}
@Override
@@ -837,8 +844,13 @@ public class TestNodeStatusUpdater extends NodeManagerTestBase {
IOException {
if (System.currentTimeMillis() - waitStartTime <= rmStartIntervalMS
|| rmNeverStart) {
- throw new java.net.ConnectException("Faking RM start failure as start "
- + "delay timer has not expired.");
+ if (useSocketTimeoutEx) {
+ throw new java.net.SocketTimeoutException(
+ "Faking RM start failure as start delay timer has not expired.");
+ } else {
+ throw new java.net.ConnectException(
+ "Faking RM start failure as start delay timer has not expired.");
+ }
} else {
NodeId nodeId = request.getNodeId();
Resource resource = request.getResource();
@@ -1340,8 +1352,8 @@ public class TestNodeStatusUpdater extends NodeManagerTestBase {
}
}
- @Test (timeout = 150000)
- public void testNMConnectionToRM() throws Exception {
+ private void testNMConnectionToRMInternal(boolean useSocketTimeoutEx)
+ throws Exception {
final long delta = 50000;
final long connectionWaitMs = 5000;
final long connectionRetryIntervalMs = 1000;
@@ -1360,7 +1372,7 @@ public class TestNodeStatusUpdater extends NodeManagerTestBase {
Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
NodeStatusUpdater nodeStatusUpdater = new MyNodeStatusUpdater4(
context, dispatcher, healthChecker, metrics,
- rmStartIntervalMS, true);
+ rmStartIntervalMS, true, useSocketTimeoutEx);
return nodeStatusUpdater;
}
};
@@ -1392,7 +1404,7 @@ public class TestNodeStatusUpdater extends NodeManagerTestBase {
Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
NodeStatusUpdater nodeStatusUpdater = new MyNodeStatusUpdater4(
context, dispatcher, healthChecker, metrics, rmStartIntervalMS,
- false);
+ false, useSocketTimeoutEx);
return nodeStatusUpdater;
}
};
@@ -1423,6 +1435,16 @@ public class TestNodeStatusUpdater extends NodeManagerTestBase {
(duration < (rmStartIntervalMS + delta)));
}
+ @Test (timeout = 150000)
+ public void testNMConnectionToRM() throws Exception {
+ testNMConnectionToRMInternal(false);
+ }
+
+ @Test (timeout = 150000)
+ public void testNMConnectionToRMwithSocketTimeout() throws Exception {
+ testNMConnectionToRMInternal(true);
+ }
+
/**
* Verifies that if for some reason NM fails to start ContainerManager RPC
* server, RM is oblivious to NM's presence. The behaviour is like this
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org