You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ar...@apache.org on 2016/01/16 01:10:32 UTC
[06/43] hadoop git commit: YARN-4414. Nodemanager connection errors are retried at multiple levels. Contributed by Chang Li
YARN-4414. Nodemanager connection errors are retried at multiple levels. Contributed by Chang Li
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/13de8359
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/13de8359
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/13de8359
Branch: refs/heads/HDFS-1312
Commit: 13de8359a1c6d9fc78cd5013c860c1086d86176f
Parents: c77c35f
Author: Jason Lowe <jl...@apache.org>
Authored: Tue Jan 12 15:56:15 2016 +0000
Committer: Jason Lowe <jl...@apache.org>
Committed: Tue Jan 12 15:56:15 2016 +0000
----------------------------------------------------------------------
hadoop-yarn-project/CHANGES.txt | 6 ++++
.../org/apache/hadoop/yarn/client/NMProxy.java | 9 ++++--
.../apache/hadoop/yarn/client/ServerProxy.java | 1 +
.../containermanager/TestNMProxy.java | 34 ++++++++++++++++++--
4 files changed, 45 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/13de8359/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 1f38c77..20440c1 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -1301,6 +1301,9 @@ Release 2.7.3 - UNRELEASED
YARN-3695. ServerProxy (NMProxy, etc.) shouldn't retry forever for non
network exception. (Raju Bairishetti via jianhe)
+ YARN-4414. Nodemanager connection errors are retried at multiple levels
+ (Chang Li via jlowe)
+
Release 2.7.2 - UNRELEASED
INCOMPATIBLE CHANGES
@@ -2184,6 +2187,9 @@ Release 2.6.4 - UNRELEASED
YARN-3695. ServerProxy (NMProxy, etc.) shouldn't retry forever for non
network exception. (Raju Bairishetti via jianhe)
+ YARN-4414. Nodemanager connection errors are retried at multiple levels
+ (Chang Li via jlowe)
+
Release 2.6.3 - 2015-12-17
INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/13de8359/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/NMProxy.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/NMProxy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/NMProxy.java
index dd40b45..68816bb 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/NMProxy.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/NMProxy.java
@@ -23,6 +23,7 @@ import java.net.InetSocketAddress;
import org.apache.hadoop.classification.InterfaceAudience.Public;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
@@ -42,8 +43,12 @@ public class NMProxy extends ServerProxy {
YarnConfiguration.DEFAULT_CLIENT_NM_CONNECT_MAX_WAIT_MS,
YarnConfiguration.CLIENT_NM_CONNECT_RETRY_INTERVAL_MS,
YarnConfiguration.DEFAULT_CLIENT_NM_CONNECT_RETRY_INTERVAL_MS);
-
- return createRetriableProxy(conf, protocol, ugi, rpc, serverAddress,
+ Configuration confClone = new Configuration(conf);
+ confClone.setInt(
+ CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, 0);
+ confClone.setInt(CommonConfigurationKeysPublic.
+ IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, 0);
+ return createRetriableProxy(confClone, protocol, ugi, rpc, serverAddress,
retryPolicy);
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hadoop/blob/13de8359/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ServerProxy.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ServerProxy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ServerProxy.java
index 8c30fd7..e42435f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ServerProxy.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ServerProxy.java
@@ -77,6 +77,7 @@ public class ServerProxy {
exceptionToPolicyMap.put(ConnectException.class, retryPolicy);
exceptionToPolicyMap.put(NoRouteToHostException.class, retryPolicy);
exceptionToPolicyMap.put(UnknownHostException.class, retryPolicy);
+ exceptionToPolicyMap.put(ConnectTimeoutException.class, retryPolicy);
exceptionToPolicyMap.put(RetriableException.class, retryPolicy);
exceptionToPolicyMap.put(SocketException.class, retryPolicy);
exceptionToPolicyMap.put(NMNotYetReadyException.class, retryPolicy);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/13de8359/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestNMProxy.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestNMProxy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestNMProxy.java
index 152b92c..7ce15c5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestNMProxy.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestNMProxy.java
@@ -21,6 +21,8 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager;
import java.io.IOException;
import java.net.InetSocketAddress;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.fs.UnsupportedFileSystemException;
import org.apache.hadoop.io.retry.UnreliableInterface;
import org.apache.hadoop.security.SecurityUtil;
@@ -127,7 +129,7 @@ public class TestNMProxy extends BaseContainerManagerTest {
StartContainersRequest allRequests =
Records.newRecord(StartContainersRequest.class);
- ContainerManagementProtocol proxy = getNMProxy();
+ ContainerManagementProtocol proxy = getNMProxy(conf);
retryCount = 0;
shouldThrowNMNotYetReadyException = false;
@@ -158,14 +160,40 @@ public class TestNMProxy extends BaseContainerManagerTest {
StartContainersRequest allRequests =
Records.newRecord(StartContainersRequest.class);
- ContainerManagementProtocol proxy = getNMProxy();
+ ContainerManagementProtocol proxy = getNMProxy(conf);
shouldThrowNMNotYetReadyException = false;
retryCount = 0;
proxy.startContainers(allRequests);
}
- private ContainerManagementProtocol getNMProxy() {
+ @Test(timeout = 20000)
+ public void testNMProxyRPCRetry() throws Exception {
+ conf.setLong(YarnConfiguration.CLIENT_NM_CONNECT_MAX_WAIT_MS, 1000);
+ conf.setLong(YarnConfiguration.CLIENT_NM_CONNECT_RETRY_INTERVAL_MS, 100);
+ StartContainersRequest allRequests =
+ Records.newRecord(StartContainersRequest.class);
+ Configuration newConf = new YarnConfiguration(conf);
+ newConf.setInt(
+ CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, 100);
+
+ newConf.setInt(CommonConfigurationKeysPublic.
+ IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, 100);
+ // connect to some dummy address so that it can trigger
+ // connection failure and RPC level retries.
+ newConf.set(YarnConfiguration.NM_ADDRESS, "1234");
+ ContainerManagementProtocol proxy = getNMProxy(newConf);
+ try {
+ proxy.startContainers(allRequests);
+ Assert.fail("should get socket exception");
+ } catch (IOException e) {
+ // socket exception should be thrown immediately, without RPC retries.
+ Assert.assertTrue(e.toString().
+ contains("Failed on local exception: java.net.SocketException"));
+ }
+ }
+
+ private ContainerManagementProtocol getNMProxy(Configuration conf) {
ApplicationId appId = ApplicationId.newInstance(1, 1);
ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1);