You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by jl...@apache.org on 2015/06/24 18:24:28 UTC
hadoop git commit: YARN-3809. Failed to launch new attempts because
ApplicationMasterLauncher's threads all hang. Contributed by Jun Gong
Repository: hadoop
Updated Branches:
refs/heads/trunk 72d08a0e4 -> 2a20dd9b6
YARN-3809. Failed to launch new attempts because ApplicationMasterLauncher's threads all hang. Contributed by Jun Gong
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/2a20dd9b
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/2a20dd9b
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/2a20dd9b
Branch: refs/heads/trunk
Commit: 2a20dd9b61ba3833460cbda0e8c3e8b6366fc3ab
Parents: 72d08a0
Author: Jason Lowe <jl...@apache.org>
Authored: Wed Jun 24 16:23:48 2015 +0000
Committer: Jason Lowe <jl...@apache.org>
Committed: Wed Jun 24 16:23:48 2015 +0000
----------------------------------------------------------------------
hadoop-yarn-project/CHANGES.txt | 3 ++
.../hadoop/yarn/conf/YarnConfiguration.java | 10 +++++++
.../src/main/resources/yarn-default.xml | 12 ++++++++
.../amlauncher/ApplicationMasterLauncher.java | 30 ++++++++++++++++++--
4 files changed, 52 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2a20dd9b/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index f235338..7ecdee3 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -699,6 +699,9 @@ Release 2.7.1 - UNRELEASED
YARN-3842. NMProxy should retry on NMNotYetReadyException.
(Robert Kanter via kasha)
+ YARN-3809. Failed to launch new attempts because
+ ApplicationMasterLauncher's threads all hang (Jun Gong via jlowe)
+
Release 2.7.0 - 2015-04-20
INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2a20dd9b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index 5d75a21..6b660f7 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -143,6 +143,16 @@ public class YarnConfiguration extends Configuration {
RM_PREFIX + "client.thread-count";
public static final int DEFAULT_RM_CLIENT_THREAD_COUNT = 50;
+ /** Number of threads used to launch/cleanup AM.*/
+ public static final String RM_AMLAUNCHER_THREAD_COUNT =
+ RM_PREFIX + "amlauncher.thread-count";
+ public static final int DEFAULT_RM_AMLAUNCHER_THREAD_COUNT = 50;
+
+ /** Retry times to connect with NM.*/
+ public static final String RM_NODEMANAGER_CONNECT_RETIRES =
+ RM_PREFIX + "nodemanager-connect-retries";
+ public static final int DEFAULT_RM_NODEMANAGER_CONNECT_RETIRES = 10;
+
/** The Kerberos principal for the resource manager.*/
public static final String RM_PRINCIPAL =
RM_PREFIX + "principal";
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2a20dd9b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index d94157c..621198c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -77,6 +77,18 @@
</property>
<property>
+ <description>Number of threads used to launch/cleanup AM.</description>
+ <name>yarn.resourcemanager.amlauncher.thread-count</name>
+ <value>50</value>
+ </property>
+
+ <property>
+ <description>Retry times to connect with NM.</description>
+ <name>yarn.resourcemanager.nodemanager-connect-retries</name>
+ <value>10</value>
+ </property>
+
+ <property>
<description>The expiry interval for application master reporting.</description>
<name>yarn.am.liveness-monitor.expiry-interval-ms</name>
<value>600000</value>
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2a20dd9b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/ApplicationMasterLauncher.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/ApplicationMasterLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/ApplicationMasterLauncher.java
index 5fc39fd..f606e45 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/ApplicationMasterLauncher.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/ApplicationMasterLauncher.java
@@ -19,12 +19,17 @@
package org.apache.hadoop.yarn.server.resourcemanager.amlauncher;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
@@ -34,7 +39,7 @@ public class ApplicationMasterLauncher extends AbstractService implements
EventHandler<AMLauncherEvent> {
private static final Log LOG = LogFactory.getLog(
ApplicationMasterLauncher.class);
- private final ThreadPoolExecutor launcherPool;
+ private ThreadPoolExecutor launcherPool;
private LauncherThread launcherHandlingThread;
private final BlockingQueue<Runnable> masterEvents
@@ -45,12 +50,31 @@ public class ApplicationMasterLauncher extends AbstractService implements
public ApplicationMasterLauncher(RMContext context) {
super(ApplicationMasterLauncher.class.getName());
this.context = context;
- this.launcherPool = new ThreadPoolExecutor(10, 10, 1,
- TimeUnit.HOURS, new LinkedBlockingQueue<Runnable>());
this.launcherHandlingThread = new LauncherThread();
}
@Override
+ protected void serviceInit(Configuration conf) throws Exception {
+ int threadCount = conf.getInt(
+ YarnConfiguration.RM_AMLAUNCHER_THREAD_COUNT,
+ YarnConfiguration.DEFAULT_RM_AMLAUNCHER_THREAD_COUNT);
+ ThreadFactory tf = new ThreadFactoryBuilder()
+ .setNameFormat("ApplicationMasterLauncher #%d")
+ .build();
+ launcherPool = new ThreadPoolExecutor(threadCount, threadCount, 1,
+ TimeUnit.HOURS, new LinkedBlockingQueue<Runnable>());
+ launcherPool.setThreadFactory(tf);
+
+ Configuration newConf = new YarnConfiguration(conf);
+ newConf.setInt(CommonConfigurationKeysPublic.
+ IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY,
+ conf.getInt(YarnConfiguration.RM_NODEMANAGER_CONNECT_RETIRES,
+ YarnConfiguration.DEFAULT_RM_NODEMANAGER_CONNECT_RETIRES));
+ setConfig(newConf);
+ super.serviceInit(newConf);
+ }
+
+ @Override
protected void serviceStart() throws Exception {
launcherHandlingThread.start();
super.serviceStart();