You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by wa...@apache.org on 2015/02/27 02:05:47 UTC
hadoop git commit: YARN-3251. Fixed a deadlock in CapacityScheduler
when computing absoluteMaxAvailableCapacity in LeafQueue (Craig Welch via
wangda)
Repository: hadoop
Updated Branches:
refs/heads/branch-2.6 5b3d9bf63 -> 881084fe5
YARN-3251. Fixed a deadlock in CapacityScheduler when computing absoluteMaxAvailableCapacity in LeafQueue (Craig Welch via wangda)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/881084fe
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/881084fe
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/881084fe
Branch: refs/heads/branch-2.6
Commit: 881084fe5c3118c1f62585aa1b72262d46d74ac6
Parents: 5b3d9bf
Author: Wangda Tan <wa...@apache.org>
Authored: Thu Feb 26 17:05:25 2015 -0800
Committer: Wangda Tan <wa...@apache.org>
Committed: Thu Feb 26 17:05:25 2015 -0800
----------------------------------------------------------------------
hadoop-yarn-project/CHANGES.txt | 3 +++
.../scheduler/capacity/LeafQueue.java | 24 ++++++++++++++------
2 files changed, 20 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/881084fe/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 3eb6fbc..c603c50 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -619,6 +619,9 @@ Release 2.6.0 - 2014-11-18
identifiers to be tampered and thus causing app submission failures in
secure mode. (Jian He via vinodkv)
+ YARN-3251. Fixed a deadlock in CapacityScheduler when computing
+ absoluteMaxAvailableCapacity in LeafQueue (Craig Welch via wangda)
+
BREAKDOWN OF YARN-1051 SUBTASKS AND RELATED JIRAS
YARN-1707. Introduce APIs to add/remove/resize queues in the
http://git-wip-us.apache.org/repos/asf/hadoop/blob/881084fe/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
index ffeec63..eddf30f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
@@ -115,6 +115,8 @@ public class LeafQueue extends AbstractCSQueue {
private final QueueHeadroomInfo queueHeadroomInfo = new QueueHeadroomInfo();
+ private volatile float absoluteMaxAvailCapacity;
+
public LeafQueue(CapacitySchedulerContext cs,
String queueName, CSQueue parent, CSQueue old) throws IOException {
super(cs, queueName, parent, old);
@@ -133,6 +135,10 @@ public class LeafQueue extends AbstractCSQueue {
(float)cs.getConfiguration().getMaximumCapacity(getQueuePath()) / 100;
float absoluteMaxCapacity =
CSQueueUtils.computeAbsoluteMaximumCapacity(maximumCapacity, parent);
+
+ // Initially set to absoluteMax, will be updated to more accurate
+ // max avail value during assignContainers
+ absoluteMaxAvailCapacity = absoluteMaxCapacity;
int userLimit = cs.getConfiguration().getUserLimit(getQueuePath());
float userLimitFactor =
@@ -720,8 +726,18 @@ public class LeafQueue extends AbstractCSQueue {
}
@Override
- public synchronized CSAssignment assignContainers(Resource clusterResource,
+ public CSAssignment assignContainers(Resource clusterResource,
FiCaSchedulerNode node, boolean needToUnreserve) {
+ //We should not hold a lock on a queue and its parent concurrently - it
+ //can lead to deadlocks when calls which walk down the tree occur
+ //concurrently (getQueueInfo...)
+ absoluteMaxAvailCapacity = CSQueueUtils.getAbsoluteMaxAvailCapacity(
+ resourceCalculator, clusterResource, this);
+ return assignContainersInternal(clusterResource, node, needToUnreserve);
+ }
+
+ private synchronized CSAssignment assignContainersInternal(
+ Resource clusterResource, FiCaSchedulerNode node, boolean needToUnreserve) {
if(LOG.isDebugEnabled()) {
LOG.debug("assignContainers: node=" + node.getNodeName()
@@ -1012,12 +1028,6 @@ public class LeafQueue extends AbstractCSQueue {
computeUserLimit(application, clusterResource, required,
queueUser, requestedLabels);
- //Max avail capacity needs to take into account usage by ancestor-siblings
- //which are greater than their base capacity, so we are interested in "max avail"
- //capacity
- float absoluteMaxAvailCapacity = CSQueueUtils.getAbsoluteMaxAvailCapacity(
- resourceCalculator, clusterResource, this);
-
Resource queueMaxCap = // Queue Max-Capacity
Resources.multiplyAndNormalizeDown(
resourceCalculator,