You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by wa...@apache.org on 2015/02/27 02:05:47 UTC

hadoop git commit: YARN-3251. Fixed a deadlock in CapacityScheduler when computing absoluteMaxAvailableCapacity in LeafQueue (Craig Welch via wangda)

Repository: hadoop
Updated Branches:
  refs/heads/branch-2.6 5b3d9bf63 -> 881084fe5


YARN-3251. Fixed a deadlock in CapacityScheduler when computing absoluteMaxAvailableCapacity in LeafQueue (Craig Welch via wangda)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/881084fe
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/881084fe
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/881084fe

Branch: refs/heads/branch-2.6
Commit: 881084fe5c3118c1f62585aa1b72262d46d74ac6
Parents: 5b3d9bf
Author: Wangda Tan <wa...@apache.org>
Authored: Thu Feb 26 17:05:25 2015 -0800
Committer: Wangda Tan <wa...@apache.org>
Committed: Thu Feb 26 17:05:25 2015 -0800

----------------------------------------------------------------------
 hadoop-yarn-project/CHANGES.txt                 |  3 +++
 .../scheduler/capacity/LeafQueue.java           | 24 ++++++++++++++------
 2 files changed, 20 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/881084fe/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 3eb6fbc..c603c50 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -619,6 +619,9 @@ Release 2.6.0 - 2014-11-18
     identifiers to be tampered and thus causing app submission failures in
     secure mode. (Jian He via vinodkv)
 
+    YARN-3251. Fixed a deadlock in CapacityScheduler when computing 
+    absoluteMaxAvailableCapacity in LeafQueue (Craig Welch via wangda)
+ 
   BREAKDOWN OF YARN-1051 SUBTASKS AND RELATED JIRAS
 
     YARN-1707. Introduce APIs to add/remove/resize queues in the

http://git-wip-us.apache.org/repos/asf/hadoop/blob/881084fe/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
index ffeec63..eddf30f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
@@ -115,6 +115,8 @@ public class LeafQueue extends AbstractCSQueue {
   
   private final QueueHeadroomInfo queueHeadroomInfo = new QueueHeadroomInfo();
   
+  private volatile float absoluteMaxAvailCapacity;
+  
   public LeafQueue(CapacitySchedulerContext cs, 
       String queueName, CSQueue parent, CSQueue old) throws IOException {
     super(cs, queueName, parent, old);
@@ -133,6 +135,10 @@ public class LeafQueue extends AbstractCSQueue {
         (float)cs.getConfiguration().getMaximumCapacity(getQueuePath()) / 100;
     float absoluteMaxCapacity = 
         CSQueueUtils.computeAbsoluteMaximumCapacity(maximumCapacity, parent);
+        
+    // Initially set to absoluteMax, will be updated to more accurate
+    // max avail value during assignContainers
+    absoluteMaxAvailCapacity = absoluteMaxCapacity;
 
     int userLimit = cs.getConfiguration().getUserLimit(getQueuePath());
     float userLimitFactor = 
@@ -720,8 +726,18 @@ public class LeafQueue extends AbstractCSQueue {
   }
   
   @Override
-  public synchronized CSAssignment assignContainers(Resource clusterResource,
+  public CSAssignment assignContainers(Resource clusterResource,
       FiCaSchedulerNode node, boolean needToUnreserve) {
+    //We should not hold a lock on a queue and its parent concurrently - it
+    //can lead to deadlocks when calls which walk down the tree occur
+    //concurrently (getQueueInfo...)
+    absoluteMaxAvailCapacity = CSQueueUtils.getAbsoluteMaxAvailCapacity(
+      resourceCalculator, clusterResource, this);
+    return assignContainersInternal(clusterResource, node, needToUnreserve);
+  }
+  
+  private synchronized CSAssignment assignContainersInternal(
+    Resource clusterResource, FiCaSchedulerNode node, boolean needToUnreserve) {
 
     if(LOG.isDebugEnabled()) {
       LOG.debug("assignContainers: node=" + node.getNodeName()
@@ -1012,12 +1028,6 @@ public class LeafQueue extends AbstractCSQueue {
         computeUserLimit(application, clusterResource, required,
             queueUser, requestedLabels);
 
-    //Max avail capacity needs to take into account usage by ancestor-siblings
-    //which are greater than their base capacity, so we are interested in "max avail"
-    //capacity
-    float absoluteMaxAvailCapacity = CSQueueUtils.getAbsoluteMaxAvailCapacity(
-      resourceCalculator, clusterResource, this);
-
     Resource queueMaxCap =                        // Queue Max-Capacity
         Resources.multiplyAndNormalizeDown(
             resourceCalculator,