You are viewing a plain text version of this content. The canonical link for it is here.
Posted to yarn-commits@hadoop.apache.org by tg...@apache.org on 2013/01/09 22:00:47 UTC

svn commit: r1431070 - in /hadoop/common/trunk/hadoop-yarn-project: ./ hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ hadoop-yarn/hadoop-yarn-server/hado...

Author: tgraves
Date: Wed Jan  9 21:00:47 2013
New Revision: 1431070

URL: http://svn.apache.org/viewvc?rev=1431070&view=rev
Log:
YARN-325. RM CapacityScheduler can deadlock when getQueueInfo() is called and a container is completing (Arun C Murthy via tgraves)

Modified:
    hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt
    hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSAssignment.java
    hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
    hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
    hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java

Modified: hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt?rev=1431070&r1=1431069&r2=1431070&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt Wed Jan  9 21:00:47 2013
@@ -290,6 +290,9 @@ Release 0.23.6 - UNRELEASED
     YARN-320. RM should always be able to renew its own tokens. 
     (Daryn Sharp via sseth)
 
+    YARN-325. RM CapacityScheduler can deadlock when getQueueInfo() is 
+    called and a container is completing (Arun C Murthy via tgraves)
+
 Release 0.23.5 - UNRELEASED
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSAssignment.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSAssignment.java?rev=1431070&r1=1431069&r2=1431070&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSAssignment.java (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSAssignment.java Wed Jan  9 21:00:47 2013
@@ -20,19 +20,33 @@ package org.apache.hadoop.yarn.server.re
 import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceStability.Unstable;
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
 
 @Private
 @Unstable
 public class CSAssignment {
   final private Resource resource;
   private NodeType type;
+  private final RMContainer excessReservation;
+  private final FiCaSchedulerApp application;
   
   public CSAssignment(Resource resource, NodeType type) {
     this.resource = resource;
     this.type = type;
+    this.application = null;
+    this.excessReservation = null;
+  }
+  
+  public CSAssignment(FiCaSchedulerApp application, RMContainer excessReservation) {
+    this.resource = excessReservation.getContainer().getResource();
+    this.type = NodeType.NODE_LOCAL;
+    this.application = application;
+    this.excessReservation = excessReservation;
   }
 
+
   public Resource getResource() {
     return resource;
   }
@@ -45,6 +59,14 @@ public class CSAssignment {
     this.type = type;
   }
   
+  public FiCaSchedulerApp getApplication() {
+    return application;
+  }
+
+  public RMContainer getExcessReservation() {
+    return excessReservation;
+  }
+
   @Override
   public String toString() {
     return resource.getMemory() + ":" + type;

Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java?rev=1431070&r1=1431069&r2=1431070&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java Wed Jan  9 21:00:47 2013
@@ -604,7 +604,20 @@ implements ResourceScheduler, CapacitySc
           reservedApplication.getApplicationId() + " on node: " + nm);
       
       LeafQueue queue = ((LeafQueue)reservedApplication.getQueue());
-      queue.assignContainers(clusterResource, node);
+      CSAssignment assignment = queue.assignContainers(clusterResource, node);
+      
+      RMContainer excessReservation = assignment.getExcessReservation();
+      if (excessReservation != null) {
+      Container container = excessReservation.getContainer();
+      queue.completedContainer(
+          clusterResource, assignment.getApplication(), node, 
+          excessReservation, 
+          SchedulerUtils.createAbnormalContainerStatus(
+              container.getId(), 
+              SchedulerUtils.UNRESERVED_CONTAINER), 
+          RMContainerEventType.RELEASED);
+      }
+
     }
 
     // Try to schedule more if there are no reservations to fulfill

Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java?rev=1431070&r1=1431069&r2=1431070&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java Wed Jan  9 21:00:47 2013
@@ -62,7 +62,6 @@ import org.apache.hadoop.yarn.server.res
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
 import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager;
@@ -781,11 +780,9 @@ public class LeafQueue implements CSQueu
     if (reservedContainer != null) {
       FiCaSchedulerApp application = 
           getApplication(reservedContainer.getApplicationAttemptId());
-      return new CSAssignment(
+      return 
           assignReservedContainer(application, node, reservedContainer, 
-              clusterResource),
-          NodeType.NODE_LOCAL); // Don't care about locality constraints 
-                                // for reserved containers
+              clusterResource); 
     }
     
     // Try to assign containers to applications in order
@@ -873,20 +870,14 @@ public class LeafQueue implements CSQueu
 
   }
 
-  private synchronized Resource assignReservedContainer(FiCaSchedulerApp application, 
+  private synchronized CSAssignment 
+  assignReservedContainer(FiCaSchedulerApp application, 
       FiCaSchedulerNode node, RMContainer rmContainer, Resource clusterResource) {
     // Do we still need this reservation?
     Priority priority = rmContainer.getReservedPriority();
     if (application.getTotalRequiredResources(priority) == 0) {
       // Release
-      Container container = rmContainer.getContainer();
-      completedContainer(clusterResource, application, node, 
-          rmContainer, 
-          SchedulerUtils.createAbnormalContainerStatus(
-              container.getId(), 
-              SchedulerUtils.UNRESERVED_CONTAINER), 
-          RMContainerEventType.RELEASED);
-      return container.getResource(); // Ugh, return resource to force re-sort
+      return new CSAssignment(application, rmContainer);
     }
 
     // Try to assign if we have sufficient resources
@@ -895,7 +886,7 @@ public class LeafQueue implements CSQueu
     
     // Doesn't matter... since it's already charged for at time of reservation
     // "re-reservation" is *free*
-    return Resources.none();
+    return new CSAssignment(Resources.none(), NodeType.NODE_LOCAL);
   }
 
   private synchronized boolean assignToQueue(Resource clusterResource, 

Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java?rev=1431070&r1=1431069&r2=1431070&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java Wed Jan  9 21:00:47 2013
@@ -1181,12 +1181,14 @@ public class TestLeafQueue {
     // Now finish another container from app_0 and see the reservation cancelled
     a.completedContainer(clusterResource, app_0, node_0, 
         app_0.getLiveContainers().iterator().next(), null, RMContainerEventType.KILL);
-    a.assignContainers(clusterResource, node_0);
-    assertEquals(4*GB, a.getUsedResources().getMemory());
+    CSAssignment assignment = a.assignContainers(clusterResource, node_0);
+    assertEquals(8*GB, a.getUsedResources().getMemory());
     assertEquals(0*GB, app_0.getCurrentConsumption().getMemory());
     assertEquals(4*GB, app_1.getCurrentConsumption().getMemory());
-    assertEquals(0*GB, app_1.getCurrentReservation().getMemory());
+    assertEquals(4*GB, app_1.getCurrentReservation().getMemory());
     assertEquals(0*GB, node_0.getUsedResource().getMemory());
+    assertEquals(4*GB, 
+        assignment.getExcessReservation().getContainer().getResource().getMemory());
   }