You are viewing a plain text version of this content. The canonical link for it is here.
Posted to yarn-commits@hadoop.apache.org by tg...@apache.org on 2013/01/09 22:00:47 UTC
svn commit: r1431070 - in /hadoop/common/trunk/hadoop-yarn-project: ./
hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/
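hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/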
Author: tgraves
Date: Wed Jan 9 21:00:47 2013
New Revision: 1431070
URL: http://svn.apache.org/viewvc?rev=1431070&view=rev
Log:
YARN-325. RM CapacityScheduler can deadlock when getQueueInfo() is called and a container is completing (Arun C Murthy via tgraves)
Modified:
hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSAssignment.java
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java
Modified: hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt?rev=1431070&r1=1431069&r2=1431070&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt Wed Jan 9 21:00:47 2013
@@ -290,6 +290,9 @@ Release 0.23.6 - UNRELEASED
YARN-320. RM should always be able to renew its own tokens.
(Daryn Sharp via sseth)
+ YARN-325. RM CapacityScheduler can deadlock when getQueueInfo() is
+ called and a container is completing (Arun C Murthy via tgraves)
+
Release 0.23.5 - UNRELEASED
INCOMPATIBLE CHANGES
Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSAssignment.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSAssignment.java?rev=1431070&r1=1431069&r2=1431070&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSAssignment.java (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSAssignment.java Wed Jan 9 21:00:47 2013
@@ -20,19 +20,33 @@ package org.apache.hadoop.yarn.server.re
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
@Private
@Unstable
public class CSAssignment {
final private Resource resource;
private NodeType type;
+ private final RMContainer excessReservation;
+ private final FiCaSchedulerApp application;
public CSAssignment(Resource resource, NodeType type) {
this.resource = resource;
this.type = type;
+ this.application = null;
+ this.excessReservation = null;
+ }
+
+ public CSAssignment(FiCaSchedulerApp application, RMContainer excessReservation) {
+ this.resource = excessReservation.getContainer().getResource();
+ this.type = NodeType.NODE_LOCAL;
+ this.application = application;
+ this.excessReservation = excessReservation;
}
+
public Resource getResource() {
return resource;
}
@@ -45,6 +59,14 @@ public class CSAssignment {
this.type = type;
}
+ public FiCaSchedulerApp getApplication() {
+ return application;
+ }
+
+ public RMContainer getExcessReservation() {
+ return excessReservation;
+ }
+
@Override
public String toString() {
return resource.getMemory() + ":" + type;
Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java?rev=1431070&r1=1431069&r2=1431070&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java Wed Jan 9 21:00:47 2013
@@ -604,7 +604,20 @@ implements ResourceScheduler, CapacitySc
reservedApplication.getApplicationId() + " on node: " + nm);
LeafQueue queue = ((LeafQueue)reservedApplication.getQueue());
- queue.assignContainers(clusterResource, node);
+ CSAssignment assignment = queue.assignContainers(clusterResource, node);
+
+ RMContainer excessReservation = assignment.getExcessReservation();
+ if (excessReservation != null) {
+ Container container = excessReservation.getContainer();
+ queue.completedContainer(
+ clusterResource, assignment.getApplication(), node,
+ excessReservation,
+ SchedulerUtils.createAbnormalContainerStatus(
+ container.getId(),
+ SchedulerUtils.UNRESERVED_CONTAINER),
+ RMContainerEventType.RELEASED);
+ }
+
}
// Try to schedule more if there are no reservations to fulfill
Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java?rev=1431070&r1=1431069&r2=1431070&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java Wed Jan 9 21:00:47 2013
@@ -62,7 +62,6 @@ import org.apache.hadoop.yarn.server.res
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager;
@@ -781,11 +780,9 @@ public class LeafQueue implements CSQueu
if (reservedContainer != null) {
FiCaSchedulerApp application =
getApplication(reservedContainer.getApplicationAttemptId());
- return new CSAssignment(
+ return
assignReservedContainer(application, node, reservedContainer,
- clusterResource),
- NodeType.NODE_LOCAL); // Don't care about locality constraints
- // for reserved containers
+ clusterResource);
}
// Try to assign containers to applications in order
@@ -873,20 +870,14 @@ public class LeafQueue implements CSQueu
}
- private synchronized Resource assignReservedContainer(FiCaSchedulerApp application,
+ private synchronized CSAssignment
+ assignReservedContainer(FiCaSchedulerApp application,
FiCaSchedulerNode node, RMContainer rmContainer, Resource clusterResource) {
// Do we still need this reservation?
Priority priority = rmContainer.getReservedPriority();
if (application.getTotalRequiredResources(priority) == 0) {
// Release
- Container container = rmContainer.getContainer();
- completedContainer(clusterResource, application, node,
- rmContainer,
- SchedulerUtils.createAbnormalContainerStatus(
- container.getId(),
- SchedulerUtils.UNRESERVED_CONTAINER),
- RMContainerEventType.RELEASED);
- return container.getResource(); // Ugh, return resource to force re-sort
+ return new CSAssignment(application, rmContainer);
}
// Try to assign if we have sufficient resources
@@ -895,7 +886,7 @@ public class LeafQueue implements CSQueu
// Doesn't matter... since it's already charged for at time of reservation
// "re-reservation" is *free*
- return Resources.none();
+ return new CSAssignment(Resources.none(), NodeType.NODE_LOCAL);
}
private synchronized boolean assignToQueue(Resource clusterResource,
Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java?rev=1431070&r1=1431069&r2=1431070&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java Wed Jan 9 21:00:47 2013
@@ -1181,12 +1181,14 @@ public class TestLeafQueue {
// Now finish another container from app_0 and see the reservation cancelled
a.completedContainer(clusterResource, app_0, node_0,
app_0.getLiveContainers().iterator().next(), null, RMContainerEventType.KILL);
- a.assignContainers(clusterResource, node_0);
- assertEquals(4*GB, a.getUsedResources().getMemory());
+ CSAssignment assignment = a.assignContainers(clusterResource, node_0);
+ assertEquals(8*GB, a.getUsedResources().getMemory());
assertEquals(0*GB, app_0.getCurrentConsumption().getMemory());
assertEquals(4*GB, app_1.getCurrentConsumption().getMemory());
- assertEquals(0*GB, app_1.getCurrentReservation().getMemory());
+ assertEquals(4*GB, app_1.getCurrentReservation().getMemory());
assertEquals(0*GB, node_0.getUsedResource().getMemory());
+ assertEquals(4*GB,
+ assignment.getExcessReservation().getContainer().getResource().getMemory());
}