You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ji...@apache.org on 2014/10/07 22:47:44 UTC
git commit: YARN-1857. CapacityScheduler headroom doesn't account for
other AM's running. Contributed by Chen He and Craig Welch
Repository: hadoop
Updated Branches:
refs/heads/trunk 9196db9a0 -> 30d56fdbb
YARN-1857. CapacityScheduler headroom doesn't account for other AM's running. Contributed by Chen He and Craig Welch
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/30d56fdb
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/30d56fdb
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/30d56fdb
Branch: refs/heads/trunk
Commit: 30d56fdbb40d06c4e267d6c314c8c767a7adc6a3
Parents: 9196db9
Author: Jian He <ji...@apache.org>
Authored: Tue Oct 7 13:43:12 2014 -0700
Committer: Jian He <ji...@apache.org>
Committed: Tue Oct 7 13:45:04 2014 -0700
----------------------------------------------------------------------
hadoop-yarn-project/CHANGES.txt | 3 +
.../scheduler/capacity/LeafQueue.java | 31 ++--
.../scheduler/capacity/TestLeafQueue.java | 146 ++++++++++++++++++-
3 files changed, 168 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/30d56fdb/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 5cb2fc7..4bd9241 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -585,6 +585,9 @@ Release 2.6.0 - UNRELEASED
YARN-2644. Fixed CapacityScheduler to return up-to-date headroom when
AM allocates. (Craig Welch via jianhe)
+ YARN-1857. CapacityScheduler headroom doesn't account for other AM's running.
+ (Chen He and Craig Welch via jianhe)
+
Release 2.5.1 - 2014-09-05
INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/30d56fdb/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
index 57f0907..cab0318 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
@@ -981,11 +981,28 @@ public class LeafQueue implements CSQueue {
private Resource getHeadroom(User user, Resource queueMaxCap,
Resource clusterResource, Resource userLimit) {
+ /**
+ * Headroom is:
+ * min(
+ * min(userLimit, queueMaxCap) - userConsumed,
+ * queueMaxCap - queueUsedResources
+ * )
+ *
+ * ( which can be expressed as,
* min (userLimit - userConsumed, queueMaxCap - userConsumed,
+ * queueMaxCap - queueUsedResources)
+ * )
+ *
+ * given that queueUsedResources >= userConsumed, this simplifies to
+ *
+ * >> min (userlimit - userConsumed, queueMaxCap - queueUsedResources) <<
+ *
+ */
Resource headroom =
- Resources.subtract(
- Resources.min(resourceCalculator, clusterResource,
- userLimit, queueMaxCap),
- user.getConsumedResources());
+ Resources.min(resourceCalculator, clusterResource,
+ Resources.subtract(userLimit, user.getConsumedResources()),
+ Resources.subtract(queueMaxCap, usedResources)
+ );
return headroom;
}
@@ -1051,16 +1068,12 @@ public class LeafQueue implements CSQueue {
@Lock({LeafQueue.class, FiCaSchedulerApp.class})
- private Resource computeUserLimitAndSetHeadroom(
+ Resource computeUserLimitAndSetHeadroom(
FiCaSchedulerApp application, Resource clusterResource, Resource required) {
String user = application.getUser();
User queueUser = getUser(user);
-
- /**
- * Headroom is min((userLimit, queue-max-cap) - consumed)
- */
Resource userLimit = // User limit
computeUserLimit(application, clusterResource, required, queueUser);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/30d56fdb/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java
index 9e06c52..9208082 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java
@@ -215,6 +215,7 @@ public class TestLeafQueue {
conf.setCapacity(Q_E, 1);
conf.setMaximumCapacity(Q_E, 1);
conf.setAcl(Q_E, QueueACL.SUBMIT_APPLICATIONS, "user_e");
+
}
static LeafQueue stubLeafQueue(LeafQueue queue) {
@@ -638,7 +639,146 @@ public class TestLeafQueue {
assertEquals(2*GB, app_0.getCurrentConsumption().getMemory());
assertEquals(2*GB, app_1.getCurrentConsumption().getMemory());
}
-
+
+ @Test
+ public void testComputeUserLimitAndSetHeadroom(){
+ LeafQueue qb = stubLeafQueue((LeafQueue)queues.get(B));
+ qb.setMaxCapacity(1.0f);
+ // Users
+ final String user_0 = "user_0";
+ final String user_1 = "user_1";
+
+ //create nodes
+ String host_0 = "127.0.0.1";
+ FiCaSchedulerNode node_0 = TestUtils.getMockNode(host_0, DEFAULT_RACK, 0, 8*GB);
+ String host_1 = "127.0.0.2";
+ FiCaSchedulerNode node_1 = TestUtils.getMockNode(host_1, DEFAULT_RACK, 0, 8*GB);
+
+ final int numNodes = 2;
+ Resource clusterResource = Resources.createResource(numNodes * (8*GB), 1);
+ when(csContext.getNumClusterNodes()).thenReturn(numNodes);
+
+ //our test plan contains three cases
+ //1. single user dominates the queue, we test the headroom
+ //2. two users, but user_0 is assigned 100% of the queue resource,
+ // submit user_1's application, check headroom correctness
+ //3. two users, each is assigned 50% of the queue resource
+ // each user submit one application and check their headrooms
+ //4. similar to 3, but user_0 has no quota left and there are
+ // free resources left, check headroom
+
+ //test case 1
+ qb.setUserLimit(100);
+ qb.setUserLimitFactor(1);
+
+ final ApplicationAttemptId appAttemptId_0 =
+ TestUtils.getMockApplicationAttemptId(0, 0);
+ FiCaSchedulerApp app_0 =
+ new FiCaSchedulerApp(appAttemptId_0, user_0, qb,
+ qb.getActiveUsersManager(), rmContext);
+ qb.submitApplicationAttempt(app_0, user_0);
+ Priority u0Priority = TestUtils.createMockPriority(1);
+ app_0.updateResourceRequests(Collections.singletonList(
+ TestUtils.createResourceRequest(ResourceRequest.ANY, 4*GB, 1, true,
+ u0Priority, recordFactory)));
+
+ assertEquals("There should only be 1 active user!",
+ 1, qb.getActiveUsersManager().getNumActiveUsers());
+ //get headroom
+ qb.assignContainers(clusterResource, node_0, false);
+ qb.computeUserLimitAndSetHeadroom(app_0, clusterResource,
+ app_0.getResourceRequest(u0Priority, ResourceRequest.ANY).getCapability());
+
+ //maxqueue 16G, userlimit 13G, - 4G used = 9G
+ assertEquals(9*GB,app_0.getHeadroom().getMemory());
+
+ //test case 2
+ final ApplicationAttemptId appAttemptId_2 =
+ TestUtils.getMockApplicationAttemptId(2, 0);
+ FiCaSchedulerApp app_2 =
+ new FiCaSchedulerApp(appAttemptId_2, user_1, qb,
+ qb.getActiveUsersManager(), rmContext);
+ Priority u1Priority = TestUtils.createMockPriority(2);
+ app_2.updateResourceRequests(Collections.singletonList(
+ TestUtils.createResourceRequest(ResourceRequest.ANY, 4*GB, 1, true,
+ u1Priority, recordFactory)));
+ qb.submitApplicationAttempt(app_2, user_1);
+ qb.assignContainers(clusterResource, node_1, false);
+ qb.computeUserLimitAndSetHeadroom(app_0, clusterResource,
+ app_0.getResourceRequest(u0Priority, ResourceRequest.ANY).getCapability());
+
+ assertEquals(8*GB, qb.getUsedResources().getMemory());
+ assertEquals(4*GB, app_0.getCurrentConsumption().getMemory());
+ //maxqueue 16G, userlimit 13G, - 4G used = 9G BUT
+ //maxqueue 16G - used 8G (4 each app/user) = 8G max headroom (the new logic)
+ assertEquals(8*GB, app_0.getHeadroom().getMemory());
+ assertEquals(4*GB, app_2.getCurrentConsumption().getMemory());
+ assertEquals(8*GB, app_2.getHeadroom().getMemory());
+
+ //test case 3
+ qb.finishApplication(app_0.getApplicationId(), user_0);
+ qb.finishApplication(app_2.getApplicationId(), user_1);
+ qb.releaseResource(clusterResource, app_0, app_0.getResource(u0Priority));
+ qb.releaseResource(clusterResource, app_2, app_2.getResource(u1Priority));
+
+ qb.setUserLimit(50);
+ qb.setUserLimitFactor(1);
+
+ final ApplicationAttemptId appAttemptId_1 =
+ TestUtils.getMockApplicationAttemptId(1, 0);
+ FiCaSchedulerApp app_1 =
+ new FiCaSchedulerApp(appAttemptId_1, user_0, qb,
+ qb.getActiveUsersManager(), rmContext);
+ final ApplicationAttemptId appAttemptId_3 =
+ TestUtils.getMockApplicationAttemptId(3, 0);
+ FiCaSchedulerApp app_3 =
+ new FiCaSchedulerApp(appAttemptId_3, user_1, qb,
+ qb.getActiveUsersManager(), rmContext);
+ app_1.updateResourceRequests(Collections.singletonList(
+ TestUtils.createResourceRequest(ResourceRequest.ANY, 2*GB, 1, true,
+ u0Priority, recordFactory)));
+ app_3.updateResourceRequests(Collections.singletonList(
+ TestUtils.createResourceRequest(ResourceRequest.ANY, 2*GB, 1, true,
+ u1Priority, recordFactory)));
+ qb.submitApplicationAttempt(app_1, user_0);
+ qb.submitApplicationAttempt(app_3, user_1);
+ qb.assignContainers(clusterResource, node_0, false);
+ qb.assignContainers(clusterResource, node_0, false);
+ qb.computeUserLimitAndSetHeadroom(app_3, clusterResource,
+ app_3.getResourceRequest(u1Priority, ResourceRequest.ANY).getCapability());
+ assertEquals(4*GB, qb.getUsedResources().getMemory());
+ //maxqueue 16G, userlimit 7G, used (by each user) 2G, headroom 5G (both)
+ assertEquals(5*GB, app_3.getHeadroom().getMemory());
+ assertEquals(5*GB, app_1.getHeadroom().getMemory());
+ //test case 4
+ final ApplicationAttemptId appAttemptId_4 =
+ TestUtils.getMockApplicationAttemptId(4, 0);
+ FiCaSchedulerApp app_4 =
+ new FiCaSchedulerApp(appAttemptId_4, user_0, qb,
+ qb.getActiveUsersManager(), rmContext);
+ qb.submitApplicationAttempt(app_4, user_0);
+ app_4.updateResourceRequests(Collections.singletonList(
+ TestUtils.createResourceRequest(ResourceRequest.ANY, 6*GB, 1, true,
+ u0Priority, recordFactory)));
+ qb.assignContainers(clusterResource, node_1, false);
+ qb.computeUserLimitAndSetHeadroom(app_4, clusterResource,
+ app_4.getResourceRequest(u0Priority, ResourceRequest.ANY).getCapability());
+ qb.computeUserLimitAndSetHeadroom(app_3, clusterResource,
+ app_3.getResourceRequest(u1Priority, ResourceRequest.ANY).getCapability());
+
+
+ //app3 is user1, active from last test case
+ //maxqueue 16G, userlimit 13G, used 2G, would be headroom 10G BUT
+ //10G in use, so max possible headroom is 6G (new logic)
+ assertEquals(6*GB, app_3.getHeadroom().getMemory());
+ //testcase3 still active - 2+2+6=10
+ assertEquals(10*GB, qb.getUsedResources().getMemory());
+ //app4 is user 0
+ //maxqueue 16G, userlimit 13G, used 8G, headroom 5G
+ //(8G used is 6G from this test case - app4, 2 from last test case, app_1)
+ assertEquals(5*GB, app_4.getHeadroom().getMemory());
+ }
+
@Test
public void testUserHeadroomMultiApp() throws Exception {
// Mock the queue
@@ -787,7 +927,7 @@ public class TestLeafQueue {
// Set user-limit
a.setUserLimit(50);
a.setUserLimitFactor(2);
-
+
// Now, only user_0 should be active since he is the only one with
// outstanding requests
assertEquals("There should only be 1 active user!",
@@ -835,7 +975,7 @@ public class TestLeafQueue {
priority, recordFactory)));
assertEquals(1, a.getActiveUsersManager().getNumActiveUsers());
a.assignContainers(clusterResource, node_1, false);
- assertEquals(1*GB, app_2.getHeadroom().getMemory()); // hit queue max-cap
+ assertEquals(0*GB, app_2.getHeadroom().getMemory()); // hit queue max-cap
}
@Test