You are viewing a plain text version of this content. The canonical link for it is here.
Posted to yarn-commits@hadoop.apache.org by ka...@apache.org on 2014/06/07 03:23:24 UTC
svn commit: r1601051 - in
/hadoop/common/branches/branch-2/hadoop-yarn-project: ./
hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/
hadoop-yarn/hadoop-yarn-server/h...
Author: kasha
Date: Sat Jun 7 01:23:23 2014
New Revision: 1601051
URL: http://svn.apache.org/r1601051
Log:
YARN-2128. FairScheduler: Incorrect calculation of amResource usage. (Wei Yan via kasha)
Modified:
hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AppSchedulable.java
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java
Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt?rev=1601051&r1=1601050&r2=1601051&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt Sat Jun 7 01:23:23 2014
@@ -193,6 +193,9 @@ Release 2.5.0 - UNRELEASED
YARN-2121. Fixed NPE handling in Timeline Server's TimelineAuthenticator.
(Zhijie Shen via vinodkv)
+ YARN-2128. FairScheduler: Incorrect calculation of amResource usage.
+ (Wei Yan via kasha)
+
Release 2.4.1 - UNRELEASED
INCOMPATIBLE CHANGES
Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java?rev=1601051&r1=1601050&r2=1601051&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java (original)
+++ hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java Sat Jun 7 01:23:23 2014
@@ -79,6 +79,7 @@ public class SchedulerApplicationAttempt
protected Resource currentConsumption = Resource.newInstance(0, 0);
private Resource amResource;
private boolean unmanagedAM = true;
+ private boolean amRunning = false;
protected List<RMContainer> newlyAllocatedContainers =
new ArrayList<RMContainer>();
@@ -110,7 +111,6 @@ public class SchedulerApplicationAttempt
activeUsersManager);
this.queue = queue;
-
if (rmContext != null && rmContext.getRMApps() != null &&
rmContext.getRMApps()
.containsKey(applicationAttemptId.getApplicationId())) {
@@ -118,7 +118,6 @@ public class SchedulerApplicationAttempt
rmContext.getRMApps().get(applicationAttemptId.getApplicationId())
.getApplicationSubmissionContext();
if (appSubmissionContext != null) {
- amResource = appSubmissionContext.getResource();
unmanagedAM = appSubmissionContext.getUnmanagedAM();
}
}
@@ -188,6 +187,18 @@ public class SchedulerApplicationAttempt
return amResource;
}
+ public void setAMResource(Resource amResource) {
+ this.amResource = amResource;
+ }
+
+ public boolean isAmRunning() {
+ return amRunning;
+ }
+
+ public void setAmRunning(boolean bool) {
+ amRunning = bool;
+ }
+
public boolean getUnmanagedAM() {
return unmanagedAM;
}
Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AppSchedulable.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AppSchedulable.java?rev=1601051&r1=1601050&r2=1601051&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AppSchedulable.java (original)
+++ hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AppSchedulable.java Sat Jun 7 01:23:23 2014
@@ -271,6 +271,7 @@ public class AppSchedulable extends Sche
if (app.getLiveContainers().size() == 1 &&
!app.getUnmanagedAM()) {
queue.addAMResourceUsage(container.getResource());
+ app.setAmRunning(true);
}
return container.getResource();
Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java?rev=1601051&r1=1601050&r2=1601051&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java (original)
+++ hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java Sat Jun 7 01:23:23 2014
@@ -91,7 +91,7 @@ public class FSLeafQueue extends FSQueue
public boolean removeApp(FSSchedulerApp app) {
if (runnableAppScheds.remove(app.getAppSchedulable())) {
// Update AM resource usage
- if (app.getAMResource() != null) {
+ if (app.isAmRunning() && app.getAMResource() != null) {
Resources.subtractFrom(amResourceUsage, app.getAMResource());
}
return true;
@@ -153,6 +153,10 @@ public class FSLeafQueue extends FSQueue
return usage;
}
+ public Resource getAmResourceUsage() {
+ return amResourceUsage;
+ }
+
@Override
public void updateDemand() {
// Compute demand by iterating through apps in the queue
Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java?rev=1601051&r1=1601050&r2=1601051&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java (original)
+++ hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java Sat Jun 7 01:23:23 2014
@@ -836,6 +836,12 @@ public class FairScheduler extends
SchedulerUtils.normalizeRequests(ask, new DominantResourceCalculator(),
clusterResource, minimumAllocation, maximumAllocation, incrAllocation);
+ // Set amResource for this app
+ if (!application.getUnmanagedAM() && ask.size() == 1
+ && application.getLiveContainers().isEmpty()) {
+ application.setAMResource(ask.get(0).getCapability());
+ }
+
// Release containers
for (ContainerId releasedContainerId : release) {
RMContainer rmContainer = getRMContainer(releasedContainerId);
Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java?rev=1601051&r1=1601050&r2=1601051&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java (original)
+++ hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java Sat Jun 7 01:23:23 2014
@@ -2328,12 +2328,13 @@ public class TestFairScheduler extends F
scheduler.handle(nodeEvent);
scheduler.update();
+ FSLeafQueue queue1 = scheduler.getQueueManager().getLeafQueue("queue1", true);
assertEquals("Queue queue1's fair share should be 10240",
- 10240, scheduler.getQueueManager().getLeafQueue("queue1", true)
- .getFairShare().getMemory());
+ 10240, queue1.getFairShare().getMemory());
Resource amResource1 = Resource.newInstance(1024, 1);
Resource amResource2 = Resource.newInstance(2048, 2);
+ Resource amResource3 = Resource.newInstance(1860, 2);
int amPriority = RMAppAttemptImpl.AM_CONTAINER_PRIORITY.getPriority();
// Exceeds no limits
ApplicationAttemptId attId1 = createAppAttemptId(1, 1);
@@ -2346,6 +2347,8 @@ public class TestFairScheduler extends F
1024, app1.getAMResource().getMemory());
assertEquals("Application1's AM should be running",
1, app1.getLiveContainers().size());
+ assertEquals("Queue1's AM resource usage should be 1024 MB memory",
+ 1024, queue1.getAmResourceUsage().getMemory());
// Exceeds no limits
ApplicationAttemptId attId2 = createAppAttemptId(2, 1);
@@ -2358,6 +2361,8 @@ public class TestFairScheduler extends F
1024, app2.getAMResource().getMemory());
assertEquals("Application2's AM should be running",
1, app2.getLiveContainers().size());
+ assertEquals("Queue1's AM resource usage should be 2048 MB memory",
+ 2048, queue1.getAmResourceUsage().getMemory());
// Exceeds queue limit
ApplicationAttemptId attId3 = createAppAttemptId(3, 1);
@@ -2370,6 +2375,8 @@ public class TestFairScheduler extends F
1024, app3.getAMResource().getMemory());
assertEquals("Application3's AM should not be running",
0, app3.getLiveContainers().size());
+ assertEquals("Queue1's AM resource usage should be 2048 MB memory",
+ 2048, queue1.getAmResourceUsage().getMemory());
// Still can run non-AM container
createSchedulingRequestExistingApplication(1024, 1, attId1);
@@ -2377,6 +2384,8 @@ public class TestFairScheduler extends F
scheduler.handle(updateEvent);
assertEquals("Application1 should have two running containers",
2, app1.getLiveContainers().size());
+ assertEquals("Queue1's AM resource usage should be 2048 MB memory",
+ 2048, queue1.getAmResourceUsage().getMemory());
// Remove app1, app3's AM should become running
AppAttemptRemovedSchedulerEvent appRemovedEvent1 =
@@ -2388,6 +2397,8 @@ public class TestFairScheduler extends F
0, app1.getLiveContainers().size());
assertEquals("Application3's AM should be running",
1, app3.getLiveContainers().size());
+ assertEquals("Queue1's AM resource usage should be 2048 MB memory",
+ 2048, queue1.getAmResourceUsage().getMemory());
// Exceeds queue limit
ApplicationAttemptId attId4 = createAppAttemptId(4, 1);
@@ -2400,8 +2411,35 @@ public class TestFairScheduler extends F
2048, app4.getAMResource().getMemory());
assertEquals("Application4's AM should not be running",
0, app4.getLiveContainers().size());
+ assertEquals("Queue1's AM resource usage should be 2048 MB memory",
+ 2048, queue1.getAmResourceUsage().getMemory());
- // Remove app2 and app3, app4's AM should become running
+ // Exceeds queue limit
+ ApplicationAttemptId attId5 = createAppAttemptId(5, 1);
+ createApplicationWithAMResource(attId5, "queue1", "user1", amResource2);
+ createSchedulingRequestExistingApplication(2048, 2, amPriority, attId5);
+ FSSchedulerApp app5 = scheduler.getSchedulerApp(attId5);
+ scheduler.update();
+ scheduler.handle(updateEvent);
+ assertEquals("Application5's AM requests 2048 MB memory",
+ 2048, app5.getAMResource().getMemory());
+ assertEquals("Application5's AM should not be running",
+ 0, app5.getLiveContainers().size());
+ assertEquals("Queue1's AM resource usage should be 2048 MB memory",
+ 2048, queue1.getAmResourceUsage().getMemory());
+
+ // Removing a non-running app doesn't affect others
+ AppAttemptRemovedSchedulerEvent appRemovedEvent4 =
+ new AppAttemptRemovedSchedulerEvent(attId4, RMAppAttemptState.KILLED, false);
+ scheduler.handle(appRemovedEvent4);
+ scheduler.update();
+ scheduler.handle(updateEvent);
+ assertEquals("Application5's AM should not be running",
+ 0, app5.getLiveContainers().size());
+ assertEquals("Queue1's AM resource usage should be 2048 MB memory",
+ 2048, queue1.getAmResourceUsage().getMemory());
+
+ // Remove app2 and app3, app5's AM should become running
AppAttemptRemovedSchedulerEvent appRemovedEvent2 =
new AppAttemptRemovedSchedulerEvent(attId2, RMAppAttemptState.FINISHED, false);
AppAttemptRemovedSchedulerEvent appRemovedEvent3 =
@@ -2414,8 +2452,35 @@ public class TestFairScheduler extends F
0, app2.getLiveContainers().size());
assertEquals("Application3's AM should be finished",
0, app3.getLiveContainers().size());
- assertEquals("Application4's AM should be running",
- 1, app4.getLiveContainers().size());
+ assertEquals("Application5's AM should be running",
+ 1, app5.getLiveContainers().size());
+ assertEquals("Queue1's AM resource usage should be 2048 MB memory",
+ 2048, queue1.getAmResourceUsage().getMemory());
+
+ // Check amResource normalization
+ ApplicationAttemptId attId6 = createAppAttemptId(6, 1);
+ createApplicationWithAMResource(attId6, "queue1", "user1", amResource3);
+ createSchedulingRequestExistingApplication(1860, 2, amPriority, attId6);
+ FSSchedulerApp app6 = scheduler.getSchedulerApp(attId6);
+ scheduler.update();
+ scheduler.handle(updateEvent);
+ assertEquals("Application6's AM should not be running",
+ 0, app6.getLiveContainers().size());
+ assertEquals("Application6's AM requests 2048 MB memory",
+ 2048, app6.getAMResource().getMemory());
+ assertEquals("Queue1's AM resource usage should be 2048 MB memory",
+ 2048, queue1.getAmResourceUsage().getMemory());
+
+ // Remove all apps
+ AppAttemptRemovedSchedulerEvent appRemovedEvent5 =
+ new AppAttemptRemovedSchedulerEvent(attId5, RMAppAttemptState.FINISHED, false);
+ AppAttemptRemovedSchedulerEvent appRemovedEvent6 =
+ new AppAttemptRemovedSchedulerEvent(attId6, RMAppAttemptState.FINISHED, false);
+ scheduler.handle(appRemovedEvent5);
+ scheduler.handle(appRemovedEvent6);
+ scheduler.update();
+ assertEquals("Queue1's AM resource usage should be 0",
+ 0, queue1.getAmResourceUsage().getMemory());
}
@Test