You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by jl...@apache.org on 2017/09/21 20:22:15 UTC
hadoop git commit: YARN-5195. RM intermittently crashed with NPE
while handling APP_ATTEMPT_REMOVED event when async-scheduling enabled in
CapacityScheduler. Contributed by sandflee and Jonathan Hung
Repository: hadoop
Updated Branches:
refs/heads/branch-2.8 c0bb24236 -> b0b10fc94
YARN-5195. RM intermittently crashed with NPE while handling APP_ATTEMPT_REMOVED event when async-scheduling enabled in CapacityScheduler. Contributed by sandflee and Jonathan Hung
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/b0b10fc9
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/b0b10fc9
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/b0b10fc9
Branch: refs/heads/branch-2.8
Commit: b0b10fc94a14e7f5046f854e0d46a2d4c1f33d81
Parents: c0bb242
Author: Jason Lowe <jl...@apache.org>
Authored: Thu Sep 21 15:17:03 2017 -0500
Committer: Jason Lowe <jl...@apache.org>
Committed: Thu Sep 21 15:17:03 2017 -0500
----------------------------------------------------------------------
.../scheduler/capacity/CapacityScheduler.java | 9 ++++-
.../capacity/TestCapacityScheduler.java | 38 ++++++++++++++++++++
2 files changed, 46 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/b0b10fc9/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
index 399e16e..2d942e7 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
@@ -1220,11 +1220,18 @@ public class CapacityScheduler extends
}
@VisibleForTesting
- protected synchronized void allocateContainersToNode(FiCaSchedulerNode node) {
+ public synchronized void allocateContainersToNode(FiCaSchedulerNode node) {
if (rmContext.isWorkPreservingRecoveryEnabled()
&& !rmContext.isSchedulerReadyForAllocatingContainers()) {
return;
}
+
+ if (!nodes.containsKey(node.getNodeID())) {
+ LOG.info("Skipping scheduling as the node " + node.getNodeID() +
+ " has been removed");
+ return;
+ }
+
// reset allocation and reservation stats before we start doing any work
updateSchedulerHealth(lastNodeUpdateTime, node,
new CSAssignment(Resources.none(), NodeType.NODE_LOCAL));
http://git-wip-us.apache.org/repos/asf/hadoop/blob/b0b10fc9/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java
index a663bdb..e7eb788 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java
@@ -3875,4 +3875,42 @@ public class TestCapacityScheduler {
}
}
}
+
+ @Test
+ public void testSchedulingOnRemovedNode() throws Exception {
+ Configuration conf = new YarnConfiguration();
+ conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
+ ResourceScheduler.class);
+ conf.setBoolean(
+ CapacitySchedulerConfiguration.SCHEDULE_ASYNCHRONOUSLY_ENABLE,
+ false);
+
+ MockRM rm = new MockRM(conf);
+ rm.start();
+ RMApp app = rm.submitApp(100);
+ rm.drainEvents();
+
+ MockNM nm1 = rm.registerNode("127.0.0.1:1234", 10240, 10);
+ MockAM am = MockRM.launchAndRegisterAM(app, rm, nm1);
+
+ // register a second node (nm2) to be removed, so the AM on nm1 stays alive
+ MockNM nm2 = rm.registerNode("127.0.0.1:1235", 10240, 10);
+
+ am.allocate(ResourceRequest.ANY, 2048, 1, null);
+
+ CapacityScheduler scheduler =
+ (CapacityScheduler) rm.getRMContext().getScheduler();
+ FiCaSchedulerNode node = scheduler.getAllNodes().get(nm2.getNodeId());
+ scheduler.handle(new NodeRemovedSchedulerEvent(
+ rm.getRMContext().getRMNodes().get(nm2.getNodeId())));
+ // the scheduler node has been removed; try to allocate a container on it
+ scheduler.allocateContainersToNode(node);
+
+ AppAttemptRemovedSchedulerEvent appRemovedEvent1 =
+ new AppAttemptRemovedSchedulerEvent(
+ am.getApplicationAttemptId(),
+ RMAppAttemptState.FINISHED, false);
+ scheduler.handle(appRemovedEvent1);
+ rm.stop();
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org