You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by sn...@apache.org on 2022/03/10 21:23:15 UTC
[hadoop] branch trunk updated: YARN-11067. Resource overcommitment due to incorrect resource normalisation logical order. Contributed by Andras Gyori
This is an automated email from the ASF dual-hosted git repository.
snemeth pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new ed65aa2 YARN-11067. Resource overcommitment due to incorrect resource normalisation logical order. Contributed by Andras Gyori
ed65aa2 is described below
commit ed65aa23240b3dd6b56e86e5f0e9d38069fb3b01
Author: Szilard Nemeth <sn...@apache.org>
AuthorDate: Thu Mar 10 22:22:58 2022 +0100
YARN-11067. Resource overcommitment due to incorrect resource normalisation logical order. Contributed by Andras Gyori
---
.../scheduler/capacity/ParentQueue.java | 37 ++++++++---------
.../TestAbsoluteResourceConfiguration.java | 47 ++++++++++++++++++++++
2 files changed, 64 insertions(+), 20 deletions(-)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java
index c624aab..87ebc0b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java
@@ -1294,17 +1294,24 @@ public class ParentQueue extends AbstractCSQueue {
private void calculateEffectiveResourcesAndCapacity(String label,
Resource clusterResource) {
+ // Update effective resources for my self;
+ if (rootQueue) {
+ Resource resourceByLabel = labelManager.getResourceByLabel(label, clusterResource);
+ usageTracker.getQueueResourceQuotas().setEffectiveMinResource(label, resourceByLabel);
+ usageTracker.getQueueResourceQuotas().setEffectiveMaxResource(label, resourceByLabel);
+ } else {
+ super.updateEffectiveResources(clusterResource);
+ }
+
+ recalculateEffectiveMinRatio(label, clusterResource);
+ }
+
+ private void recalculateEffectiveMinRatio(String label, Resource clusterResource) {
// For root queue, ensure that max/min resource is updated to latest
// cluster resource.
- Resource resourceByLabel = labelManager.getResourceByLabel(label,
- clusterResource);
-
- /*
- * == Below logic are added to calculate effectiveMinRatioPerResource ==
- */
+ Resource resourceByLabel = labelManager.getResourceByLabel(label, clusterResource);
- // Total configured min resources of direct children of this given parent
- // queue
+ // Total configured min resources of direct children of this given parent queue
Resource configuredMinResources = Resource.newInstance(0L, 0);
for (CSQueue childQueue : getChildQueues()) {
Resources.addTo(configuredMinResources,
@@ -1312,8 +1319,7 @@ public class ParentQueue extends AbstractCSQueue {
}
// Factor to scale down effective resource: When cluster has sufficient
- // resources, effective_min_resources will be same as configured
- // min_resources.
+ // resources, effective_min_resources will be same as configured min_resources.
Resource numeratorForMinRatio = null;
if (getQueuePath().equals("root")) {
if (!resourceByLabel.equals(Resources.none()) && Resources.lessThan(resourceCalculator,
@@ -1324,21 +1330,12 @@ public class ParentQueue extends AbstractCSQueue {
if (Resources.lessThan(resourceCalculator, clusterResource,
usageTracker.getQueueResourceQuotas().getEffectiveMinResource(label),
configuredMinResources)) {
- numeratorForMinRatio = usageTracker.getQueueResourceQuotas()
- .getEffectiveMinResource(label);
+ numeratorForMinRatio = usageTracker.getQueueResourceQuotas().getEffectiveMinResource(label);
}
}
effectiveMinResourceRatio.put(label, getEffectiveMinRatio(
configuredMinResources, numeratorForMinRatio));
-
- // Update effective resources for my self;
- if (rootQueue) {
- usageTracker.getQueueResourceQuotas().setEffectiveMinResource(label, resourceByLabel);
- usageTracker.getQueueResourceQuotas().setEffectiveMaxResource(label, resourceByLabel);
- } else{
- super.updateEffectiveResources(clusterResource);
- }
}
private Map<String, Float> getEffectiveMinRatio(
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestAbsoluteResourceConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestAbsoluteResourceConfiguration.java
index d7c80b5..8d68cbf 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestAbsoluteResourceConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestAbsoluteResourceConfiguration.java
@@ -30,6 +30,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.MockNM;
import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent;
import org.apache.hadoop.yarn.util.resource.Resources;
import org.junit.Assert;
import org.junit.Test;
@@ -100,6 +101,21 @@ public class TestAbsoluteResourceConfiguration {
private static Set<String> resourceTypes = new HashSet<>(
Arrays.asList("memory", "vcores"));
+ private CapacitySchedulerConfiguration setupNormalizationConfiguration() {
+ CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration();
+ csConf.setQueues(CapacitySchedulerConfiguration.ROOT,
+ new String[]{QUEUEA, QUEUEB});
+ csConf.setQueues(QUEUEA_FULL.getFullPath(), new String[]{QUEUEA1, QUEUEA2});
+
+// Direct children of root (QUEUEA + QUEUEB) are configured with 60 GB, 28 vcores in total
+ csConf.setMinimumResourceRequirement("", QUEUEA_FULL, Resource.newInstance(50 * GB, 20));
+ csConf.setMinimumResourceRequirement("", QUEUEA1_FULL, Resource.newInstance(30 * GB, 15));
+ csConf.setMinimumResourceRequirement("", QUEUEA2_FULL, Resource.newInstance(20 * GB, 5));
+ csConf.setMinimumResourceRequirement("", QUEUEB_FULL, Resource.newInstance(10 * GB, 8));
+
+ return csConf;
+ }
+
private CapacitySchedulerConfiguration setupSimpleQueueConfiguration(
boolean isCapacityNeeded) {
CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration();
@@ -293,6 +309,37 @@ public class TestAbsoluteResourceConfiguration {
}
@Test
+ public void testNormalizationAfterNodeRemoval() throws Exception {
+ CapacitySchedulerConfiguration csConf = setupNormalizationConfiguration();
+ csConf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
+ ResourceScheduler.class);
+
+ MockRM rm = new MockRM(csConf);
+
+ rm.start();
+ rm.registerNode("h1:1234", 8 * GB, 4);
+ rm.registerNode("h2:1234", 8 * GB, 4);
+ rm.registerNode("h3:1234", 8 * GB, 4);
+ MockNM nm = rm.registerNode("h4:1234", 8 * GB, 4);
+ rm.registerNode("h5:1234", 28 * GB, 12);
+
+ // Send a removal event to CS. MockRM#unregisterNode does not reflect the real-world scenario,
+ // so we need to invoke this removal event manually.
+ CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler();
+ cs.handle(new NodeRemovedSchedulerEvent(rm.getRMContext().getRMNodes().get(nm.getNodeId())));
+
+ Resource res = Resources.add(
+ cs.getQueue(QUEUEA1_FULL.getFullPath()).getEffectiveCapacity(""),
+ cs.getQueue(QUEUEA2_FULL.getFullPath()).getEffectiveCapacity(""));
+ Resource resParent = cs.getQueue(QUEUEA_FULL.getFullPath()).getEffectiveCapacity("");
+
+ // Check that the child queues together do not overcommit their parent's resources
+ Assert.assertTrue(String.format("Summarized resource %s of all children is greater than " +
+ "their parent's %s", res, resParent),
+ Resources.lessThan(cs.getResourceCalculator(), cs.getClusterResource(), res, resParent));
+ }
+
+ @Test
public void testEffectiveMinMaxResourceConfigurartionPerQueue()
throws Exception {
// create conf with basic queue configuration.
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org