You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ep...@apache.org on 2017/07/06 15:44:56 UTC
[2/2] hadoop git commit: YARN-2113. Add cross-user preemption within
CapacityScheduler's leaf-queue. (Contributed by Sunil G)
YARN-2113. Add cross-user preemption within CapacityScheduler's leaf-queue. (Contributed by Sunil G)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/eda4ac07
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/eda4ac07
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/eda4ac07
Branch: refs/heads/branch-2
Commit: eda4ac07c1835031aca7e27cc673f1c5913813bb
Parents: 53c2441
Author: Eric Payne <ep...@apache.org>
Authored: Thu Jul 6 10:43:00 2017 -0500
Committer: Eric Payne <ep...@apache.org>
Committed: Thu Jul 6 10:43:00 2017 -0500
----------------------------------------------------------------------
.../resource/DefaultResourceCalculator.java | 5 +
.../resource/DominantResourceCalculator.java | 5 +
.../yarn/util/resource/ResourceCalculator.java | 9 +
.../hadoop/yarn/util/resource/Resources.java | 5 +
.../CapacitySchedulerPreemptionContext.java | 5 +
.../CapacitySchedulerPreemptionUtils.java | 9 +-
.../FifoIntraQueuePreemptionPlugin.java | 329 +++++--
.../capacity/IntraQueueCandidatesSelector.java | 112 ++-
.../IntraQueuePreemptionComputePlugin.java | 10 +-
.../ProportionalCapacityPreemptionPolicy.java | 25 +-
.../monitor/capacity/TempAppPerPartition.java | 24 +-
.../monitor/capacity/TempQueuePerPartition.java | 14 +
.../monitor/capacity/TempUserPerPartition.java | 88 ++
.../CapacitySchedulerConfiguration.java | 8 +
.../scheduler/capacity/LeafQueue.java | 67 +-
...alCapacityPreemptionPolicyMockFramework.java | 89 +-
...ionalCapacityPreemptionPolicyIntraQueue.java | 30 +-
...cityPreemptionPolicyIntraQueueUserLimit.java | 899 +++++++++++++++++++
...pacityPreemptionPolicyIntraQueueWithDRF.java | 178 ++++
19 files changed, 1738 insertions(+), 173 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/eda4ac07/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DefaultResourceCalculator.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DefaultResourceCalculator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DefaultResourceCalculator.java
index ef7229c..524a049 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DefaultResourceCalculator.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DefaultResourceCalculator.java
@@ -121,4 +121,9 @@ public class DefaultResourceCalculator extends ResourceCalculator {
Resource smaller, Resource bigger) {
return smaller.getMemorySize() <= bigger.getMemorySize();
}
+
+ @Override
+ public boolean isAnyMajorResourceZero(Resource resource) {
+ return resource.getMemorySize() == 0f;
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/eda4ac07/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DominantResourceCalculator.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DominantResourceCalculator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DominantResourceCalculator.java
index 032aa02..69fe716 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DominantResourceCalculator.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DominantResourceCalculator.java
@@ -239,4 +239,9 @@ public class DominantResourceCalculator extends ResourceCalculator {
return smaller.getMemorySize() <= bigger.getMemorySize()
&& smaller.getVirtualCores() <= bigger.getVirtualCores();
}
+
+ @Override
+ public boolean isAnyMajorResourceZero(Resource resource) {
+ return resource.getMemorySize() == 0f || resource.getVirtualCores() == 0;
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/eda4ac07/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceCalculator.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceCalculator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceCalculator.java
index a2f85b3..d219fe1 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceCalculator.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceCalculator.java
@@ -204,4 +204,13 @@ public abstract class ResourceCalculator {
*/
public abstract boolean fitsIn(Resource cluster,
Resource smaller, Resource bigger);
+
+ /**
+ * Check whether any major resource type (i.e. a type that all NodeManagers
+ * include) has a zero value in the given resource.
+ *
+ * @param resource resource
+ * @return true if any major resource type is zero.
+ */
+ public abstract boolean isAnyMajorResourceZero(Resource resource);
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/eda4ac07/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java
index fc46fa2..91a5297 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java
@@ -345,4 +345,9 @@ public class Resources {
return createResource(Math.max(lhs.getMemorySize(), rhs.getMemorySize()),
Math.max(lhs.getVirtualCores(), rhs.getVirtualCores()));
}
+
+ public static boolean isAnyMajorResourceZero(ResourceCalculator rc,
+ Resource resource) {
+ return rc.isAnyMajorResourceZero(resource);
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/eda4ac07/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionContext.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionContext.java
index 982b1f1..d6f3f6c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionContext.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionContext.java
@@ -18,9 +18,11 @@
package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
+import org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy.IntraQueuePreemptionOrderPolicy;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
import org.apache.hadoop.yarn.util.resource.ResourceCalculator;
@@ -63,4 +65,7 @@ interface CapacitySchedulerPreemptionContext {
float getMinimumThresholdForIntraQueuePreemption();
float getMaxAllowableLimitForIntraQueuePreemption();
+
+ @Unstable
+ IntraQueuePreemptionOrderPolicy getIntraQueuePreemptionOrderPolicy();
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/eda4ac07/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionUtils.java
index abad2a1..0ae3ef0 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionUtils.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionUtils.java
@@ -99,7 +99,7 @@ public class CapacitySchedulerPreemptionUtils {
}
deductPreemptableResourcePerApp(context, tq.totalPartitionResource,
- tas, res, partition);
+ tas, res);
}
}
}
@@ -108,10 +108,10 @@ public class CapacitySchedulerPreemptionUtils {
private static void deductPreemptableResourcePerApp(
CapacitySchedulerPreemptionContext context,
Resource totalPartitionResource, Collection<TempAppPerPartition> tas,
- Resource res, String partition) {
+ Resource res) {
for (TempAppPerPartition ta : tas) {
ta.deductActuallyToBePreempted(context.getResourceCalculator(),
- totalPartitionResource, res, partition);
+ totalPartitionResource, res);
}
}
@@ -157,7 +157,8 @@ public class CapacitySchedulerPreemptionUtils {
&& Resources.greaterThan(rc, clusterResource, toObtainByPartition,
Resources.none())
&& Resources.fitsIn(rc, clusterResource,
- rmContainer.getAllocatedResource(), totalPreemptionAllowed)) {
+ rmContainer.getAllocatedResource(), totalPreemptionAllowed)
+ && !Resources.isAnyMajorResourceZero(rc, toObtainByPartition)) {
Resources.subtractFrom(toObtainByPartition,
rmContainer.getAllocatedResource());
Resources.subtractFrom(totalPreemptionAllowed,
http://git-wip-us.apache.org/repos/asf/hadoop/blob/eda4ac07/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java
index 757f567..4bf6760 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java
@@ -18,11 +18,13 @@
package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
+import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;
@@ -33,8 +35,11 @@ import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.IntraQueueCandidatesSelector.TAPriorityComparator;
+import org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy.IntraQueuePreemptionOrderPolicy;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
import org.apache.hadoop.yarn.util.resource.ResourceCalculator;
import org.apache.hadoop.yarn.util.resource.Resources;
@@ -60,6 +65,26 @@ public class FifoIntraQueuePreemptionPlugin
}
@Override
+ public Collection<FiCaSchedulerApp> getPreemptableApps(String queueName,
+ String partition) {
+ TempQueuePerPartition tq = context.getQueueByPartition(queueName,
+ partition);
+
+ List<FiCaSchedulerApp> apps = new ArrayList<FiCaSchedulerApp>();
+ for (TempAppPerPartition tmpApp : tq.getApps()) {
+ // If nothing was actually selected for preemption from an app (e.g. a
+ // lower priority app that was not chosen), leave it out of the candidates.
+ if (Resources.equals(tmpApp.getActuallyToBePreempted(),
+ Resources.none())) {
+ continue;
+ }
+
+ apps.add(tmpApp.app);
+ }
+ return apps;
+ }
+
+ @Override
public Map<String, Resource> getResourceDemandFromAppsPerQueue(
String queueName, String partition) {
@@ -89,7 +114,7 @@ public class FifoIntraQueuePreemptionPlugin
@Override
public void computeAppsIdealAllocation(Resource clusterResource,
- Resource partitionBasedResource, TempQueuePerPartition tq,
+ TempQueuePerPartition tq,
Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates,
Resource totalPreemptedResourceAllowed,
Resource queueReassignableResource, float maxAllowablePreemptLimit) {
@@ -112,17 +137,15 @@ public class FifoIntraQueuePreemptionPlugin
// 3. Create all tempApps for internal calculation and return a list from
// high priority to low priority order.
- TAPriorityComparator taComparator = new TAPriorityComparator();
- PriorityQueue<TempAppPerPartition> orderedByPriority =
- createTempAppForResCalculation(tq.partition, apps, taComparator);
+ PriorityQueue<TempAppPerPartition> orderedByPriority = createTempAppForResCalculation(
+ tq, apps, clusterResource, perUserAMUsed);
// 4. Calculate idealAssigned per app by checking based on queue's
// unallocated resource. Also return apps arranged from lower priority to
// higher priority.
- TreeSet<TempAppPerPartition> orderedApps =
- calculateIdealAssignedResourcePerApp(clusterResource,
- partitionBasedResource, tq, selectedCandidates,
- queueReassignableResource, orderedByPriority, perUserAMUsed);
+ TreeSet<TempAppPerPartition> orderedApps = calculateIdealAssignedResourcePerApp(
+ clusterResource, tq, selectedCandidates, queueReassignableResource,
+ orderedByPriority);
// 5. A configurable limit that could define an ideal allowable preemption
// limit. Based on current queue's capacity,defined how much % could become
@@ -145,7 +168,7 @@ public class FifoIntraQueuePreemptionPlugin
// 7. From lowest priority app onwards, calculate toBePreempted resource
// based on demand.
calculateToBePreemptedResourcePerApp(clusterResource, orderedApps,
- preemptionLimit);
+ Resources.clone(preemptionLimit));
// Save all apps (low to high) to temp queue for further reference
tq.addAllApps(orderedApps);
@@ -153,7 +176,8 @@ public class FifoIntraQueuePreemptionPlugin
// 8. There are chances that we may preempt for the demand from same
// priority level, such cases are to be validated out.
validateOutSameAppPriorityFromDemand(clusterResource,
- (TreeSet<TempAppPerPartition>) tq.getApps());
+ (TreeSet<TempAppPerPartition>) orderedApps, tq.getUsersPerPartition(),
+ context.getIntraQueuePreemptionOrderPolicy());
if (LOG.isDebugEnabled()) {
LOG.debug("Queue Name:" + tq.queueName + ", partition:" + tq.partition);
@@ -176,17 +200,17 @@ public class FifoIntraQueuePreemptionPlugin
Resource preemtableFromApp = Resources.subtract(tmpApp.getUsed(),
tmpApp.idealAssigned);
- Resources.subtractFrom(preemtableFromApp, tmpApp.selected);
- Resources.subtractFrom(preemtableFromApp, tmpApp.getAMUsed());
+ Resources.subtractFromNonNegative(preemtableFromApp, tmpApp.selected);
+ Resources.subtractFromNonNegative(preemtableFromApp, tmpApp.getAMUsed());
// Calculate toBePreempted from apps as follows:
// app.preemptable = min(max(app.used - app.selected - app.ideal, 0),
// intra_q_preemptable)
tmpApp.toBePreempted = Resources.min(rc, clusterResource, Resources
.max(rc, clusterResource, preemtableFromApp, Resources.none()),
- preemptionLimit);
+ Resources.clone(preemptionLimit));
- preemptionLimit = Resources.subtract(preemptionLimit,
+ preemptionLimit = Resources.subtractFromNonNegative(preemptionLimit,
tmpApp.toBePreempted);
}
}
@@ -221,31 +245,24 @@ public class FifoIntraQueuePreemptionPlugin
* }
*
* @param clusterResource Cluster Resource
- * @param partitionBasedResource resource per partition
* @param tq TempQueue
* @param selectedCandidates Already Selected preemption candidates
* @param queueReassignableResource Resource used in a queue
* @param orderedByPriority List of running apps
- * @param perUserAMUsed AM used resource
* @return List of temp apps ordered from low to high priority
*/
private TreeSet<TempAppPerPartition> calculateIdealAssignedResourcePerApp(
- Resource clusterResource, Resource partitionBasedResource,
- TempQueuePerPartition tq,
+ Resource clusterResource, TempQueuePerPartition tq,
Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates,
Resource queueReassignableResource,
- PriorityQueue<TempAppPerPartition> orderedByPriority,
- Map<String, Resource> perUserAMUsed) {
+ PriorityQueue<TempAppPerPartition> orderedByPriority) {
Comparator<TempAppPerPartition> reverseComp = Collections
.reverseOrder(new TAPriorityComparator());
TreeSet<TempAppPerPartition> orderedApps = new TreeSet<>(reverseComp);
- Map<String, Resource> userIdealAssignedMapping = new HashMap<>();
String partition = tq.partition;
-
- Map<String, Resource> preCalculatedUserLimit =
- new HashMap<String, Resource>();
+ Map<String, TempUserPerPartition> usersPerPartition = tq.getUsersPerPartition();
while (!orderedByPriority.isEmpty()) {
// Remove app from the next highest remaining priority and process it to
@@ -255,43 +272,19 @@ public class FifoIntraQueuePreemptionPlugin
// Once unallocated resource is 0, we can stop assigning ideal per app.
if (Resources.lessThanOrEqual(rc, clusterResource,
- queueReassignableResource, Resources.none())) {
+ queueReassignableResource, Resources.none())
+ || Resources.isAnyMajorResourceZero(rc, queueReassignableResource)) {
continue;
}
String userName = tmpApp.app.getUser();
- Resource userLimitResource = preCalculatedUserLimit.get(userName);
-
- // Verify whether we already calculated headroom for this user.
- if (userLimitResource == null) {
- userLimitResource = Resources.clone(tq.leafQueue
- .getUserLimitPerUser(userName, partitionBasedResource, partition));
-
- Resource amUsed = perUserAMUsed.get(userName);
- if (null == amUsed) {
- amUsed = Resources.createResource(0, 0);
- }
-
- // Real AM used need not have to be considered for user-limit as well.
- userLimitResource = Resources.subtract(userLimitResource, amUsed);
- if (LOG.isDebugEnabled()) {
- LOG.debug("Userlimit for user '" + userName + "' is :"
- + userLimitResource + ", and amUsed is:" + amUsed);
- }
-
- preCalculatedUserLimit.put(userName, userLimitResource);
- }
-
- Resource idealAssignedForUser = userIdealAssignedMapping.get(userName);
-
- if (idealAssignedForUser == null) {
- idealAssignedForUser = Resources.createResource(0, 0);
- userIdealAssignedMapping.put(userName, idealAssignedForUser);
- }
+ TempUserPerPartition tmpUser = usersPerPartition.get(userName);
+ Resource userLimitResource = tmpUser.getUserLimit();
+ Resource idealAssignedForUser = tmpUser.idealAssigned;
// Calculate total selected container resources from current app.
- getAlreadySelectedPreemptionCandidatesResource(selectedCandidates,
- tmpApp, partition);
+ getAlreadySelectedPreemptionCandidatesResource(selectedCandidates, tmpApp,
+ tmpUser, partition);
// For any app, used+pending will give its idealAssigned. However it will
// be tightly linked to queue's unallocated quota. So lower priority apps
@@ -302,10 +295,11 @@ public class FifoIntraQueuePreemptionPlugin
if (Resources.lessThan(rc, clusterResource, idealAssignedForUser,
userLimitResource)) {
- appIdealAssigned = Resources.min(rc, clusterResource, appIdealAssigned,
+ Resource idealAssigned = Resources.min(rc, clusterResource,
+ appIdealAssigned,
Resources.subtract(userLimitResource, idealAssignedForUser));
tmpApp.idealAssigned = Resources.clone(Resources.min(rc,
- clusterResource, queueReassignableResource, appIdealAssigned));
+ clusterResource, queueReassignableResource, idealAssigned));
Resources.addTo(idealAssignedForUser, tmpApp.idealAssigned);
} else {
continue;
@@ -320,7 +314,8 @@ public class FifoIntraQueuePreemptionPlugin
Resources.subtract(tmpApp.idealAssigned, appUsedExcludedSelected));
}
- Resources.subtractFrom(queueReassignableResource, tmpApp.idealAssigned);
+ Resources.subtractFromNonNegative(queueReassignableResource,
+ tmpApp.idealAssigned);
}
return orderedApps;
@@ -332,7 +327,8 @@ public class FifoIntraQueuePreemptionPlugin
*/
private void getAlreadySelectedPreemptionCandidatesResource(
Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates,
- TempAppPerPartition tmpApp, String partition) {
+ TempAppPerPartition tmpApp, TempUserPerPartition tmpUser,
+ String partition) {
tmpApp.selected = Resources.createResource(0, 0);
Set<RMContainer> containers = selectedCandidates
.get(tmpApp.app.getApplicationAttemptId());
@@ -344,16 +340,23 @@ public class FifoIntraQueuePreemptionPlugin
for (RMContainer cont : containers) {
if (partition.equals(cont.getNodeLabelExpression())) {
Resources.addTo(tmpApp.selected, cont.getAllocatedResource());
+ Resources.addTo(tmpUser.selected, cont.getAllocatedResource());
}
}
}
private PriorityQueue<TempAppPerPartition> createTempAppForResCalculation(
- String partition, Collection<FiCaSchedulerApp> apps,
- TAPriorityComparator taComparator) {
+ TempQueuePerPartition tq, Collection<FiCaSchedulerApp> apps,
+ Resource clusterResource,
+ Map<String, Resource> perUserAMUsed) {
+ TAPriorityComparator taComparator = new TAPriorityComparator();
PriorityQueue<TempAppPerPartition> orderedByPriority = new PriorityQueue<>(
100, taComparator);
+ String partition = tq.partition;
+ Map<String, TempUserPerPartition> usersPerPartition = tq
+ .getUsersPerPartition();
+
// have an internal temp app structure to store intermediate data(priority)
for (FiCaSchedulerApp app : apps) {
@@ -385,56 +388,156 @@ public class FifoIntraQueuePreemptionPlugin
tmpApp.idealAssigned = Resources.createResource(0, 0);
orderedByPriority.add(tmpApp);
+
+ // Create a TempUserPerPartition structure to hold more information
+ // regarding each user's entities such as UserLimit etc. This could
+ // be kept in a user to TempUserPerPartition map for further reference.
+ String userName = app.getUser();
+ if (!usersPerPartition.containsKey(userName)) {
+ ResourceUsage userResourceUsage = tq.leafQueue.getUser(userName)
+ .getResourceUsage();
+
+ TempUserPerPartition tmpUser = new TempUserPerPartition(
+ tq.leafQueue.getUser(userName), tq.queueName,
+ Resources.clone(userResourceUsage.getUsed(partition)),
+ Resources.clone(perUserAMUsed.get(userName)),
+ Resources.clone(userResourceUsage.getReserved(partition)),
+ Resources.none());
+
+ Resource userLimitResource = Resources.clone(
+ tq.leafQueue.getResourceLimitForAllUsers(userName, clusterResource,
+ partition, SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY));
+
+ // AM resources actually in use are excluded from the user-limit as well.
+ userLimitResource = Resources.subtract(userLimitResource,
+ tmpUser.amUsed);
+ tmpUser.setUserLimit(userLimitResource);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("TempUser:" + tmpUser);
+ }
+
+ tmpUser.idealAssigned = Resources.createResource(0, 0);
+ tq.addUserPerPartition(userName, tmpUser);
+ }
}
return orderedByPriority;
}
/*
* Fifo+Priority based preemption policy need not have to preempt resources at
- * same priority level. Such cases will be validated out.
+ * same priority level; such cases are filtered out. However, if the demand
+ * comes from an app of a different user, resources are preempted even when
+ * the apps are at the same priority.
*/
public void validateOutSameAppPriorityFromDemand(Resource cluster,
- TreeSet<TempAppPerPartition> appsOrderedfromLowerPriority) {
+ TreeSet<TempAppPerPartition> orderedApps,
+ Map<String, TempUserPerPartition> usersPerPartition,
+ IntraQueuePreemptionOrderPolicy intraQueuePreemptionOrder) {
- TempAppPerPartition[] apps = appsOrderedfromLowerPriority
- .toArray(new TempAppPerPartition[appsOrderedfromLowerPriority.size()]);
+ TempAppPerPartition[] apps = orderedApps
+ .toArray(new TempAppPerPartition[orderedApps.size()]);
if (apps.length <= 0) {
return;
}
- int lPriority = 0;
- int hPriority = apps.length - 1;
-
- while (lPriority < hPriority
- && !apps[lPriority].equals(apps[hPriority])
- && apps[lPriority].getPriority() < apps[hPriority].getPriority()) {
- Resource toPreemptFromOther = apps[hPriority]
- .getToBePreemptFromOther();
- Resource actuallyToPreempt = apps[lPriority].getActuallyToBePreempted();
- Resource delta = Resources.subtract(apps[lPriority].toBePreempted,
- actuallyToPreempt);
-
- if (Resources.greaterThan(rc, cluster, delta, Resources.none())) {
- Resource toPreempt = Resources.min(rc, cluster,
- toPreemptFromOther, delta);
-
- apps[hPriority].setToBePreemptFromOther(
- Resources.subtract(toPreemptFromOther, toPreempt));
- apps[lPriority].setActuallyToBePreempted(
- Resources.add(actuallyToPreempt, toPreempt));
- }
+ for (int hPriority = apps.length - 1; hPriority >= 0; hPriority--) {
- if (Resources.lessThanOrEqual(rc, cluster,
- apps[lPriority].toBePreempted,
- apps[lPriority].getActuallyToBePreempted())) {
- lPriority++;
- continue;
- }
+ // Check whether high priority app with demand needs resource from other
+ // user.
+ if (Resources.greaterThan(rc, cluster,
+ apps[hPriority].getToBePreemptFromOther(), Resources.none())) {
- if (Resources.equals(apps[hPriority].getToBePreemptFromOther(),
- Resources.none())) {
- hPriority--;
- continue;
+ // Given we have a demand from a high priority app, we can do a reverse
+ // scan from lower priority apps to select resources.
+ // Since idealAssigned of each app has considered user-limit, this logic
+ // will provide eventual consistency w.r.t user-limit as well.
+ for (int lPriority = 0; lPriority < apps.length; lPriority++) {
+
+ // Check whether this candidate app has any resource marked to be preempted.
+ if (Resources.greaterThan(rc, cluster, apps[lPriority].toBePreempted,
+ Resources.none())) {
+
+ // If apps are of same user and the candidate's priority is not lower, skip.
+ if ((apps[hPriority].getUser().equals(apps[lPriority].getUser()))
+ && (apps[lPriority].getPriority() >= apps[hPriority]
+ .getPriority())) {
+ continue;
+ }
+
+ if (Resources.lessThanOrEqual(rc, cluster,
+ apps[lPriority].toBePreempted,
+ apps[lPriority].getActuallyToBePreempted())
+ || Resources.equals(apps[hPriority].getToBePreemptFromOther(),
+ Resources.none())) {
+ continue;
+ }
+
+ // Ideally if any application has a higher priority, then it can
+ // force preemption of any lower priority app from any user. However,
+ // if the admin enforces user-limit over priority, the preemption module
+ // will not choose lower priority apps from users who have not yet
+ // reached their user-limit.
+ TempUserPerPartition tmpUser = usersPerPartition
+ .get(apps[lPriority].getUser());
+ if ((!apps[hPriority].getUser().equals(apps[lPriority].getUser()))
+ && (!tmpUser.isUserLimitReached(rc, cluster))
+ && (intraQueuePreemptionOrder
+ .equals(IntraQueuePreemptionOrderPolicy.USERLIMIT_FIRST))) {
+ continue;
+ }
+
+ Resource toPreemptFromOther = apps[hPriority]
+ .getToBePreemptFromOther();
+ Resource actuallyToPreempt = apps[lPriority]
+ .getActuallyToBePreempted();
+
+ // A lower priority app could offer more resource to preempt, if
+ // multiple higher priority/under served users needs resources.
+ // After one iteration, we need to ensure that actuallyToPreempt is
+ // subtracted from the resource to preempt.
+ Resource preemptableFromLowerPriorityApp = Resources
+ .subtract(apps[lPriority].toBePreempted, actuallyToPreempt);
+
+ // In case of user-limit preemption, when apps are from different
+ // users and of the same priority, we will do user-limit preemption if
+ // there is a demand from an app that is under its UL quota.
+ // However, the demand of this under-UL-quota app may be larger.
+ // Still, we should ensure that we do not over-preempt, i.e.
+ // only a maximum of (user's used - UL quota) could be
+ // preempted.
+ if ((!apps[hPriority].getUser().equals(apps[lPriority].getUser()))
+ && (apps[lPriority].getPriority() == apps[hPriority]
+ .getPriority())
+ && tmpUser.isUserLimitReached(rc, cluster)) {
+
+ Resource deltaULQuota = Resources
+ .subtract(tmpUser.getUsedDeductAM(), tmpUser.selected);
+ Resources.subtractFrom(deltaULQuota, tmpUser.getUserLimit());
+
+ if (tmpUser.isPreemptionQuotaForULDeltaDone()) {
+ deltaULQuota = Resources.createResource(0, 0);
+ }
+
+ if (Resources.lessThan(rc, cluster, deltaULQuota,
+ preemptableFromLowerPriorityApp)) {
+ tmpUser.updatePreemptionQuotaForULDeltaAsDone(true);
+ preemptableFromLowerPriorityApp = deltaULQuota;
+ }
+ }
+
+ if (Resources.greaterThan(rc, cluster,
+ preemptableFromLowerPriorityApp, Resources.none())) {
+ Resource toPreempt = Resources.min(rc, cluster,
+ toPreemptFromOther, preemptableFromLowerPriorityApp);
+
+ apps[hPriority].setToBePreemptFromOther(
+ Resources.subtract(toPreemptFromOther, toPreempt));
+ apps[lPriority].setActuallyToBePreempted(
+ Resources.add(actuallyToPreempt, toPreempt));
+ }
+ }
+ }
}
}
}
@@ -454,6 +557,40 @@ public class FifoIntraQueuePreemptionPlugin
Resources.addTo(userAMResource, app.getAMResource(partition));
Resources.addTo(amUsed, app.getAMResource(partition));
}
+
return amUsed;
}
+
+  /**
+   * Tells the caller whether container {@code c} must be excluded from
+   * intra-queue preemption because selecting it would bring its user's usage
+   * down to, or below, that user's user-limit.
+   *
+   * <p>The check only applies under
+   * {@link IntraQueuePreemptionOrderPolicy#USERLIMIT_FIRST}; for any other
+   * order policy this method returns {@code false} without consulting the
+   * scheduler or the per-partition queue/user snapshots.
+   *
+   * @param app application owning the container
+   * @param clusterResource cluster resource handed to the resource calculator
+   * @param usedResource usage of the app's user that the container's
+   *          allocation is subtracted from before comparing with the
+   *          user-limit
+   * @param c candidate container
+   * @return {@code true} if the container should be skipped; {@code false}
+   *         if it may be preempted, if the policy is not USERLIMIT_FIRST, or
+   *         if no temp user entry exists for the app's user
+   */
+  @Override
+  public boolean skipContainerBasedOnIntraQueuePolicy(FiCaSchedulerApp app,
+      Resource clusterResource, Resource usedResource, RMContainer c) {
+    // Ensure below checks
+    // 1. This check must be done only when preemption order is USERLIMIT_FIRST.
+    // Settle that first so that no scheduler/queue lookups are done (and no
+    // lookup can fail) when the policy makes the answer "false" anyway.
+    if (context.getIntraQueuePreemptionOrderPolicy()
+        != IntraQueuePreemptionOrderPolicy.USERLIMIT_FIRST) {
+      return false;
+    }
+
+    // 2. By selecting container "c", check whether this user's resource usage
+    // is going below its user-limit.
+    // 3. Used resource of user must be always greater than user-limit to
+    // skip some containers as per this check. If used resource is under user
+    // limit, then these containers of this user has to be preempted as demand
+    // might be due to high priority apps running in same user.
+    String partition = context.getScheduler()
+        .getSchedulerNode(c.getAllocatedNode()).getPartition();
+    TempQueuePerPartition tq = context.getQueueByPartition(app.getQueueName(),
+        partition);
+    TempUserPerPartition tmpUser = tq.getUsersPerPartition().get(app.getUser());
+
+    // Given user is not present, skip the check.
+    if (tmpUser == null) {
+      return false;
+    }
+
+    // For ideal resource computations, user-limit got saved by subtracting am
+    // used resource in TempUser. Hence it has to be added back here for
+    // complete check.
+    Resource userLimit = Resources.add(tmpUser.getUserLimit(), tmpUser.amUsed);
+    Resource usageAfterPreemption = Resources.subtract(usedResource,
+        c.getAllocatedResource());
+
+    return Resources.lessThanOrEqual(rc, clusterResource,
+        usageAfterPreemption, userLimit);
+  }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/eda4ac07/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/IntraQueueCandidatesSelector.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/IntraQueueCandidatesSelector.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/IntraQueueCandidatesSelector.java
index 2890414..e2f311f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/IntraQueueCandidatesSelector.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/IntraQueueCandidatesSelector.java
@@ -23,6 +23,7 @@ import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy.IntraQueuePreemptionOrderPolicy;
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue;
@@ -31,11 +32,13 @@ import org.apache.hadoop.yarn.util.resource.Resources;
import java.io.Serializable;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.Comparator;
-import java.util.Iterator;
+import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.Set;
/**
@@ -51,14 +54,14 @@ public class IntraQueueCandidatesSelector extends PreemptionCandidatesSelector {
Comparator<TempAppPerPartition> {
@Override
- public int compare(TempAppPerPartition tq1, TempAppPerPartition tq2) {
- Priority p1 = Priority.newInstance(tq1.getPriority());
- Priority p2 = Priority.newInstance(tq2.getPriority());
+ public int compare(TempAppPerPartition ta1, TempAppPerPartition ta2) {
+ Priority p1 = Priority.newInstance(ta1.getPriority());
+ Priority p2 = Priority.newInstance(ta2.getPriority());
if (!p1.equals(p2)) {
return p1.compareTo(p2);
}
- return tq1.getApplicationId().compareTo(tq2.getApplicationId());
+ return ta1.getApplicationId().compareTo(ta2.getApplicationId());
}
}
@@ -121,37 +124,60 @@ public class IntraQueueCandidatesSelector extends PreemptionCandidatesSelector {
Map<String, Resource> resToObtainByPartition = fifoPreemptionComputePlugin
.getResourceDemandFromAppsPerQueue(queueName, partition);
- // 6. Based on the selected resource demand per partition, select
+ // Default preemption iterator considers only FIFO+priority. For
+ // userlimit preemption, its possible that some lower priority apps
+ // needs from high priority app of another user. Hence use apps
+ // ordered by userlimit starvation as well.
+ Collection<FiCaSchedulerApp> apps = fifoPreemptionComputePlugin
+ .getPreemptableApps(queueName, partition);
+
+ // 6. Get user-limit to ensure that we do not preempt resources which
+ // will force user's resource to come under its UL.
+ Map<String, Resource> rollingResourceUsagePerUser = new HashMap<>();
+ initializeUsageAndUserLimitForCompute(clusterResource, partition,
+ leafQueue, rollingResourceUsagePerUser);
+
+ // 7. Based on the selected resource demand per partition, select
// containers with known policy from inter-queue preemption.
try {
leafQueue.getReadLock().lock();
- Iterator<FiCaSchedulerApp> desc = leafQueue.getOrderingPolicy()
- .getPreemptionIterator();
- while (desc.hasNext()) {
- FiCaSchedulerApp app = desc.next();
- preemptFromLeastStarvedApp(selectedCandidates, clusterResource,
- totalPreemptedResourceAllowed, resToObtainByPartition,
- leafQueue, app);
+ for (FiCaSchedulerApp app : apps) {
+ preemptFromLeastStarvedApp(leafQueue, app, selectedCandidates,
+ clusterResource, totalPreemptedResourceAllowed,
+ resToObtainByPartition, rollingResourceUsagePerUser);
}
} finally {
leafQueue.getReadLock().unlock();
}
}
}
-
return selectedCandidates;
}
- private void preemptFromLeastStarvedApp(
+  /**
+   * Seeds {@code rollingResourceUsagePerUser} with a private copy of the
+   * resource currently used in {@code partition} by every user returned by
+   * {@code leafQueue.getAllUsers()}. The copies can then be decremented as
+   * containers are selected without touching the queue's own accounting.
+   *
+   * @param clusterResource not read by this method
+   * @param partition partition whose per-user usage is copied
+   * @param leafQueue queue whose users are enumerated
+   * @param rollingResourceUsagePerUser output map, user name to cloned usage
+   */
+  private void initializeUsageAndUserLimitForCompute(Resource clusterResource,
+      String partition, LeafQueue leafQueue,
+      Map<String, Resource> rollingResourceUsagePerUser) {
+    for (String userName : leafQueue.getAllUsers()) {
+      // Clone so that the rolling computation never mutates the live usage.
+      Resource usedSnapshot = Resources.clone(
+          leafQueue.getUser(userName).getResourceUsage().getUsed(partition));
+      rollingResourceUsagePerUser.put(userName, usedSnapshot);
+
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Rolling resource usage for user:" + userName + " is : "
+            + usedSnapshot);
+      }
+    }
+  }
+
+ private void preemptFromLeastStarvedApp(LeafQueue leafQueue,
+ FiCaSchedulerApp app,
Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates,
Resource clusterResource, Resource totalPreemptedResourceAllowed,
- Map<String, Resource> resToObtainByPartition, LeafQueue leafQueue,
- FiCaSchedulerApp app) {
+ Map<String, Resource> resToObtainByPartition,
+ Map<String, Resource> rollingResourceUsagePerUser) {
// ToDo: Reuse reservation selector here.
- List<RMContainer> liveContainers = new ArrayList<>(
- app.getLiveContainers());
+ List<RMContainer> liveContainers = new ArrayList<>(app.getLiveContainers());
sortContainers(liveContainers);
if (LOG.isDebugEnabled()) {
@@ -160,6 +186,8 @@ public class IntraQueueCandidatesSelector extends PreemptionCandidatesSelector {
+ totalPreemptedResourceAllowed);
}
+ Resource rollingUsedResourcePerUser = rollingResourceUsagePerUser
+ .get(app.getUser());
for (RMContainer c : liveContainers) {
// if there are no demand, return.
@@ -184,12 +212,34 @@ public class IntraQueueCandidatesSelector extends PreemptionCandidatesSelector {
continue;
}
+ // If selected container brings down resource usage under its user's
+ // UserLimit (or equals to), we must skip such containers.
+ if (fifoPreemptionComputePlugin.skipContainerBasedOnIntraQueuePolicy(app,
+ clusterResource, rollingUsedResourcePerUser, c)) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(
+ "Skipping container: " + c.getContainerId() + " with resource:"
+ + c.getAllocatedResource() + " as UserLimit for user:"
+ + app.getUser() + " with resource usage: "
+ + rollingUsedResourcePerUser + " is going under UL");
+ }
+ break;
+ }
+
// Try to preempt this container
- CapacitySchedulerPreemptionUtils.tryPreemptContainerAndDeductResToObtain(
- rc, preemptionContext, resToObtainByPartition, c, clusterResource,
- selectedCandidates, totalPreemptedResourceAllowed);
+ boolean ret = CapacitySchedulerPreemptionUtils
+ .tryPreemptContainerAndDeductResToObtain(rc, preemptionContext,
+ resToObtainByPartition, c, clusterResource, selectedCandidates,
+ totalPreemptedResourceAllowed);
+
+ // Subtract from respective user's resource usage once a container is
+ // selected for preemption.
+ if (ret && preemptionContext.getIntraQueuePreemptionOrderPolicy()
+ .equals(IntraQueuePreemptionOrderPolicy.USERLIMIT_FIRST)) {
+ Resources.subtractFrom(rollingUsedResourcePerUser,
+ c.getAllocatedResource());
+ }
}
-
}
private void computeIntraQueuePreemptionDemand(Resource clusterResource,
@@ -205,12 +255,7 @@ public class IntraQueueCandidatesSelector extends PreemptionCandidatesSelector {
continue;
}
- // 2. Its better to get partition based resource limit earlier before
- // starting calculation
- Resource partitionBasedResource =
- context.getPartitionResource(partition);
-
- // 3. loop through all queues corresponding to a partition.
+ // 2. loop through all queues corresponding to a partition.
for (String queueName : queueNames) {
TempQueuePerPartition tq = context.getQueueByPartition(queueName,
partition);
@@ -221,23 +266,22 @@ public class IntraQueueCandidatesSelector extends PreemptionCandidatesSelector {
continue;
}
- // 4. Consider reassignableResource as (used - actuallyToBePreempted).
+ // 3. Consider reassignableResource as (used - actuallyToBePreempted).
// This provides as upper limit to split apps quota in a queue.
Resource queueReassignableResource = Resources.subtract(tq.getUsed(),
tq.getActuallyToBePreempted());
- // 5. Check queue's used capacity. Make sure that the used capacity is
+ // 4. Check queue's used capacity. Make sure that the used capacity is
// above certain limit to consider for intra queue preemption.
if (leafQueue.getQueueCapacities().getUsedCapacity(partition) < context
.getMinimumThresholdForIntraQueuePreemption()) {
continue;
}
- // 6. compute the allocation of all apps based on queue's unallocated
+ // 5. compute the allocation of all apps based on queue's unallocated
// capacity
fifoPreemptionComputePlugin.computeAppsIdealAllocation(clusterResource,
- partitionBasedResource, tq, selectedCandidates,
- totalPreemptedResourceAllowed,
+ tq, selectedCandidates, totalPreemptedResourceAllowed,
queueReassignableResource,
context.getMaxAllowableLimitForIntraQueuePreemption());
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/eda4ac07/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/IntraQueuePreemptionComputePlugin.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/IntraQueuePreemptionComputePlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/IntraQueuePreemptionComputePlugin.java
index 93ebe65..56fd007 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/IntraQueuePreemptionComputePlugin.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/IntraQueuePreemptionComputePlugin.java
@@ -18,12 +18,14 @@
package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity;
+import java.util.Collection;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
interface IntraQueuePreemptionComputePlugin {
@@ -32,8 +34,14 @@ interface IntraQueuePreemptionComputePlugin {
String partition);
void computeAppsIdealAllocation(Resource clusterResource,
- Resource partitionBasedResource, TempQueuePerPartition tq,
+ TempQueuePerPartition tq,
Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates,
Resource totalPreemptedResourceAllowed, Resource queueTotalUnassigned,
float maxAllowablePreemptLimit);
+
+ /**
+ * Returns the applications of the given queue and partition that the
+ * candidate selector walks, in iteration order, when picking containers
+ * to preempt.
+ *
+ * @param queueName name of the queue being evaluated
+ * @param partition node partition being evaluated
+ * @return apps to examine for preemption; the ordering is chosen by the
+ * implementation (NOTE(review): expected to reflect user-limit starvation
+ * as well as priority -- confirm against the implementing plugin)
+ */
+ Collection<FiCaSchedulerApp> getPreemptableApps(String queueName,
+ String partition);
+
+ /**
+ * Decides whether container {@code c} of {@code app} must be left out of
+ * preemption under the configured intra-queue preemption order policy.
+ *
+ * @param app application owning the container
+ * @param clusterResource total cluster resource
+ * @param usedResource resource usage of the app's user to evaluate against
+ * @param c candidate container
+ * @return true if the container must not be preempted, false otherwise
+ */
+ boolean skipContainerBasedOnIntraQueuePolicy(FiCaSchedulerApp app,
+ Resource clusterResource, Resource usedResource, RMContainer c);
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/eda4ac07/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java
index 1e684ea..b171b04 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java
@@ -22,6 +22,7 @@ import com.google.common.collect.ImmutableSet;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ContainerId;
@@ -80,6 +81,16 @@ import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock;
*/
public class ProportionalCapacityPreemptionPolicy
implements SchedulingEditPolicy, CapacitySchedulerPreemptionContext {
+
+ /**
+ * IntraQueuePreemptionOrderPolicy lists the intra-queue preemption orders
+ * which could be configured by admin. The configured string is upper-cased
+ * and resolved with valueOf, so it has to match a constant name.
+ * USERLIMIT_FIRST is the policy under which a container is skipped when
+ * preempting it would take its user to or below the user-limit.
+ * NOTE(review): PRIORITY_FIRST presumably orders purely by application
+ * priority -- confirm against the compute plugin.
+ */
+ @Unstable
+ public enum IntraQueuePreemptionOrderPolicy {
+ PRIORITY_FIRST, USERLIMIT_FIRST;
+ }
+
private static final Log LOG =
LogFactory.getLog(ProportionalCapacityPreemptionPolicy.class);
@@ -96,6 +107,7 @@ public class ProportionalCapacityPreemptionPolicy
private float maxAllowableLimitForIntraQueuePreemption;
private float minimumThresholdForIntraQueuePreemption;
+ private IntraQueuePreemptionOrderPolicy intraQueuePreemptionOrderPolicy;
// Pointer to other RM components
private RMContext rmContext;
@@ -191,6 +203,13 @@ public class ProportionalCapacityPreemptionPolicy
CapacitySchedulerConfiguration.
DEFAULT_INTRAQUEUE_PREEMPTION_MINIMUM_THRESHOLD);
+ intraQueuePreemptionOrderPolicy = IntraQueuePreemptionOrderPolicy
+ .valueOf(csConfig
+ .get(
+ CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER_POLICY,
+ CapacitySchedulerConfiguration.DEFAULT_INTRAQUEUE_PREEMPTION_ORDER_POLICY)
+ .toUpperCase());
+
rc = scheduler.getResourceCalculator();
nlm = scheduler.getRMContext().getNodeLabelManager();
@@ -243,7 +262,6 @@ public class ProportionalCapacityPreemptionPolicy
}
}
- @SuppressWarnings("unchecked")
private void preemptOrkillSelectedContainerAfterWait(
Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates,
long currentTime) {
@@ -656,4 +674,9 @@ public class ProportionalCapacityPreemptionPolicy
}
underServedQueues.add(queueName);
}
+
+ /**
+ * Returns the intra-queue preemption order policy currently held by this
+ * policy object, as last parsed from the capacity scheduler configuration.
+ */
+ @Override
+ public IntraQueuePreemptionOrderPolicy getIntraQueuePreemptionOrderPolicy() {
+ return intraQueuePreemptionOrderPolicy;
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/eda4ac07/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempAppPerPartition.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempAppPerPartition.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempAppPerPartition.java
index fccd2a7..cbc1028 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempAppPerPartition.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempAppPerPartition.java
@@ -59,13 +59,17 @@ public class TempAppPerPartition extends AbstractPreemptionEntity {
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
- sb.append(" NAME: " + getApplicationId()).append(" PRIO: ").append(priority)
- .append(" CUR: ").append(getUsed()).append(" PEN: ").append(pending)
- .append(" RESERVED: ").append(reserved).append(" IDEAL_ASSIGNED: ")
- .append(idealAssigned).append(" PREEMPT_OTHER: ")
- .append(getToBePreemptFromOther()).append(" IDEAL_PREEMPT: ")
- .append(toBePreempted).append(" ACTUAL_PREEMPT: ")
- .append(getActuallyToBePreempted()).append("\n");
+ sb.append("NAME: " + getApplicationId())
+ .append(" PRIO: ").append(priority)
+ .append(" CUR: ").append(getUsed())
+ .append(" PEN: ").append(pending)
+ .append(" RESERVED: ").append(reserved)
+ .append(" IDEAL_ASSIGNED: ").append(idealAssigned)
+ .append(" PREEMPT_OTHER: ").append(getToBePreemptFromOther())
+ .append(" IDEAL_PREEMPT: ").append(toBePreempted)
+ .append(" ACTUAL_PREEMPT: ").append(getActuallyToBePreempted())
+ .append(" SELECTED: ").append(selected)
+ .append("\n");
return sb.toString();
}
@@ -91,8 +95,12 @@ public class TempAppPerPartition extends AbstractPreemptionEntity {
return applicationId;
}
+ /**
+ * @return the user of the underlying application, as reported by
+ * app.getUser()
+ */
+ public String getUser() {
+ return this.app.getUser();
+ }
+
public void deductActuallyToBePreempted(ResourceCalculator resourceCalculator,
- Resource cluster, Resource toBeDeduct, String partition) {
+ Resource cluster, Resource toBeDeduct) {
if (Resources.greaterThan(resourceCalculator, cluster,
getActuallyToBePreempted(), toBeDeduct)) {
Resources.subtractFrom(getActuallyToBePreempted(), toBeDeduct);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/eda4ac07/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempQueuePerPartition.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempQueuePerPartition.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempQueuePerPartition.java
index 7eab015..89452f9 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempQueuePerPartition.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempQueuePerPartition.java
@@ -26,6 +26,8 @@ import org.apache.hadoop.yarn.util.resource.Resources;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.LinkedHashMap;
+import java.util.Map;
/**
* Temporary data-structure tracking resource availability, pending resource
@@ -59,6 +61,10 @@ public class TempQueuePerPartition extends AbstractPreemptionEntity {
int relativePriority = 0;
TempQueuePerPartition parent = null;
+ // This will hold a temp user data structure and will hold userlimit,
+ // idealAssigned, used etc.
+ Map<String, TempUserPerPartition> usersPerPartition = new LinkedHashMap<>();
+
TempQueuePerPartition(String queueName, Resource current,
boolean preemptionDisabled, String partition, Resource killable,
float absCapacity, float absMaxCapacity, Resource totalPartitionResource,
@@ -289,4 +295,12 @@ public class TempQueuePerPartition extends AbstractPreemptionEntity {
return apps;
}
+ /**
+ * Stores the temp user entry under the given user name, replacing any
+ * entry previously stored under the same name.
+ *
+ * @param userName key for the entry
+ * @param tmpUser temp user data to store
+ */
+ public void addUserPerPartition(String userName,
+ TempUserPerPartition tmpUser) {
+ this.usersPerPartition.put(userName, tmpUser);
+ }
+
+ /**
+ * @return the map of temp user entries keyed by user name; this is the
+ * internal map itself, not a copy
+ */
+ public Map<String, TempUserPerPartition> getUsersPerPartition() {
+ return usersPerPartition;
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/eda4ac07/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempUserPerPartition.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempUserPerPartition.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempUserPerPartition.java
new file mode 100644
index 0000000..245b5d4
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempUserPerPartition.java
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity;
+
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue.User;
+import org.apache.hadoop.yarn.util.resource.ResourceCalculator;
+import org.apache.hadoop.yarn.util.resource.Resources;
+
+
+/**
+ * Temporary data-structure tracking resource availability, pending resource
+ * need, current utilization and user-limit for a single user of a queue.
+ */
+public class TempUserPerPartition extends AbstractPreemptionEntity {
+
+  private final User user;
+  // Remains null until setUserLimit is called.
+  private Resource userLimit;
+  // Toggled through updatePreemptionQuotaForULDeltaAsDone.
+  private boolean donePreemptionQuotaForULDelta = false;
+
+  TempUserPerPartition(User user, String queueName, Resource usedPerPartition,
+      Resource amUsedPerPartition, Resource reserved,
+      Resource pendingPerPartition) {
+    super(queueName, usedPerPartition, amUsedPerPartition, reserved,
+        pendingPerPartition);
+    this.user = user;
+  }
+
+  /**
+   * Renders the tracked values on a single line terminated by a newline.
+   */
+  @Override
+  public String toString() {
+    return new StringBuilder()
+        .append(" NAME: ").append(getUserName())
+        .append(" CUR: ").append(getUsed())
+        .append(" PEN: ").append(pending)
+        .append(" RESERVED: ").append(reserved)
+        .append(" AM_USED: ").append(amUsed)
+        .append(" USER_LIMIT: ").append(getUserLimit())
+        .append(" IDEAL_ASSIGNED: ").append(idealAssigned)
+        .append(" USED_WO_AMUSED: ").append(getUsedDeductAM())
+        .append(" IDEAL_PREEMPT: ").append(toBePreempted)
+        .append(" ACTUAL_PREEMPT: ").append(getActuallyToBePreempted())
+        .append("\n")
+        .toString();
+  }
+
+  /**
+   * @return name of the wrapped queue user
+   */
+  public String getUserName() {
+    return user.getUserName();
+  }
+
+  /**
+   * @return the user-limit last passed to setUserLimit
+   */
+  public Resource getUserLimit() {
+    return userLimit;
+  }
+
+  /**
+   * @param userLimitResource user-limit to record for this user
+   */
+  public void setUserLimit(Resource userLimitResource) {
+    this.userLimit = userLimitResource;
+  }
+
+  /**
+   * @param rc resource calculator used for the comparison
+   * @param clusterResource cluster resource passed to the calculator
+   * @return true if usage excluding AM resource is strictly greater than
+   *         the recorded user-limit
+   */
+  public boolean isUserLimitReached(ResourceCalculator rc,
+      Resource clusterResource) {
+    return Resources.greaterThan(rc, clusterResource, getUsedDeductAM(),
+        userLimit);
+  }
+
+  /**
+   * @return current value of the "preemption quota for UL delta done" flag
+   */
+  public boolean isPreemptionQuotaForULDeltaDone() {
+    return this.donePreemptionQuotaForULDelta;
+  }
+
+  /**
+   * @param done new value of the "preemption quota for UL delta done" flag
+   */
+  public void updatePreemptionQuotaForULDeltaAsDone(boolean done) {
+    this.donePreemptionQuotaForULDelta = done;
+  }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/eda4ac07/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java
index 9fb92ec..026dd82 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java
@@ -1233,6 +1233,14 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur
public static final float DEFAULT_INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT =
0.2f;
+ /**
+ * For intra-queue preemption, enforce a preemption order such as
+ * "userlimit_first" or "priority_first".
+ */
+ public static final String INTRAQUEUE_PREEMPTION_ORDER_POLICY = PREEMPTION_CONFIG_PREFIX
+ + INTRA_QUEUE_PREEMPTION_CONFIG_PREFIX + "preemption-order-policy";
+ public static final String DEFAULT_INTRAQUEUE_PREEMPTION_ORDER_POLICY = "userlimit_first";
+
/**
* Maximum application for a queue to be used when application per queue is
* not defined.To be consistent with previous version the default value is set
http://git-wip-us.apache.org/repos/asf/hadoop/blob/eda4ac07/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
index 2b1efd6..71225b8 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
@@ -486,7 +486,7 @@ public class LeafQueue extends AbstractCSQueue {
writeLock.lock();
User u = users.get(userName);
if (null == u) {
- u = new User();
+ u = new User(userName);
users.put(userName, u);
}
return u;
@@ -1292,7 +1292,7 @@ public class LeafQueue extends AbstractCSQueue {
String partition) {
return getHeadroom(user, queueCurrentLimit, clusterResource,
computeUserLimit(application.getUser(), clusterResource, user,
- partition, SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY),
+ partition, SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY, true),
partition);
}
@@ -1366,7 +1366,7 @@ public class LeafQueue extends AbstractCSQueue {
// TODO, need consider headroom respect labels also
Resource userLimit =
computeUserLimit(application.getUser(), clusterResource, queueUser,
- nodePartition, schedulingMode);
+ nodePartition, schedulingMode, true);
setQueueResourceLimitsInfo(clusterResource);
@@ -1410,7 +1410,7 @@ public class LeafQueue extends AbstractCSQueue {
@Lock(NoLock.class)
private Resource computeUserLimit(String userName,
Resource clusterResource, User user,
- String nodePartition, SchedulingMode schedulingMode) {
+ String nodePartition, SchedulingMode schedulingMode, boolean forActive) {
Resource partitionResource = labelManager.getResourceByLabel(nodePartition,
clusterResource);
@@ -1462,16 +1462,21 @@ public class LeafQueue extends AbstractCSQueue {
// queue's configured capacity * user-limit-factor.
// Also, the queue's configured capacity should be higher than
// queue-hard-limit * ulMin
-
- final int activeUsers = activeUsersManager.getNumActiveUsers();
-
+
+ final int usersCount;
+ if (forActive) {
+ usersCount = activeUsersManager.getNumActiveUsers();
+ } else {
+ usersCount = users.size();
+ }
+
// User limit resource is determined by:
// max{currentCapacity / #activeUsers, currentCapacity *
// user-limit-percentage%)
Resource userLimitResource = Resources.max(
resourceCalculator, partitionResource,
Resources.divideAndCeil(
- resourceCalculator, currentCapacity, activeUsers),
+ resourceCalculator, currentCapacity, usersCount),
Resources.divideAndCeil(
resourceCalculator,
Resources.multiplyAndRoundDown(
@@ -1519,14 +1524,16 @@ public class LeafQueue extends AbstractCSQueue {
" qconsumed: " + queueUsage.getUsed() +
" consumedRatio: " + totalUserConsumedRatio +
" currentCapacity: " + currentCapacity +
- " activeUsers: " + activeUsers +
+ " activeUsers: " + usersCount +
" clusterCapacity: " + clusterResource +
" resourceByLabel: " + partitionResource +
" usageratio: " + qUsageRatios.getUsageRatio(nodePartition) +
" Partition: " + nodePartition
);
}
- user.setUserResourceLimit(userLimitResource);
+ if (forActive) {
+ user.setUserResourceLimit(userLimitResource);
+ }
return userLimitResource;
}
@@ -1955,11 +1962,14 @@ public class LeafQueue extends AbstractCSQueue {
volatile int activeApplications = 0;
private UsageRatios userUsageRatios = new UsageRatios();
private WriteLock writeLock;
+ String userName;
- User() {
+ @VisibleForTesting
+ public User(String name) {
ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
// Nobody uses read-lock now, will add it when necessary
writeLock = lock.writeLock();
+ this.userName = name;
}
public ResourceUsage getResourceUsage() {
@@ -2073,6 +2083,15 @@ public class LeafQueue extends AbstractCSQueue {
public void setUserResourceLimit(Resource userResourceLimit) {
this.userResourceLimit = userResourceLimit;
}
+
+ public String getUserName() {
+ return this.userName;
+ }
+
+ @VisibleForTesting
+ public void setResourceUsage(ResourceUsage resourceUsage) {
+ this.userResourceUsage = resourceUsage;
+ }
}
@Override
@@ -2158,7 +2177,7 @@ public class LeafQueue extends AbstractCSQueue {
User user = getUser(userName);
Resource headroom = Resources.subtract(
computeUserLimit(app.getUser(), clusterResources, user, partition,
- SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY),
+ SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY, true),
user.getUsed(partition));
// Make sure headroom is not negative.
headroom = Resources.componentwiseMax(headroom, Resources.none());
@@ -2195,7 +2214,7 @@ public class LeafQueue extends AbstractCSQueue {
User user = getUser(userName);
return computeUserLimit(userName, resources, user, partition,
- SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
+ SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY, true);
}
@Override
@@ -2377,4 +2396,26 @@ public class LeafQueue extends AbstractCSQueue {
writeLock.unlock();
}
}
+
+ /**
+ * Get all valid users in this queue.
+ * @return user list
+ */
+ public Set<String> getAllUsers() {
+ return this.users.keySet();
+ }
+
+ public Resource getResourceLimitForActiveUsers(String userName,
+ Resource clusterResource, String partition,
+ SchedulingMode schedulingMode) {
+ return computeUserLimit(userName, clusterResource, getUser(userName),
+ partition, schedulingMode, true);
+ }
+
+ public synchronized Resource getResourceLimitForAllUsers(String userName,
+ Resource clusterResource, String partition, SchedulingMode schedulingMode)
+ {
+ return computeUserLimit(userName, clusterResource, getUser(userName),
+ partition, schedulingMode, false);
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/eda4ac07/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicyMockFramework.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicyMockFramework.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicyMockFramework.java
index 32b2c68..faac129 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicyMockFramework.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicyMockFramework.java
@@ -42,8 +42,10 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue.User;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.ParentQueue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.QueueCapacities;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.preemption.PreemptionManager;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
@@ -69,7 +71,6 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
-import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.locks.ReentrantReadWriteLock;
@@ -96,6 +97,7 @@ public class ProportionalCapacityPreemptionPolicyMockFramework {
Clock mClock = null;
CapacitySchedulerConfiguration conf = null;
CapacityScheduler cs = null;
+ @SuppressWarnings("rawtypes")
EventHandler<SchedulerEvent> mDisp = null;
ProportionalCapacityPreemptionPolicy policy = null;
Resource clusterResource = null;
@@ -247,6 +249,7 @@ public class ProportionalCapacityPreemptionPolicyMockFramework {
if (containerId == 1) {
when(rmc.isAMContainer()).thenReturn(true);
when(app.getAMResource(label)).thenReturn(res);
+ when(app.getAppAMNodePartitionName()).thenReturn(label);
}
if (reserved) {
@@ -280,6 +283,12 @@ public class ProportionalCapacityPreemptionPolicyMockFramework {
containerId++;
}
+ // If the app has no containers (only pending requests), still make sure
+ // its AM node partition label is set.
+ if (repeat == 0) {
+ when(app.getAppAMNodePartitionName()).thenReturn(label);
+ }
+
// Some more app specific aggregated data can be better filled here.
when(app.getPriority()).thenReturn(pri);
when(app.getUser()).thenReturn(userName);
@@ -315,10 +324,15 @@ public class ProportionalCapacityPreemptionPolicyMockFramework {
private void mockApplications(String appsConfig) {
int id = 1;
HashMap<String, HashSet<String>> userMap = new HashMap<String, HashSet<String>>();
+ HashMap<String, HashMap<String, HashMap<String, ResourceUsage>>> userResourceUsagePerLabel = new HashMap<>();
LeafQueue queue = null;
+ int mulp = -1;
for (String a : appsConfig.split(";")) {
String[] strs = a.split("\t");
String queueName = strs[0];
+ if (mulp <= 0 && strs.length > 2 && strs[2] != null) {
+ mulp = 100 / (new Integer(strs[2]).intValue());
+ }
// get containers
List<RMContainer> liveContainers = new ArrayList<RMContainer>();
@@ -338,6 +352,7 @@ public class ProportionalCapacityPreemptionPolicyMockFramework {
when(app.getReservedContainers()).thenReturn(reservedContainers);
when(app.getApplicationAttemptId()).thenReturn(appAttemptId);
when(app.getApplicationId()).thenReturn(appId);
+ when(app.getQueueName()).thenReturn(queueName);
// add to LeafQueue
queue = (LeafQueue) nameToCSQueues.get(queueName);
@@ -351,20 +366,70 @@ public class ProportionalCapacityPreemptionPolicyMockFramework {
}
users.add(app.getUser());
+
+ String label = app.getAppAMNodePartitionName();
+
+ // Look up (or create) the queue -> user -> usage map for this label.
+ HashMap<String, HashMap<String, ResourceUsage>> userResourceUsagePerQueue = userResourceUsagePerLabel
+ .get(label);
+ if (null == userResourceUsagePerQueue) {
+ userResourceUsagePerQueue = new HashMap<>();
+ userResourceUsagePerLabel.put(label, userResourceUsagePerQueue);
+ }
+
+ // Look up (or create) the user -> usage map for this queue.
+ HashMap<String, ResourceUsage> userResourceUsage = userResourceUsagePerQueue
+ .get(queueName);
+ if (null == userResourceUsage) {
+ userResourceUsage = new HashMap<>();
+ userResourceUsagePerQueue.put(queueName, userResourceUsage);
+ }
+
+ // Look up (or create) this user's resource usage.
+ ResourceUsage usage = userResourceUsage.get(app.getUser());
+ if (null == usage) {
+ usage = new ResourceUsage();
+ userResourceUsage.put(app.getUser(), usage);
+ }
+
+ usage.incAMUsed(app.getAMResource(label));
+ usage.incUsed(app.getAppAttemptResourceUsage().getUsed(label));
id++;
}
- for (String queueName : userMap.keySet()) {
- queue = (LeafQueue) nameToCSQueues.get(queueName);
- // Currently we have user-limit test support only for default label.
- Resource totResoucePerPartition = partitionToResource.get("");
- Resource capacity = Resources.multiply(totResoucePerPartition,
- queue.getQueueCapacities().getAbsoluteCapacity());
- HashSet<String> users = userMap.get(queue.getQueueName());
- Resource userLimit = Resources.divideAndCeil(rc, capacity, users.size());
- for (String user : users) {
- when(queue.getUserLimitPerUser(eq(user), any(Resource.class),
- anyString())).thenReturn(userLimit);
+ for (String label : userResourceUsagePerLabel.keySet()) {
+ for (String queueName : userMap.keySet()) {
+ queue = (LeafQueue) nameToCSQueues.get(queueName);
+ // Currently we have user-limit test support only for default label.
+ Resource totResoucePerPartition = partitionToResource.get("");
+ Resource capacity = Resources.multiply(totResoucePerPartition,
+ queue.getQueueCapacities().getAbsoluteCapacity());
+ HashSet<String> users = userMap.get(queue.getQueueName());
+ when(queue.getAllUsers()).thenReturn(users);
+ Resource userLimit;
+ if (mulp > 0) {
+ userLimit = Resources.divideAndCeil(rc, capacity, mulp);
+ } else {
+ userLimit = Resources.divideAndCeil(rc, capacity,
+ users.size());
+ }
+ LOG.debug("Updating user-limit from mock: totResoucePerPartition="
+ + totResoucePerPartition + ", capacity=" + capacity
+ + ", users.size()=" + users.size() + ", userlimit= " + userLimit
+ + ",label= " + label + ",queueName= " + queueName);
+
+ HashMap<String, ResourceUsage> userResourceUsage = userResourceUsagePerLabel
+ .get(label).get(queueName);
+ for (String userName : users) {
+ User user = new User(userName);
+ if (userResourceUsage != null) {
+ user.setResourceUsage(userResourceUsage.get(userName));
+ }
+ when(queue.getUser(eq(userName))).thenReturn(user);
+ when(queue.getResourceLimitForAllUsers(eq(userName),
+ any(Resource.class), anyString(), any(SchedulingMode.class)))
+ .thenReturn(userLimit);
+ }
}
}
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/eda4ac07/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicyIntraQueue.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicyIntraQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicyIntraQueue.java
index bf83e1c..6c5aa67 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicyIntraQueue.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicyIntraQueue.java
@@ -62,12 +62,16 @@ public class TestProportionalCapacityPreemptionPolicyIntraQueue
* Apps which are running at low priority (4) will preempt few of its
* resources to meet the demand.
*/
+
+ conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER_POLICY,
+ "priority_first");
+
String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label
"n1= res=100";
String queuesConfig =
// guaranteed,max,used,pending,reserved
- "root(=[100 100 80 120 0]);" + // root
+ "root(=[100 100 79 120 0]);" + // root
"-a(=[11 100 11 50 0]);" + // a
"-b(=[40 100 38 60 0]);" + // b
"-c(=[20 100 10 10 0]);" + // c
@@ -304,6 +308,8 @@ public class TestProportionalCapacityPreemptionPolicyIntraQueue
conf.setFloat(CapacitySchedulerConfiguration.
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT,
(float) 0.5);
+ conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER_POLICY,
+ "priority_first");
String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label
@@ -357,6 +363,8 @@ public class TestProportionalCapacityPreemptionPolicyIntraQueue
// report "ideal" preempt as 10%. Ensure preemption happens only for 10%
conf.setFloat(CapacitySchedulerConfiguration.TOTAL_PREEMPTION_PER_ROUND,
(float) 0.1);
+ conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER_POLICY,
+ "priority_first");
String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label
@@ -411,6 +419,8 @@ public class TestProportionalCapacityPreemptionPolicyIntraQueue
conf.setFloat(CapacitySchedulerConfiguration.
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT,
(float) 0.5);
+ conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER_POLICY,
+ "priority_first");
String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label
@@ -418,7 +428,7 @@ public class TestProportionalCapacityPreemptionPolicyIntraQueue
String queuesConfig =
// guaranteed,max,used,pending,reserved
"root(=[100 100 95 170 0]);" + // root
- "-a(=[60 100 70 50 0]);" + // a
+ "-a(=[60 100 70 35 0]);" + // a
"-b(=[40 100 25 120 0])"; // b
String appsConfig =
@@ -467,6 +477,8 @@ public class TestProportionalCapacityPreemptionPolicyIntraQueue
conf.setFloat(CapacitySchedulerConfiguration.
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT,
(float) 0.5);
+ conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER_POLICY,
+ "priority_first");
String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label
@@ -516,6 +528,8 @@ public class TestProportionalCapacityPreemptionPolicyIntraQueue
* cycle. Eventhough there are more demand and no other low priority
* apps are present, still AM contaier need to soared.
*/
+ conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER_POLICY,
+ "priority_first");
String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label
@@ -660,6 +674,8 @@ public class TestProportionalCapacityPreemptionPolicyIntraQueue
conf.setFloat(CapacitySchedulerConfiguration.
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT,
(float) 0.5);
+ conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER_POLICY,
+ "priority_first");
String labelsConfig = "=100,true;" + // default partition
"x=100,true"; // partition=x
@@ -720,6 +736,8 @@ public class TestProportionalCapacityPreemptionPolicyIntraQueue
conf.setFloat(CapacitySchedulerConfiguration.
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT,
(float) 0.5);
+ conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER_POLICY,
+ "priority_first");
String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label
@@ -840,8 +858,10 @@ public class TestProportionalCapacityPreemptionPolicyIntraQueue
policy.editSchedule();
// Considering user-limit of 50% since only 2 users are there, only preempt
- // 15 more (5 is already running) eventhough demand is for 30.
- verify(mDisp, times(15)).handle(argThat(
+ // 14 more (5 is already running) even though demand is for 30. Ideally we
+ // must preempt 15, but the 15th container would bring user1's usage to 20,
+ // which equals the user-limit. Hence the 15th container is skipped.
+ verify(mDisp, times(14)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(3))));
}
@@ -869,6 +889,8 @@ public class TestProportionalCapacityPreemptionPolicyIntraQueue
conf.setFloat(CapacitySchedulerConfiguration.
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT,
(float) 0.5);
+ conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER_POLICY,
+ "priority_first");
String labelsConfig = "=100,true;" + // default partition
"x=100,true"; // partition=x
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org