You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by ch...@apache.org on 2015/03/08 15:46:00 UTC
svn commit: r1665005 - in
/uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler:
NodepoolScheduler.java Scheduler.java
Author: challngr
Date: Sun Mar 8 14:46:00 2015
New Revision: 1665005
URL: http://svn.apache.org/r1665005
Log:
UIMA-4275 Allotment for FIXED_SHARE, and never refuse them.
Modified:
uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodepoolScheduler.java
uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodepoolScheduler.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodepoolScheduler.java?rev=1665005&r1=1665004&r2=1665005&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodepoolScheduler.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodepoolScheduler.java Sun Mar 8 14:46:00 2015
@@ -118,24 +118,6 @@ public class NodepoolScheduler
this.evictionPolicy = ep;
}
- /**
- * Convert the absoute / percent cap into an absolute cap over some basis.
- * This is a helper for computing the share or machine caps.
- */
- private int calcCaps(int absolute, double percent, int basis)
- {
- int perccap = Integer.MAX_VALUE; // the cap, calculated from percent
- if ( percent < 1.0 ) {
- double b = basis;
- b = b * percent;
- perccap = (int) Math.round(b);
- } else {
- perccap = basis;
- }
-
- return Math.min(absolute, perccap);
- }
-
private void reworknShares(int[] vshares, int[] nshares)
{
// now redo nshares
@@ -1189,22 +1171,18 @@ public class NodepoolScheduler
for ( ResourceClass rc : rcs ) {
ArrayList<IRmJob> jobs = rc.getAllJobsSorted(new JobByTimeSorter());
- int shares_given_out = 0; // q-shares, to compare against machine capacity
for ( IRmJob j : jobs ) {
- shares_given_out += (j.countNShares() * j.getShareOrder());
- j.clearShares();
+ j.clearShares(); // reset shares assigned at start of each schedling cycle
}
NodePool np = rc.getNodepool();
- int classcap = 0;
- classcap = calcCaps(rc.getAbsoluteCap(), rc.getPercentCap(), np.countTotalShares()); // quantum shares
for ( IRmJob j : jobs ) {
- int n_instances = j.countInstances(); // n-shrares; virtual shares - API treats this as a reservation
- // and we overload the n-machines field for the count.
- if ( j.countNShares() > 0 ) {
+ int n_instances = j.countInstances(); // n-shrares; virtual shares
+
+ if ( j.countNShares() > 0 ) { // all-or-nothing check
// already accounted for as well, since it is a non-preemptable share
logger.info(methodName, j.getId(), "[stable]", "requested", n_instances, "assigned", j.countNShares(), "processes, ",
(j.countNShares() * j.getShareOrder()), "QS");
@@ -1212,9 +1190,6 @@ public class NodepoolScheduler
gbo[j.getShareOrder()] = j.countNShares(); // must set the allocation so eviction works right
- // If node dies n_instances may be > countNShares() so we don't do it this way any more. UIMA-3614
- // gbo[j.getShareOrder()] = n_instances; // must set the allocation so eviction works right
-
j.setGivenByOrder(gbo);
continue;
}
@@ -1227,38 +1202,29 @@ public class NodepoolScheduler
// Don't schedule non-preemptable shares over subpools
if ( np.countLocalShares() < n_instances ) {
- schedulingUpdate.refuse(j, "1 Job refused because insufficient resources are availble. Available for class "
- + rc.getName() + ": "
- + np.countLocalShares()
- + "requested:" + n_instances);
+ schedulingUpdate.defer(j, "Job deferred because insufficient resources are availble for this class.");
- logger.warn(methodName, j.getId(), "1 Cannot accept Fixed Share job nodepool " + np.getId()
- + " has insufficient nodes left. Available["
+ logger.warn(methodName, j.getId(), "1 Deferring sixed share job because nodepool " + np.getId()
+ + " has insufficient space left. Available["
+ np.countLocalShares()
+ "] requested[" + n_instances + "]");
continue;
}
//
- // Now see if we have sufficient shares in the system for this allocation. Note that pool nodes are accounted for here as well.
+ // Now see if we have sufficient shares in the system for this allocation.
//
if ( np.countNSharesByOrder(order) < n_instances ) { // countSharesByOrder is N shares, as is minshares
- schedulingUpdate.refuse(j, "2 Job refused because insufficient resources are availble.");
- logger.warn(methodName, j.getId(), "2 Cannot accept Fixed Share job, insufficient shares available. Available[" + np.countNSharesByOrder(order) + "] requested[" + n_instances + "]");
+ schedulingUpdate.defer(j, "Job deferred because insufficient resources are availble.");
+ logger.warn(methodName, j.getId(), "2 Deferring fixed share job, insufficient shares available. Available[" + np.countNSharesByOrder(order) + "] requested[" + n_instances + "]");
continue;
}
//
// Make sure this allocation does not blow the class cap.
//
- shares_given_out += (n_instances * order);
- if ( shares_given_out > classcap ) { // to q-shares before comparing
- schedulingUpdate.refuse(j, "3 Job refused because class cap of " + classcap + " is exceeded.");
- continue;
- }
-
- if ( rc.getMaxProcesses() < n_instances ) { // Does it blow the configured limit for this class?
- schedulingUpdate.refuse(j, "3 Job refused because class max of " + rc.getMaxProcesses() + " is exceeded.");
+ if ( rc.allotmentExceeded(j) ) {
+ schedulingUpdate.defer(j, "Job deferred because allotment of " + rc.getAllotment(j) + "GB is exceeded by user " + j.getUserName());
continue;
}
@@ -1296,6 +1262,14 @@ public class NodepoolScheduler
continue;
}
+ if ( j.isRefused() ) { // bypass jobs that we know can't be allocated. unlikely after UIMA-4275.
+ continue;
+ }
+
+ if ( j.isDeferred() ) { // UIMA-4275 - still waiting for an allocation
+ continue;
+ }
+
int order = j.getShareOrder();
int count = j.countNSharesGiven();
int avail = np.countNSharesByOrder(order);
@@ -1316,7 +1290,7 @@ public class NodepoolScheduler
//
- // If not we're waiting on preemptions which will occur naturally, or by forcible eviction of squatters.
+ // If nothing assigned we're waiting on preemptions which will occur naturally, or by forcible eviction of squatters.
//
}
}
@@ -1344,7 +1318,6 @@ public class NodepoolScheduler
// Get jobs into order by submission time - new ones ones may just be out of luck
ArrayList<IRmJob> jobs = rc.getAllJobsSorted(new JobByTimeSorter());
- int machines_given_out = 0;
NodePool np = rc.getNodepool();
@@ -1373,7 +1346,6 @@ public class NodepoolScheduler
gbo[j.getShareOrder()] = j.countNShares(); // UIMA-3614 - may be < Instances if machine is purged
j.setGivenByOrder(gbo);
- machines_given_out += nshares;
jlist.remove();
continue;
}
@@ -1402,7 +1374,6 @@ public class NodepoolScheduler
int order = j.getShareOrder(); // memory, coverted to order, so we can find stuff
int nrequested = j.countInstances(); // in machines
- int classcap;
if ( np.countLocalMachines() == 0 ) {
schedulingUpdate.defer(j, "Reservation deferred because resources are exhausted.");
@@ -1414,7 +1385,7 @@ public class NodepoolScheduler
}
if ( rc.allotmentExceeded(j) ) { // Does it blow the configured limit for this class?
- schedulingUpdate.defer(j, "Reservation deferred because allotment of " + rc.getAllotment(j) + " is exceeded by user " + j.getUserName());
+ schedulingUpdate.defer(j, "Reservation deferred because allotment of " + rc.getAllotment(j) + "GB is exceeded by user " + j.getUserName());
continue;
}
@@ -1423,7 +1394,6 @@ public class NodepoolScheduler
int[] gbo = NodePool.makeArray();
gbo[order] = nrequested;
j.setGivenByOrder(gbo);
- machines_given_out += nrequested;
int given = 0;
if ( rc.enforceMemory() ) {
@@ -1544,7 +1514,7 @@ public class NodepoolScheduler
for (ResourceClass rc : resourceClasses.values() ) {
if ( rc.getPolicy() == Policy.FAIR_SHARE ) {
NodePool np = rc.getNodepool();
- NodePool check = getNodepool(rc);
+ // NodePool check = getNodepool(rc);
HashMap<IRmJob, IRmJob> jobs = rc.getAllJobs();
for ( IRmJob j : jobs.values() ) {
HashMap<Share, Share> shares = j.getAssignedShares();
@@ -1871,7 +1841,7 @@ public class NodepoolScheduler
List<Share> potentialShares = new ArrayList<Share>();
for ( Share s : sh ) {
IRmJob j = s.getJob();
- User u = j.getUser();
+ // User u = j.getUser();
if ( s.isForceable() ) {
if ( candidateJobs.containsKey(j) ) {
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java?rev=1665005&r1=1665004&r2=1665005&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java Sun Mar 8 14:46:00 2015
@@ -968,10 +968,11 @@ public class Scheduler
continue;
}
- if ( share_order > max_order ) {
- upd.refuse(j, "Memory requested " + j.getMemory() + "GB exceeds the capacity of any machine in the cluster.");
- continue;
- }
+ // UIMA-4275 never refuse impossible work, just let it hang out
+// if ( share_order > max_order ) {
+// upd.refuse(j, "Memory requested " + j.getMemory() + "GB exceeds the capacity of any machine in the cluster.");
+// continue;
+// }
/**
* We want to allow this - a normal job, submitted to a reservation class.