You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by bu...@apache.org on 2016/04/27 15:36:27 UTC
svn commit: r1741262 - in /uima/sandbox/uima-ducc/trunk: src/main/admin/ uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/
Author: burn
Date: Wed Apr 27 13:36:27 2016
New Revision: 1741262
URL: http://svn.apache.org/viewvc?rev=1741262&view=rev
Log:
UIMA-4910 Avoid offline and dead nodes when allocating in a reserve class
Modified:
uima/sandbox/uima-ducc/trunk/src/main/admin/rm_qoccupancy
uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Machine.java
uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java
uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java
Modified: uima/sandbox/uima-ducc/trunk/src/main/admin/rm_qoccupancy
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/src/main/admin/rm_qoccupancy?rev=1741262&r1=1741261&r2=1741262&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/src/main/admin/rm_qoccupancy (original)
+++ uima/sandbox/uima-ducc/trunk/src/main/admin/rm_qoccupancy Wed Apr 27 13:36:27 2016
@@ -32,19 +32,21 @@ class DuccRmQOccupancy(DuccUtil):
def format(self, nodes, shares):
- print("%20s %11s %6s %6s %15s %10s %6s %6s %6s %8s %7s %10s %8s" % ("Node", "Blacklisted", "Online", "Status", "Nodepool", "Memory", "Order", "Free", "In-Use", "Np-InUse", "Quantum", "Reservable", "Classes"))
- print("%20s %11s %6s %6s %15s %10s %6s %6s %6s %8s %7s %10s %8s" % ("----", "-----------", "------", "------", "--------", "------", "-----", "----", "------", "--------", "-------", "----------", "-------"))
+ typemap = {'R':'Res', 'M':' AP', 'J':'Job', 'S':'Svc'}
+ print("%20s %6s %11s %6s %10s %15s %10s %6s %6s %6s %8s %7s %10s %8s" % ("Node", "Status", "Blacklisted", "Online", "Responsive", "Nodepool", "Memory", "Order", "Free", "In-Use", "Np-InUse", "Quantum", "Reservable", "Classes"))
+ print("%20s %6s %11s %6s %10s %15s %10s %6s %6s %6s %8s %7s %10s %8s" % ("----", "------", "-----------", "------", "----------", "--------", "------", "-----", "----", "------", "--------", "-------", "----------", "-------"))
for n in nodes:
- if (n['responsive']):
+ if (n['blacklisted'] == 'False' and n['online'] == 'True' and n['responsive'] == 'True'):
status = 'up'
else:
status = 'down'
- print "%20s %11s %6s %6s %15s %10s %3s(Q) %6s %6s %8s %7s %10s %-8s" % (n['name'], n['blacklisted'], n['online'], status, n['nodepool'],
+ print "%20s %6s %11s %6s %10s %15s %10s %3s(Q) %6s %6s %8s %7s %10s %-8s" % (n['name'], status, n['blacklisted'], n['online'], n['responsive'], n['nodepool'],
n['memory'], n['share_order'], n['shares_left'], n['assignments'],
n['np_assignments'], n['quantum'], n['reservable'], n['classes'])
if ( shares.has_key(n['name']) ):
for s in shares[n['name']]:
- fmt = '%19s ' + s['jobtype'] +'[%8s] S[%8s] O[%s] II[%8s] IR[%8s] E[%5s] P[%5s] F[%5s] S[%10s]'
+ type = typemap[s['jobtype']]
+ fmt = '%19s ' + type +':%-8s ShareId:%-8s Shares:%-s InitTime:%-8s Investment:%-8s Evicted:%-5s Purged:%-5s Fixed:%-5s State:%-10s'
state = s['state']
if ( state == 'null' ):
state = "Assigned"
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Machine.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Machine.java?rev=1741262&r1=1741261&r2=1741262&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Machine.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Machine.java Wed Apr 27 13:36:27 2016
@@ -233,27 +233,24 @@ public class Machine
// yet assigning to a job so we have to check both the shares given out, and whether the virtual share order is
// still pristine.
//
- // We use this trick so we can use the "normal" allocation mechanisms for bookeeping without special-casing reservations.
+ // We use this trick so we can use the "normal" allocation mechanisms for bookkeeping without special-casing reservations.
//
- // UIMA-4142, include blacklist considerations
- return ( (activeShares.size()) == 0 && (virtual_share_order == share_order) && ( !isBlacklisted() ) );
+ // UIMA-4920, called only if isSchedulable is true
+ return ( (activeShares.size()) == 0 && (virtual_share_order == share_order) );
}
/**
* Can preemption free this machine?
+ * UIMA-4920, called only if isSchedulable is true
*/
public boolean isFreeable()
{
- boolean answer = true;
- // UIMA-4142, include blacklist considerations
- if ( isBlacklisted() ) return false;
-
for ( Share s : activeShares.values() ) {
if ( s.isFixed() ) {
return false;
}
}
- return answer;
+ return true;
}
public int countNpShares()
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java?rev=1741262&r1=1741261&r2=1741262&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java Wed Apr 27 13:36:27 2016
@@ -226,7 +226,7 @@ class NodePool
return count;
}
/**
- * How many do I have, including recusring down the children?
+ * How many do I have, including recursing down the children?
*/
int countUnresponsiveMachines()
{
@@ -321,7 +321,7 @@ class NodePool
if ( mlist == null ) return 0;
for ( Machine m : mlist.values() ) {
- if ( m.isFree() ) {
+ if ( isSchedulable(m) && m.isFree() ) {
cnt++;
}
}
@@ -1374,6 +1374,10 @@ class NodePool
while ( iter.hasNext() && (given < needed) ) {
Machine m = iter.next();
logger.info(methodName, j.getId(), "Examining", m.getId());
+ if ( !isSchedulable(m) ) {
+ logger.info(methodName, j.getId(), "Bypass because machine", m.getId(), "is offline or unresponsive or blacklisted");
+ continue;
+ }
if ( preemptables.containsKey(m.key()) ) { // already counted, don't count twice
logger.info(methodName, j.getId(), "Bypass because machine", m.getId(), "already counted.");
continue;
@@ -1556,7 +1560,7 @@ class NodePool
// Machs is all candidate machines, ordered by empty, then most preferable, according to the eviction policy.
for ( Machine mm : machs ) {
- if ( mm.isFree() ) {
+ if ( isSchedulable(mm) && mm.isFree() ) {
Share s = new Share(mm, job, mm.getShareOrder());
s.setFixed();
connectShare(s, mm, job, mm.getShareOrder());
@@ -1982,7 +1986,8 @@ class NodePool
}
//
- // Order shares by INCRESING investment
+ // Order shares by INCREASING investment
+ // Note: Machines may not be schedulable but that is checked after sorting in FindMachines
//
class ReservationSorter
implements Comparator<Machine>
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java?rev=1741262&r1=1741261&r2=1741262&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java Wed Apr 27 13:36:27 2016
@@ -817,7 +817,7 @@ public class Scheduler
}
HashMap<Node, Integer> nodeUpdates = new HashMap<Node, Integer>();
- synchronized(deadNodes) {
+ synchronized(illNodes) {
nodeUpdates.putAll(illNodes);
illNodes.clear();
}
@@ -877,7 +877,7 @@ public class Scheduler
* We first accept any changes and requests from the outside world and place them where they
* can be acted on in this epoch.
*
- * We then pass all relevent requests and resources to the IScheduler. This returns a
+ * We then pass all relevant requests and resources to the IScheduler. This returns a
* SchedulingUpdate which is passed to the dispatcher to be acted upon.
*/
public JobManagerUpdate schedule()
@@ -1420,13 +1420,14 @@ public class Scheduler
//
// Not a cheap query, by the way.
//
+ // NOTE: No longer used by the rm_qoccupancy script which now goes directly to the database
+ //
+
+
for ( NodePool np : nodepools ) {
- Collection<Machine> machs = np.getAllMachines().values();
- for ( Machine m : machs ) {
- ret.addMachine(m.queryMachine());
- }
-
+ // NOTE: The offline & dead nodes are also in the AllMachines list so must be removed
+ Map<Node, Machine> allMachs = np.getAllMachines();
Map<Node, Machine> offline = np.getOfflineMachines(); // UIMA-4234
Map<Node, Machine> unresponsive = np.getUnresponsiveMachines(); // UIMA-4234
@@ -1439,6 +1440,7 @@ public class Scheduler
qm.setUnresponsive();
}
ret.addMachine(qm);
+ allMachs.remove(n);
}
for ( Node n : unresponsive.keySet() ) {
@@ -1446,7 +1448,13 @@ public class Scheduler
RmQueriedMachine qm = m.queryMachine();
qm.setUnresponsive();
ret.addMachine(qm);
- }
+ allMachs.remove(n);
+ }
+
+ for ( Node n : allMachs.keySet() ) {
+ Machine m = allMachs.get(n);
+ ret.addMachine(m.queryMachine());
+ }
}
return ret;