You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by bu...@apache.org on 2016/04/27 15:36:27 UTC

svn commit: r1741262 - in /uima/sandbox/uima-ducc/trunk: src/main/admin/ uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/

Author: burn
Date: Wed Apr 27 13:36:27 2016
New Revision: 1741262

URL: http://svn.apache.org/viewvc?rev=1741262&view=rev
Log:
UIMA-4910 Avoid offline and dead nodes when allocating in a reserve class

Modified:
    uima/sandbox/uima-ducc/trunk/src/main/admin/rm_qoccupancy
    uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Machine.java
    uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java
    uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java

Modified: uima/sandbox/uima-ducc/trunk/src/main/admin/rm_qoccupancy
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/src/main/admin/rm_qoccupancy?rev=1741262&r1=1741261&r2=1741262&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/src/main/admin/rm_qoccupancy (original)
+++ uima/sandbox/uima-ducc/trunk/src/main/admin/rm_qoccupancy Wed Apr 27 13:36:27 2016
@@ -32,19 +32,21 @@ class DuccRmQOccupancy(DuccUtil):
 
 
     def format(self, nodes, shares):
-        print("%20s %11s %6s %6s %15s %10s %6s %6s %6s %8s %7s %10s %8s" %  ("Node", "Blacklisted", "Online", "Status", "Nodepool", "Memory", "Order", "Free", "In-Use", "Np-InUse", "Quantum", "Reservable", "Classes"))
-        print("%20s %11s %6s %6s %15s %10s %6s %6s %6s %8s %7s %10s %8s" %  ("----", "-----------", "------", "------", "--------", "------", "-----", "----", "------", "--------", "-------", "----------", "-------"))
+        typemap = {'R':'Res', 'M':' AP', 'J':'Job', 'S':'Svc'}
+        print("%20s %6s %11s %6s %10s %15s %10s %6s %6s %6s %8s %7s %10s %8s" %  ("Node", "Status", "Blacklisted", "Online", "Responsive", "Nodepool", "Memory", "Order", "Free", "In-Use", "Np-InUse", "Quantum", "Reservable", "Classes"))
+        print("%20s %6s %11s %6s %10s %15s %10s %6s %6s %6s %8s %7s %10s %8s" %  ("----", "------", "-----------", "------", "----------", "--------", "------", "-----", "----", "------", "--------", "-------", "----------", "-------"))
         for n in nodes:
-            if (n['responsive']):
+            if (n['blacklisted'] == 'False' and n['online'] == 'True' and n['responsive'] == 'True'):
                 status = 'up'
             else:
                 status = 'down'
-            print "%20s %11s %6s %6s %15s %10s %3s(Q) %6s %6s %8s %7s %10s  %-8s" %  (n['name'], n['blacklisted'], n['online'], status, n['nodepool'], 
+            print "%20s %6s %11s %6s %10s %15s %10s %3s(Q) %6s %6s %8s %7s %10s  %-8s" %  (n['name'], status, n['blacklisted'], n['online'], n['responsive'], n['nodepool'], 
                                                                              n['memory'], n['share_order'], n['shares_left'], n['assignments'], 
                                                                              n['np_assignments'], n['quantum'], n['reservable'], n['classes'])
             if ( shares.has_key(n['name']) ):
                 for s in shares[n['name']]:
-                    fmt = '%19s ' + s['jobtype'] +'[%8s] S[%8s] O[%s] II[%8s] IR[%8s] E[%5s] P[%5s] F[%5s] S[%10s]'
+                    type = typemap[s['jobtype']]
+                    fmt = '%19s ' + type +':%-8s ShareId:%-8s Shares:%-s InitTime:%-8s Investment:%-8s Evicted:%-5s Purged:%-5s Fixed:%-5s State:%-10s'
                     state = s['state']
                     if ( state == 'null' ):
                         state = "Assigned"

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Machine.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Machine.java?rev=1741262&r1=1741261&r2=1741262&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Machine.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Machine.java Wed Apr 27 13:36:27 2016
@@ -233,27 +233,24 @@ public class Machine
         // yet assigning to a job so we have to check both the shares given out, and whether the virtual share order is
         // still pristine.
         //
-        // We use this trick so we can use the "normal" allocation mechanisms for bookeeping without special-casing reservations.
+        // We use this trick so we can use the "normal" allocation mechanisms for bookkeeping without special-casing reservations.
         //
-        // UIMA-4142, include blacklist considerations
-        return ( (activeShares.size()) == 0 && (virtual_share_order == share_order) && ( !isBlacklisted() ) );
+        // UIMA-4920, called only if isSchedulable is true
+        return ( (activeShares.size()) == 0 && (virtual_share_order == share_order) );
     }
 
     /**
      * Can preemption free this machine?
+     * UIMA-4920, called only if isSchedulable is true
      */
     public boolean isFreeable()
     {
-        boolean answer = true;
-        // UIMA-4142, include blacklist considerations
-        if ( isBlacklisted() ) return false;
-
         for ( Share s : activeShares.values() ) {
             if ( s.isFixed() ) {
                 return false;
             }
         }
-        return answer;
+        return true;
     }
 
     public int countNpShares()

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java?rev=1741262&r1=1741261&r2=1741262&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java Wed Apr 27 13:36:27 2016
@@ -226,7 +226,7 @@ class NodePool
         return count;
     }
      /**
-     * How many do I have, including recusring down the children?
+     * How many do I have, including recursing down the children?
      */
     int countUnresponsiveMachines()
     {
@@ -321,7 +321,7 @@ class NodePool
         if ( mlist == null ) return 0;
 
         for ( Machine m : mlist.values() ) {
-            if ( m.isFree() ) {
+            if ( isSchedulable(m) && m.isFree() ) {
                 cnt++;
             }
         }
@@ -1374,6 +1374,10 @@ class NodePool
         while ( iter.hasNext() && (given < needed) ) {
             Machine m = iter.next();
             logger.info(methodName, j.getId(), "Examining", m.getId());
+            if ( !isSchedulable(m) ) {
+              logger.info(methodName, j.getId(), "Bypass because machine", m.getId(), "is offline or unresponsive or blacklisted");
+              continue;
+            }
             if ( preemptables.containsKey(m.key()) ) {         // already counted, don't count twice
                 logger.info(methodName, j.getId(), "Bypass because machine", m.getId(), "already counted.");
                 continue;
@@ -1556,7 +1560,7 @@ class NodePool
 
         // Machs is all candidate machines, ordered by empty, then most preferable, according to the eviction policy.
         for ( Machine mm : machs ) {
-            if ( mm.isFree() ) {
+            if ( isSchedulable(mm) && mm.isFree() ) {
                 Share s = new Share(mm, job, mm.getShareOrder());
                 s.setFixed();
                 connectShare(s, mm, job, mm.getShareOrder());
@@ -1982,7 +1986,8 @@ class NodePool
     }
 
     //
-    // Order shares by INCRESING investment
+    // Order shares by INCREASING investment
+    // Note:  Machines may not be schedulable but that is checked after sorting in FindMachines
     //
     class ReservationSorter
     	implements Comparator<Machine>

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java?rev=1741262&r1=1741261&r2=1741262&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java Wed Apr 27 13:36:27 2016
@@ -817,7 +817,7 @@ public class Scheduler
         }
 
         HashMap<Node, Integer> nodeUpdates = new HashMap<Node, Integer>();
-        synchronized(deadNodes) {
+        synchronized(illNodes) {
             nodeUpdates.putAll(illNodes);
             illNodes.clear();
         }
@@ -877,7 +877,7 @@ public class Scheduler
      * We first accept any changes and requests from the outside world and place them where they
      * can be acted on in this epoch.
      *
-     * We then pass all relevent requests and resources to the IScheduler.  This returns a
+     * We then pass all relevant requests and resources to the IScheduler.  This returns a
      * SchedulingUpdate which is passed to the dispatcher to be acted upon.
      */
     public JobManagerUpdate schedule()
@@ -1420,13 +1420,14 @@ public class Scheduler
         //
         // Not a cheap query, by the way.
         //
+        // NOTE: No longer used by the rm_qoccupancy script which now goes directly to the database
+        //
+        
+        
         for ( NodePool np : nodepools ) {
 
-            Collection<Machine> machs = np.getAllMachines().values();        
-            for ( Machine m : machs ) {            
-                ret.addMachine(m.queryMachine());
-            }
-
+            // NOTE:  The offline & dead nodes are also in the AllMachines list so must be removed
+            Map<Node, Machine> allMachs     = np.getAllMachines();
             Map<Node, Machine> offline      = np.getOfflineMachines();          // UIMA-4234
             Map<Node, Machine> unresponsive = np.getUnresponsiveMachines();     // UIMA-4234
 
@@ -1439,6 +1440,7 @@ public class Scheduler
                     qm.setUnresponsive();
                 }
                 ret.addMachine(qm);
+                allMachs.remove(n);
             }
 
             for ( Node n : unresponsive.keySet() ) {
@@ -1446,7 +1448,13 @@ public class Scheduler
                 RmQueriedMachine qm = m.queryMachine();
                 qm.setUnresponsive();
                 ret.addMachine(qm);
-            }            
+                allMachs.remove(n);
+            }
+            
+            for ( Node n : allMachs.keySet() ) { 
+              Machine m = allMachs.get(n);
+              ret.addMachine(m.queryMachine());
+          }
         }
 
         return ret;