You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by de...@apache.org on 2018/05/11 14:10:52 UTC

svn commit: r1831427 - in /uima/uima-ducc/trunk: src/main/admin/ uima-ducc-common/src/main/java/org/apache/uima/ducc/common/ uima-ducc-common/src/main/java/org/apache/uima/ducc/common/utils/ uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ uima-du...

Author: degenaro
Date: Fri May 11 14:10:52 2018
New Revision: 1831427

URL: http://svn.apache.org/viewvc?rev=1831427&view=rev
Log:
UIMA-5742 Reliable DUCC - support nodefile with .regex suffix comprising a regular expression to classify nodes into a NodePool

Modified:
    uima/uima-ducc/trunk/src/main/admin/ducc_util.py
    uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/NodeConfiguration.java
    uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/utils/DuccSchedulerClasses.java
    uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/admin-commands.tex
    uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-classes.tex
    uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java

Modified: uima/uima-ducc/trunk/src/main/admin/ducc_util.py
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/src/main/admin/ducc_util.py?rev=1831427&r1=1831426&r2=1831427&view=diff
==============================================================================
--- uima/uima-ducc/trunk/src/main/admin/ducc_util.py (original)
+++ uima/uima-ducc/trunk/src/main/admin/ducc_util.py Fri May 11 14:10:52 2018
@@ -1106,11 +1106,14 @@ class DuccUtil(DuccBase):
     #
     # Read the nodefile, recursing into 'imports' if needed, returning a
     # map.  The map is keyed on filename, with each entry a list of the nodes.
+    # A nodefile with the suffix ".regex" is skipped.
     #
     def read_nodefile(self, nodefile, ret):
         #print 'READ_NODEFILE:', nodefile, ret
         n_nodes = 0
-        if ( os.path.exists(nodefile) ):
+        if(nodefile.endswith('.regex')):
+            pass
+        elif ( os.path.exists(nodefile) ):
             nodes = []
             f = open(nodefile)
             for node in f:

Modified: uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/NodeConfiguration.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/NodeConfiguration.java?rev=1831427&r1=1831426&r2=1831427&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/NodeConfiguration.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/NodeConfiguration.java Fri May 11 14:10:52 2018
@@ -33,6 +33,7 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.StringTokenizer;
+import java.util.regex.Pattern;
 
 import org.apache.uima.ducc.common.utils.DuccLogger;
 import org.apache.uima.ducc.common.utils.DuccProperties;
@@ -70,6 +71,9 @@ public class NodeConfiguration
     Map<String, DuccProperties> poolsByNodefile = new HashMap<String, DuccProperties>();   // nodepool node file -> nodepool props
     Map<String, DuccProperties> poolsByNodeName = new HashMap<String, DuccProperties>();   // Nodepools, by node
 
+    List<String> listRules = new ArrayList<String>();  // ordered list of rules 
+    Map<String, DuccProperties> mapRules = new HashMap<String, DuccProperties>(); 
+    
     Map<String, String> allImports = new HashMap<String, String>();                        // map nodefile -> importer, map for dup checking
     Map<String, String> referrers  = new HashMap<String, String>();                        // map nodefile -> referring nodepool, for dup checking
 
@@ -86,6 +90,9 @@ public class NodeConfiguration
     DuccProperties reserveDefault   = null;
     String ducc_home = null;
 
+    String dot_regex = ".regex";
+    String key_regex = "regex";
+    
     public NodeConfiguration(String config_file_name, String ducc_nodes, String ducc_users, DuccLogger logger)
     {
         this.config_file_name = config_file_name;
@@ -744,11 +751,75 @@ public class NodeConfiguration
 
         }        
     }
-
+    
+    /** Returns the "name" of the first nodepool (in listRules order) whose regex fully matches node; null if node is null or nothing matches. */
+    public String findNodePoolByRule(String node) {
+    	if(node != null) {
+    		for(String regex : listRules) {
+    			if(node.matches(regex)) {
+    				return mapRules.get(regex).getProperty("name");   // first matching rule wins
+    			}
+    		}
+    	}
+    	return null;
+    }
+    
+    /* Read the regex held in a ".regex" nodefile (lines trimmed and concatenated) and register
+     * it for nodepool np in listRules, mapRules and np's key_regex property.  Throws
+     * IllegalConfigurationException if the file is missing, unreadable, empty, a duplicate or
+     * not a valid regex. */
+    void readNodepoolRegex(String nodefile, DuccProperties np)
+    		throws IllegalConfigurationException
+    {
+    	if(nodefile == null) {
+    		throw new IllegalConfigurationException("Missing parameter \"nodefile\".");
+    	}
+    	BufferedReader br = null;
+    	StringBuilder sb = new StringBuilder();
+    	try {
+    		String fn = resolve(nodefile);
+    		if(fn == null) {
+    			throw new IllegalConfigurationException("Missing file \""+nodefile+"\".");
+    		}
+    		br = new BufferedReader(new FileReader(fn));
+    		for(String line = br.readLine(); line != null; line = br.readLine()) {
+    			sb.append(line.trim());
+    		}
+    	} catch (IllegalConfigurationException e) {
+    		throw e;   // already a clean diagnostic; keep it out of the catch-all below
+    	} catch (FileNotFoundException e) {
+    		throw new IllegalConfigurationException("File not found: "+nodefile);
+    	} catch (IOException e) {
+    		throw new IllegalConfigurationException("File I/O error: "+nodefile);
+    	} catch (Exception e) {
+    		e.printStackTrace();
+    		throw new IllegalConfigurationException(e);
+    	} finally {
+    		if ( br != null ) {
+    			try { br.close(); } catch (IOException ignored) { }
+    		}
+    	}
+    	String regex = sb.toString();
+    	if(regex.isEmpty()) {
+    		throw new IllegalConfigurationException("Missing regex in "+nodefile);
+    	}
+    	if(mapRules.containsKey(regex)) {
+    		throw new IllegalConfigurationException("Duplicate regex in "+nodefile);
+    	}
+    	try {
+    		Pattern.compile(regex);   // validation only; the rule is stored as a String
+    	} catch (java.util.regex.PatternSyntaxException e) {
+    		throw new IllegalConfigurationException("Illegal regex in "+nodefile);
+    	}
+    	listRules.add(regex);
+    	mapRules.put(regex, np);
+    	np.put(key_regex, regex);
+    }
+    
     void readNodepoolNodes(String nodefile, DuccProperties p, String domain)
     		throws IllegalConfigurationException
     {    	
-    	String methodName = "readnodepoolFiles";
+    	String methodName = "readNodepoolNodes";
         @SuppressWarnings("unchecked")
         Map<String, String> nodes = (Map<String, String>) p.get("nodes");
         if ( nodes == null ) {
@@ -986,7 +1057,14 @@ public class NodeConfiguration
             
         // if we get here without crash the node pool files are not inconsistent
         for ( String k : poolsByNodefile.keySet() ) {
-            readNodepoolNodes(k, (DuccProperties) poolsByNodefile.get(k), domain);
+        	DuccProperties dp = (DuccProperties) poolsByNodefile.get(k);
+        	String nodefile = dp.getProperty("nodefile");
+        	if(nodefile.endsWith(dot_regex)) {
+        		readNodepoolRegex(k, dp);
+        	}
+        	else {
+        		readNodepoolNodes(k, dp, domain);
+        	}
         }
         // TODO: Test above procedures
         //       Assign ducc.nodes to the one allowable top level np with no pool file
@@ -1215,9 +1293,16 @@ public class NodeConfiguration
         String nodefile = p.getProperty("nodefile");
         String nfheader = "   Node File: ";
         logInfo(methodName, indent + nfheader + (nodefile == null ? "None" : nodefile));
-        @SuppressWarnings("unchecked")
-        Map<String, String> nodes = (Map<String, String>) p.get("nodes");
-        logInfo(methodName, formatNodes(nodes, indent.length() + nfheader.length()));
+        
+        String regex = p.getProperty(key_regex);
+        if(regex != null) {
+        	logInfo(methodName, indent + "   Node Rule: " + regex );
+        }
+        else {
+        	@SuppressWarnings("unchecked")
+            Map<String, String> nodes = (Map<String, String>) p.get("nodes");
+            logInfo(methodName, formatNodes(nodes, indent.length() + nfheader.length()));
+        }
         
         @SuppressWarnings("unchecked")
         List<DuccProperties> class_set = (List<DuccProperties>) p.get("classes");

Modified: uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/utils/DuccSchedulerClasses.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/utils/DuccSchedulerClasses.java?rev=1831427&r1=1831426&r2=1831427&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/utils/DuccSchedulerClasses.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/utils/DuccSchedulerClasses.java Fri May 11 14:10:52 2018
@@ -118,6 +118,12 @@ public class DuccSchedulerClasses {
   		if(nodepool != null) {
   			retVal = nodepool;
   		}
+  		else {
+  			nodepool = nc.findNodePoolByRule(node);
+  			if(nodepool != null) {
+  	  			retVal = nodepool;
+  	  		}
+  		}
   		return retVal;
   	}
 	/**

Modified: uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/admin-commands.tex
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/admin-commands.tex?rev=1831427&r1=1831426&r2=1831427&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/admin-commands.tex (original)
+++ uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/admin-commands.tex Fri May 11 14:10:52 2018
@@ -104,6 +104,8 @@ start_ducc -c sm -c pm -c rm -c or@bj22
       To start only agents, run start\_ducc specifying a nodelist explicitly. Note that the broker
       must have already been started.
       
+      Files in the nodelist ending with suffix .regex are skipped.
+      
       To start a specific management process, run start\_ducc with the -c component parameter, 
       specify the component that should be started. 
       
@@ -249,7 +251,7 @@ start_ducc -c rm
    existance of DUCC processes in the cluster, to forcibly ({\em kill -9}) terminate them, and to
    more gracefully terminate them ({\em kill -INT}).
           
-
+      Files in the nodelist ending with suffix .regex are skipped.
 
 \subsection{check\_ducc}
 \label{subsec:admin.check-ducc}
@@ -331,7 +333,9 @@ check_ducc -n nlist1 -n nlist2
 
            \end{description}               
 
-
+   \subsubsection{{\em Notes:}}
+   
+      Files in the nodelist ending with suffix .regex are skipped.
             
 \subsection{build\_duccling}
 \label{subsec:admin.build-duccling}

Modified: uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-classes.tex
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-classes.tex?rev=1831427&r1=1831426&r2=1831427&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-classes.tex (original)
+++ uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-classes.tex Fri May 11 14:10:52 2018
@@ -159,6 +159,9 @@ ducc.classes and is specified by the pro
 
       \item[nodefile] This is the name of a file containing the names of the nodes
         which are members of this nodepool.
+        If the file name ends with suffix {\em .regex} then the content is instead
+        expected to be a regular expression which is employed to match nodes comprising
+        the members of the nodepool.
 
       \item[parent] This is used to indicate which nodepool is the logical parent.
         Any nodepool without a {\em parent} is considered a top-level nodepool.
@@ -197,7 +200,31 @@ ducc.classes and is specified by the pro
 
     \end{figure}    
 
+    The following example shows an entry utilizing the {\em .regex} feature, followed
+    by the content of the specified file comprising a regular expression. 
 
+    \begin{figure}[H]
+    
+\begin{verbatim}
+    Nodepool jobdriver    { nodefile jobdriver.nodes.regex }
+\end{verbatim}
+      \caption{Sample Nodepool regex specification}
+      \label{fig:nodepool.regex.specifiecation}
+
+    \end{figure}    
+    
+    
+    \begin{figure}[H]
+    
+\begin{verbatim}
+    hostA|hostB|hostC
+\end{verbatim}
+      \caption{Sample file contents for jobdriver.nodes.regex }
+      \label{fig:nodepool.regex}
+
+    \end{figure}    
+    
+    
     \subsection{Class Definitions}
     \label{subsubsec:class.configuration}
 

Modified: uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java?rev=1831427&r1=1831426&r2=1831427&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java Fri May 11 14:10:52 2018
@@ -18,13 +18,19 @@
 */
 package org.apache.uima.ducc.rm.scheduler;
 
+import java.io.BufferedReader;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.regex.Pattern;
 
+import org.apache.uima.ducc.common.IDuccEnv;
 import org.apache.uima.ducc.common.Node;
 import org.apache.uima.ducc.common.NodeConfiguration;
 import org.apache.uima.ducc.common.NodeIdentity;
@@ -41,6 +47,7 @@ import org.apache.uima.ducc.common.db.Db
 import org.apache.uima.ducc.common.utils.DuccLogger;
 import org.apache.uima.ducc.common.utils.DuccProperties;
 import org.apache.uima.ducc.common.utils.DuccPropertiesResolver;
+import org.apache.uima.ducc.common.utils.IllegalConfigurationException;
 import org.apache.uima.ducc.common.utils.SystemPropertyResolver;
 import org.apache.uima.ducc.common.utils.Version;
 import org.apache.uima.ducc.common.utils.id.DuccId;
@@ -63,7 +70,8 @@ public class Scheduler
 {
     IJobManager jobManager;
     static DuccLogger     logger = DuccLogger.getLogger(Scheduler.class, COMPONENT_NAME);
-
+    static DuccId jobid = null;
+    String dot_regex = ".regex";
     boolean done = false;
     // Boolean force_epoch = false;
     String ducc_home;
@@ -75,6 +83,8 @@ public class Scheduler
     boolean needRecovery = false;                                         // UIMA-4142 tell outer layer that recovery is required
     AbstractDuccComponent baseComponent;                                  // UIMA-4142, pass in the base for reconfig - reread ducc.properties
     NodePool[] nodepools;                                                 // top-level nodepools
+    List<String> listRules = new ArrayList<String>();  // ordered list of rules 
+    Map<String,NodePool> mapRules = new HashMap<String,NodePool>();
     int max_order = 0;
 
     //
@@ -306,6 +316,8 @@ public class Scheduler
         this.configuration = null;
         this.defaultDomain = null;
         this.nodepools = null;
+        this.mapRules.clear();
+        this.listRules.clear();
         this.max_order = 0;
         this.busyShares.clear();
         this.vacatedShares.clear();
@@ -482,6 +494,87 @@ public class Scheduler
             updateNodepoolsByNode(s, pool);        // maps from both the fully-qualified name and th shortnmae
         }
     }
+    
+    /*
+     * Return the regular expression stored in DUCC_HOME/resources/<nodefile>; the file's
+     * lines are trimmed and concatenated.  The regex is logged once it has been validated.
+     * Throws IllegalConfigurationException if nodefile is null, the file cannot be found
+     * or read, the content is empty, or the content is not a valid regex.
+     */
+    String readNodepoolRegex(String nodefile)
+    		throws IllegalConfigurationException
+    {
+    	String location = "readNodepoolRegex";
+    	// Validation errors are raised outside the I/O try block so that the catch-all
+    	// below cannot re-wrap them (and print a stack trace) on their way out.
+    	if(nodefile == null) {
+    		throw new IllegalConfigurationException("Missing parameter \"nodefile\".");
+    	}
+    	BufferedReader br = null;
+    	StringBuilder sb = new StringBuilder();
+    	try {
+    		String fn = IDuccEnv.DUCC_HOME+"/resources/"+nodefile;
+    		br = new BufferedReader(new FileReader(fn));
+    		for(String line = br.readLine(); line != null; line = br.readLine()) {
+    			sb.append(line.trim());
+    		}
+    	} catch (FileNotFoundException e) {
+    		throw new IllegalConfigurationException("File not found: "+nodefile);
+    	} catch (IOException e) {
+    		throw new IllegalConfigurationException("File I/O error: "+nodefile);
+    	} catch (Exception e) {
+    		e.printStackTrace();
+    		throw new IllegalConfigurationException(e);
+    	} finally {
+    		if ( br != null ) {
+    			try { br.close(); } catch (IOException ignored) { }
+    		}
+    	}
+    	String regex = sb.toString();
+    	if(regex.isEmpty()) {
+    		throw new IllegalConfigurationException("Missing regex in "+nodefile);
+    	}
+    	try {
+    		Pattern.compile(regex);   // validation only; the rule is returned as a String
+    	} catch (java.util.regex.PatternSyntaxException e) {
+    		throw new IllegalConfigurationException("Illegal regex in "+nodefile);
+    	}
+    	logger.info(location, jobid, nodefile+":"+regex);
+    	return regex;
+    }
+    
+    /* Register a rule for nodepool when dp's "nodefile" names a ".regex" file; any failure is logged, not thrown. */
+    void mapNodeRule(DuccProperties dp, NodePool nodepool) {
+    	String location = "mapNodeRule";
+    	try {
+    		String nodefile = dp.getProperty("nodefile");
+    		boolean isRuleFile = (nodefile != null) && nodefile.endsWith(dot_regex);
+    		if(isRuleFile) {
+    			// the content of the file is the rule itself
+    			addRule(readNodepoolRegex(nodefile), nodepool);
+    		}
+    	}
+    	catch(Exception e) {
+    		logger.error(location, jobid, e);
+    	}
+    }
+        
+    /*
+     * Record noderule -> np unless the rule is null or already known (first one seen wins).
+     */
+    private void addRule(String noderule, NodePool np) {
+    	String location = "addRule";
+    	if(noderule == null) {
+    		return;
+    	}
+    	if(mapRules.containsKey(noderule)) {
+    		logger.warn(location, jobid, "duplicate ignored: ", noderule, np.getId());
+    		return;
+    	}
+    	listRules.add(noderule);
+    	mapRules.put(noderule, np);
+    	logger.info(location, jobid, noderule, np.getId());
+    }
 
     /**
      * (Recursively) build up the heirarchy under the parent nodepool.
@@ -497,7 +590,7 @@ public class Scheduler
             int search_order = dp.getIntProperty("search-order", 100);
             NodePool child = parent.createSubpool(id, nodes, search_order);
             mapNodesToNodepool(nodes, child);
-
+            mapNodeRule(dp, child);
             @SuppressWarnings("unchecked")
 			List<DuccProperties> grandkids = (List<DuccProperties>) dp.get("children");
             createSubpools(child, grandkids);            
@@ -600,7 +693,7 @@ public class Scheduler
 
             mapNodesToNodepool(nodes, nodepools[i]);
             logger.info(methodName, null, "Created top-level nodepool", id);
-
+            mapNodeRule(np, nodepools[i]);
             @SuppressWarnings("unchecked")
 			List<DuccProperties> children = (List<DuccProperties>) np.get("children");
             createSubpools(nodepools[i], children);
@@ -1132,14 +1225,68 @@ public class Scheduler
             logger.info(methodName, null, "Map", shortname, "to", np.getId());
         }
     }
-
+    
+    /*
+     * Find the nodepool whose rule (a regular expression) fully matches the node's canonical
+     * name, short name or ip.  Rules are tried in listRules order and the first match wins.
+     * Returns null when no rule matches or when the lookup fails (the failure is logged).
+     */
+    private NodePool findNodepoolByRule(NodeIdentity ni) {
+    	String location = "findNodepoolByRule";
+    	NodePool np = null;
+    	try {
+    		String name1 = ni.getCanonicalName();
+    		String name2 = ni.getShortName();
+    		String ip = ni.getIp();
+    		logger.info(location, jobid, mapRules.size(), name1, name2, ip);
+    		for(String noderule : listRules) {
+    			// keep the pool in a local: np must stay null unless this rule really matches
+    			NodePool candidate = mapRules.get(noderule);
+    			// match name with domain
+    			if(name1.matches(noderule)) {
+    				logger.info(location, jobid, "match by name: ", noderule, name1, candidate.getId());
+    				np = candidate;
+    				break;
+    			}
+    			logger.debug(location, jobid, "no match by name: ", noderule, name1);
+    			// match name without domain
+    			if(name2.matches(noderule)) {
+    				logger.info(location, jobid, "match by name: ", noderule, name2, candidate.getId());
+    				np = candidate;
+    				break;
+    			}
+    			logger.debug(location, jobid, "no match by name: ", noderule, name2);
+    			// match ip
+    			if(ip.matches(noderule)) {
+    				logger.info(location, jobid, "match by ip: ", noderule, ip,  candidate.getId());
+    				np = candidate;
+    				break;
+    			}
+    			logger.debug(location, jobid, "no match by ip: ", noderule, ip);
+    		}
+    	}
+    	catch(Exception e) {
+    		logger.error(location, jobid, e);
+    	}
+    	return np;
+    }
+    
     //
     // Return a nodepool by Node.  If the node can't be associated with a nodepool, return the
     // default nodepool, which is always the first one defined in the config file.
     //
     NodePool getNodepoolByName(NodeIdentity ni)
     {
-        NodePool np = nodepoolsByNode.get( ni.getCanonicalName() );
+    	String location = "getNodepoolByName";
+    	NodePool np = findNodepoolByRule(ni);
+    	if(np != null) {
+    		String text = "node:"+ni.getShortName()+" "+np.getId()+" "+"add by rule.";
+    		logger.info(location, jobid, text);
+    		updateNodepoolsByNode(ni.getCanonicalName(), np);
+    	}
+    	if(np == null) {
+    		np = nodepoolsByNode.get( ni.getCanonicalName() );
+    	}
         if ( np == null ) {
             np = nodepoolsByNode.get( ni.getIp() );
         }