You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by de...@apache.org on 2018/05/11 14:10:52 UTC
svn commit: r1831427 - in /uima/uima-ducc/trunk: src/main/admin/
uima-ducc-common/src/main/java/org/apache/uima/ducc/common/
uima-ducc-common/src/main/java/org/apache/uima/ducc/common/utils/
uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ uima-du...
Author: degenaro
Date: Fri May 11 14:10:52 2018
New Revision: 1831427
URL: http://svn.apache.org/viewvc?rev=1831427&view=rev
Log:
UIMA-5742 Reliable DUCC - support nodefile with .regex suffix comprising a regular expression to classify nodes into a NodePool
Modified:
uima/uima-ducc/trunk/src/main/admin/ducc_util.py
uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/NodeConfiguration.java
uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/utils/DuccSchedulerClasses.java
uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/admin-commands.tex
uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-classes.tex
uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java
Modified: uima/uima-ducc/trunk/src/main/admin/ducc_util.py
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/src/main/admin/ducc_util.py?rev=1831427&r1=1831426&r2=1831427&view=diff
==============================================================================
--- uima/uima-ducc/trunk/src/main/admin/ducc_util.py (original)
+++ uima/uima-ducc/trunk/src/main/admin/ducc_util.py Fri May 11 14:10:52 2018
@@ -1106,11 +1106,14 @@ class DuccUtil(DuccBase):
#
# Read the nodefile, recursing into 'imports' if needed, returning a
# map. The map is keyed on filename, with each entry a list of the nodes.
+ # Skip file with suffix ".regex".
#
def read_nodefile(self, nodefile, ret):
#print 'READ_NODEFILE:', nodefile, ret
n_nodes = 0
- if ( os.path.exists(nodefile) ):
+ if(nodefile.endswith('.regex')):
+ pass
+ elif ( os.path.exists(nodefile) ):
nodes = []
f = open(nodefile)
for node in f:
Modified: uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/NodeConfiguration.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/NodeConfiguration.java?rev=1831427&r1=1831426&r2=1831427&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/NodeConfiguration.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/NodeConfiguration.java Fri May 11 14:10:52 2018
@@ -33,6 +33,7 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
+import java.util.regex.Pattern;
import org.apache.uima.ducc.common.utils.DuccLogger;
import org.apache.uima.ducc.common.utils.DuccProperties;
@@ -70,6 +71,9 @@ public class NodeConfiguration
Map<String, DuccProperties> poolsByNodefile = new HashMap<String, DuccProperties>(); // nodepool node file -> nodepool props
Map<String, DuccProperties> poolsByNodeName = new HashMap<String, DuccProperties>(); // Nodepools, by node
+ List<String> listRules = new ArrayList<String>(); // ordered list of rules
+ Map<String, DuccProperties> mapRules = new HashMap<String, DuccProperties>();
+
Map<String, String> allImports = new HashMap<String, String>(); // map nodefile -> importer, map for dup checking
Map<String, String> referrers = new HashMap<String, String>(); // map nodefile -> referring nodepool, for dup checking
@@ -86,6 +90,9 @@ public class NodeConfiguration
DuccProperties reserveDefault = null;
String ducc_home = null;
+ String dot_regex = ".regex";
+ String key_regex = "regex";
+
public NodeConfiguration(String config_file_name, String ducc_nodes, String ducc_users, DuccLogger logger)
{
this.config_file_name = config_file_name;
@@ -744,11 +751,75 @@ public class NodeConfiguration
}
}
-
+
+ /**
+  * Find the name of the nodepool whose regex rule matches the given node.
+  * Rules are tried in the order they were added to listRules and the first
+  * matching rule wins, which agrees with the first-match (break on match)
+  * lookup the RM Scheduler performs on the same rules.
+  *
+  * @param node the node identifier to classify; null matches nothing
+  * @return the "name" property of the first matching nodepool, or null
+  *         when no rule matches
+  */
+ public String findNodePoolByRule(String node) {
+     if(node == null) {
+         return null;
+     }
+     for(String regex : listRules) {
+         if(node.matches(regex)) {
+             DuccProperties dp = mapRules.get(regex);
+             if(dp != null) {
+                 // stop at the first hit so later, overlapping rules cannot override it
+                 return dp.getProperty("name");
+             }
+         }
+     }
+     return null;
+ }
+
+ /**
+  * Read a nodepool ".regex" file and register its content as the rule for a nodepool.
+  * The trimmed lines of the file are concatenated into one regular expression, which
+  * must be non-empty, not already registered, and compilable. On success the rule is
+  * appended to listRules, mapped to np in mapRules, and stored in np under key_regex.
+  *
+  * @param nodefile name of the regex file; it is passed through resolve() to locate it
+  * @param np       properties of the nodepool the rule belongs to
+  * @throws IllegalConfigurationException if the parameter or file is missing, the file
+  *         cannot be read, or the regex is empty, duplicated or malformed
+  */
+ void readNodepoolRegex(String nodefile, DuccProperties np)
+ throws IllegalConfigurationException
+ {
+     BufferedReader br = null;
+     try {
+         if(nodefile == null) {
+             throw new IllegalConfigurationException("Missing parameter \"nodefile\".");
+         }
+         String fn = resolve(nodefile);
+         if(fn == null) {
+             throw new IllegalConfigurationException("Missing file \""+nodefile+"\".");
+         }
+         br = new BufferedReader(new FileReader(fn));
+         String line = null;
+         StringBuilder sb = new StringBuilder();
+         while ( (line = br.readLine()) != null ) {
+             sb.append(line.trim());
+         }
+         String regex = sb.toString();
+         if(regex.isEmpty()) {
+             throw new IllegalConfigurationException("Missing regex in "+nodefile);
+         }
+         if(mapRules.containsKey(regex)) {
+             throw new IllegalConfigurationException("Duplicate regex in "+nodefile);
+         }
+         try {
+             // compile only to validate the syntax; the rule is stored as a string
+             Pattern.compile(regex);
+         }
+         catch(java.util.regex.PatternSyntaxException e) {
+             throw new IllegalConfigurationException("Illegal regex in "+nodefile);
+         }
+         listRules.add(regex);
+         mapRules.put(regex, np);
+         np.put(key_regex, regex);
+     }
+     catch (IllegalConfigurationException e) {
+         // our own diagnostics: pass through untouched instead of letting the
+         // generic handler below stack-trace and re-wrap them
+         throw e;
+     }
+     catch (FileNotFoundException e) {
+         throw new IllegalConfigurationException("File not found: "+nodefile);
+     }
+     catch (IOException e) {
+         throw new IllegalConfigurationException("File I/O error: "+nodefile);
+     }
+     catch ( Exception e ) {
+         e.printStackTrace();
+         throw new IllegalConfigurationException(e);
+     }
+     finally {
+         if ( br != null ) {
+             // best-effort close; the content has already been consumed
+             try { br.close(); } catch (IOException ignored) { }
+         }
+     }
+ }
+
void readNodepoolNodes(String nodefile, DuccProperties p, String domain)
throws IllegalConfigurationException
{
- String methodName = "readnodepoolFiles";
+ String methodName = "readNodepoolNodes";
@SuppressWarnings("unchecked")
Map<String, String> nodes = (Map<String, String>) p.get("nodes");
if ( nodes == null ) {
@@ -986,7 +1057,14 @@ public class NodeConfiguration
// if we get here without crash the node pool files are not inconsistent
for ( String k : poolsByNodefile.keySet() ) {
- readNodepoolNodes(k, (DuccProperties) poolsByNodefile.get(k), domain);
+ DuccProperties dp = (DuccProperties) poolsByNodefile.get(k);
+ String nodefile = dp.getProperty("nodefile");
+ if(nodefile.endsWith(dot_regex)) {
+ readNodepoolRegex(k, dp);
+ }
+ else {
+ readNodepoolNodes(k, dp, domain);
+ }
}
// TODO: Test above procedures
// Assign ducc.nodes to the one allowable top level np with no pool file
@@ -1215,9 +1293,16 @@ public class NodeConfiguration
String nodefile = p.getProperty("nodefile");
String nfheader = " Node File: ";
logInfo(methodName, indent + nfheader + (nodefile == null ? "None" : nodefile));
- @SuppressWarnings("unchecked")
- Map<String, String> nodes = (Map<String, String>) p.get("nodes");
- logInfo(methodName, formatNodes(nodes, indent.length() + nfheader.length()));
+
+ String regex = p.getProperty(key_regex);
+ if(regex != null) {
+ logInfo(methodName, indent + " Node Rule: " + regex );
+ }
+ else {
+ @SuppressWarnings("unchecked")
+ Map<String, String> nodes = (Map<String, String>) p.get("nodes");
+ logInfo(methodName, formatNodes(nodes, indent.length() + nfheader.length()));
+ }
@SuppressWarnings("unchecked")
List<DuccProperties> class_set = (List<DuccProperties>) p.get("classes");
Modified: uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/utils/DuccSchedulerClasses.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/utils/DuccSchedulerClasses.java?rev=1831427&r1=1831426&r2=1831427&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/utils/DuccSchedulerClasses.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/utils/DuccSchedulerClasses.java Fri May 11 14:10:52 2018
@@ -118,6 +118,12 @@ public class DuccSchedulerClasses {
if(nodepool != null) {
retVal = nodepool;
}
+ else {
+ nodepool = nc.findNodePoolByRule(node);
+ if(nodepool != null) {
+ retVal = nodepool;
+ }
+ }
return retVal;
}
/**
Modified: uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/admin-commands.tex
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/admin-commands.tex?rev=1831427&r1=1831426&r2=1831427&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/admin-commands.tex (original)
+++ uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/admin-commands.tex Fri May 11 14:10:52 2018
@@ -104,6 +104,8 @@ start_ducc -c sm -c pm -c rm -c or@bj22
To start only agents, run start\_ducc specifying a nodelist explicitly. Note that the broker
must have already been started.
+ Files in the nodelist ending with suffix .regex are skipped.
+
To start a specific management process, run start\_ducc with the -c component parameter,
specify the component that should be started.
@@ -249,7 +251,7 @@ start_ducc -c rm
existance of DUCC processes in the cluster, to forcibly ({\em kill -9}) terminate them, and to
more gracefully terminate them ({\em kill -INT}).
-
+ Files in the nodelist ending with suffix .regex are skipped.
\subsection{check\_ducc}
\label{subsec:admin.check-ducc}
@@ -331,7 +333,9 @@ check_ducc -n nlist1 -n nlist2
\end{description}
-
+ \subsubsection{{\em Notes:}}
+
+ Files in the nodelist ending with suffix .regex are skipped.
\subsection{build\_duccling}
\label{subsec:admin.build-duccling}
Modified: uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-classes.tex
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-classes.tex?rev=1831427&r1=1831426&r2=1831427&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-classes.tex (original)
+++ uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-classes.tex Fri May 11 14:10:52 2018
@@ -159,6 +159,9 @@ ducc.classes and is specified by the pro
\item[nodefile] This is the name of a file containing the names of the nodes
which are members of this nodepool.
+ If the file name ends with suffix {\em .regex} then the content is instead
+ expected to be a regular expression which is employed to match nodes comprising
+ the members of the nodepool.
\item[parent] This is used to indicate which nodepool is the logical parent.
Any nodepool without a {\em parent} is considered a top-level nodepool.
@@ -197,7 +200,31 @@ ducc.classes and is specified by the pro
\end{figure}
+ The following example shows an entry utilizing the {\em .regex} feature, followed
+ by the content of the specified file comprising a regular expression.
+ \begin{figure}[H]
+
+\begin{verbatim}
+ Nodepool jobdriver { nodefile jobdriver.nodes.regex }
+\end{verbatim}
+ \caption{Sample Nodepool regex specification}
+ \label{fig:nodepool.regex.specifiecation}
+
+ \end{figure}
+
+
+ \begin{figure}[H]
+
+\begin{verbatim}
+ hostA|hostB|hostC
+\end{verbatim}
+ \caption{Sample file contents for jobdriver.nodes.regex }
+ \label{fig:nodepool.regex}
+
+ \end{figure}
+
+
\subsection{Class Definitions}
\label{subsubsec:class.configuration}
Modified: uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java?rev=1831427&r1=1831426&r2=1831427&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java Fri May 11 14:10:52 2018
@@ -18,13 +18,19 @@
*/
package org.apache.uima.ducc.rm.scheduler;
+import java.io.BufferedReader;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.regex.Pattern;
+import org.apache.uima.ducc.common.IDuccEnv;
import org.apache.uima.ducc.common.Node;
import org.apache.uima.ducc.common.NodeConfiguration;
import org.apache.uima.ducc.common.NodeIdentity;
@@ -41,6 +47,7 @@ import org.apache.uima.ducc.common.db.Db
import org.apache.uima.ducc.common.utils.DuccLogger;
import org.apache.uima.ducc.common.utils.DuccProperties;
import org.apache.uima.ducc.common.utils.DuccPropertiesResolver;
+import org.apache.uima.ducc.common.utils.IllegalConfigurationException;
import org.apache.uima.ducc.common.utils.SystemPropertyResolver;
import org.apache.uima.ducc.common.utils.Version;
import org.apache.uima.ducc.common.utils.id.DuccId;
@@ -63,7 +70,8 @@ public class Scheduler
{
IJobManager jobManager;
static DuccLogger logger = DuccLogger.getLogger(Scheduler.class, COMPONENT_NAME);
-
+ static DuccId jobid = null;
+ String dot_regex = ".regex";
boolean done = false;
// Boolean force_epoch = false;
String ducc_home;
@@ -75,6 +83,8 @@ public class Scheduler
boolean needRecovery = false; // UIMA-4142 tell outer layer that recovery is required
AbstractDuccComponent baseComponent; // UIMA-4142, pass in the base for reconfig - reread ducc.properties
NodePool[] nodepools; // top-level nodepools
+ List<String> listRules = new ArrayList<String>(); // ordered list of rules
+ Map<String,NodePool> mapRules = new HashMap<String,NodePool>();
int max_order = 0;
//
@@ -306,6 +316,8 @@ public class Scheduler
this.configuration = null;
this.defaultDomain = null;
this.nodepools = null;
+ this.mapRules.clear();
+ this.listRules.clear();
this.max_order = 0;
this.busyShares.clear();
this.vacatedShares.clear();
@@ -482,6 +494,87 @@ public class Scheduler
updateNodepoolsByNode(s, pool); // maps from both the fully-qualified name and th shortnmae
}
}
+
+ /**
+  * Read a nodepool ".regex" file from DUCC_HOME/resources and return its content as a
+  * single regular expression. The trimmed lines of the file are concatenated; the result
+  * must be non-empty and compilable. The file name and regex are logged at info level.
+  *
+  * @param nodefile name of the regex file, relative to DUCC_HOME/resources
+  * @return the validated regular expression text
+  * @throws IllegalConfigurationException if the parameter is missing, the file cannot
+  *         be found or read, or the regex is empty or malformed
+  */
+ String readNodepoolRegex(String nodefile)
+ throws IllegalConfigurationException
+ {
+     String location = "readNodepoolRegex";
+     String regex = null;
+     BufferedReader br = null;
+     try {
+         if(nodefile == null) {
+             throw new IllegalConfigurationException("Missing parameter \"nodefile\".");
+         }
+         String fn = IDuccEnv.DUCC_HOME+"/resources/"+nodefile;
+         br = new BufferedReader(new FileReader(fn));
+         String line = null;
+         StringBuilder sb = new StringBuilder();
+         while ( (line = br.readLine()) != null ) {
+             sb.append(line.trim());
+         }
+         regex = sb.toString();
+         if(regex.isEmpty()) {
+             throw new IllegalConfigurationException("Missing regex in "+nodefile);
+         }
+         try {
+             // compile only to validate the syntax; callers keep the rule as a string
+             Pattern.compile(regex);
+         }
+         catch(java.util.regex.PatternSyntaxException e) {
+             throw new IllegalConfigurationException("Illegal regex in "+nodefile);
+         }
+         String text = nodefile+":"+regex;
+         logger.info(location, jobid, text);
+     }
+     catch (IllegalConfigurationException e) {
+         // our own diagnostics: pass through untouched instead of letting the
+         // generic handler below stack-trace and re-wrap them
+         throw e;
+     }
+     catch (FileNotFoundException e) {
+         throw new IllegalConfigurationException("File not found: "+nodefile);
+     }
+     catch (IOException e) {
+         throw new IllegalConfigurationException("File I/O error: "+nodefile);
+     }
+     catch ( Exception e ) {
+         e.printStackTrace();
+         throw new IllegalConfigurationException(e);
+     }
+     finally {
+         if ( br != null ) {
+             // best-effort close; the content has already been consumed
+             try { br.close(); } catch (IOException ignored) { }
+         }
+     }
+     return regex;
+ }
+
+ void mapNodeRule(DuccProperties dp, NodePool nodepool) {
+ String location = "mapNodeRule";
+ try {
+ String nodefile = dp.getProperty("nodefile");
+ if(nodefile != null) {
+ if(nodefile.endsWith(dot_regex)) {
+ String noderule = readNodepoolRegex(nodefile);
+ addRule(noderule, nodepool);
+ }
+ }
+ }
+ catch(Exception e) {
+ logger.error(location, jobid, e);
+ }
+ }
+
+ /*
+  * Register a rule for a nodepool. A null rule is ignored. A rule that is
+  * already registered keeps its original nodepool (first one seen wins) and
+  * the duplicate is reported with a warning.
+  */
+ private void addRule(String noderule, NodePool np) {
+     String location = "addRule";
+     if(noderule == null) {
+         return;
+     }
+     if(!mapRules.containsKey(noderule)) {
+         listRules.add(noderule);
+         mapRules.put(noderule, np);
+         logger.info(location, jobid, noderule, np.getId());
+         return;
+     }
+     logger.warn(location, jobid, "duplicate ignored: ", noderule, np.getId());
+ }
/**
* (Recursively) build up the heirarchy under the parent nodepool.
@@ -497,7 +590,7 @@ public class Scheduler
int search_order = dp.getIntProperty("search-order", 100);
NodePool child = parent.createSubpool(id, nodes, search_order);
mapNodesToNodepool(nodes, child);
-
+ mapNodeRule(dp, child);
@SuppressWarnings("unchecked")
List<DuccProperties> grandkids = (List<DuccProperties>) dp.get("children");
createSubpools(child, grandkids);
@@ -600,7 +693,7 @@ public class Scheduler
mapNodesToNodepool(nodes, nodepools[i]);
logger.info(methodName, null, "Created top-level nodepool", id);
-
+ mapNodeRule(np, nodepools[i]);
@SuppressWarnings("unchecked")
List<DuccProperties> children = (List<DuccProperties>) np.get("children");
createSubpools(nodepools[i], children);
@@ -1132,14 +1225,68 @@ public class Scheduler
logger.info(methodName, null, "Map", shortname, "to", np.getId());
}
}
-
+
+ /*
+  * Find nodepool by rule, which is a regular expression.
+  *
+  * Rules are tried in listRules order; for each rule the node's canonical
+  * name, short name and ip are tested in that order, and the first match
+  * wins. Returns null when no rule matches or when the lookup fails (the
+  * failure is logged). The result is assigned only on an actual match, so
+  * a node that matches nothing is never attributed to the last rule tried.
+  */
+ private NodePool findNodepoolByRule(NodeIdentity ni) {
+     String location = "findNodepoolByRule";
+     NodePool np = null;
+     try {
+         String name1 = ni.getCanonicalName();
+         String name2 = ni.getShortName();
+         String ip = ni.getIp();
+         logger.info(location, jobid, mapRules.size(), name1, name2, ip);
+         for(String noderule : listRules) {
+             NodePool candidate = mapRules.get(noderule);
+             // match name with domain
+             if(name1 != null && name1.matches(noderule)) {
+                 logger.info(location, jobid, "match by name: ", noderule, name1, candidate.getId());
+                 np = candidate;
+                 break;
+             }
+             logger.debug(location, jobid, "no match by name: ", noderule, name1);
+             // match name without domain
+             if(name2 != null && name2.matches(noderule)) {
+                 logger.info(location, jobid, "match by name: ", noderule, name2, candidate.getId());
+                 np = candidate;
+                 break;
+             }
+             logger.debug(location, jobid, "no match by name: ", noderule, name2);
+             // match ip
+             if(ip != null && ip.matches(noderule)) {
+                 logger.info(location, jobid, "match by ip: ", noderule, ip, candidate.getId());
+                 np = candidate;
+                 break;
+             }
+             logger.debug(location, jobid, "no match by ip: ", noderule, ip);
+         }
+     }
+     catch(Exception e) {
+         logger.error(location, jobid, e);
+     }
+     return np;
+ }
+
//
// Return a nodepool by Node. If the node can't be associated with a nodepool, return the
// default nodepool, which is always the first one defined in the config file.
//
NodePool getNodepoolByName(NodeIdentity ni)
{
- NodePool np = nodepoolsByNode.get( ni.getCanonicalName() );
+ String location = "getNodepoolByName";
+ NodePool np = findNodepoolByRule(ni);
+ if(np != null) {
+ String text = "node:"+ni.getShortName()+" "+np.getId()+" "+"add by rule.";
+ logger.info(location, jobid, text);
+ updateNodepoolsByNode(ni.getCanonicalName(), np);
+ }
+ if(np == null) {
+ np = nodepoolsByNode.get( ni.getCanonicalName() );
+ }
if ( np == null ) {
np = nodepoolsByNode.get( ni.getIp() );
}