You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by cw...@apache.org on 2013/04/18 21:41:47 UTC
svn commit: r1469537 - in /uima/sandbox/uima-ducc/trunk:
uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/
uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/
uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/processors/ u...
Author: cwiklik
Date: Thu Apr 18 19:41:46 2013
New Revision: 1469537
URL: http://svn.apache.org/r1469537
Log:
UIMA-2804 Modified to perform cgroup cleanup on agent startup, correctly report major swap faults
Modified:
uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java
uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsManager.java
uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/DuccCommandExecutor.java
uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/processors/LinuxProcessMetricsProcessor.java
uima/sandbox/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/agent/metrics/swap/DuccProcessMemoryPageLoadUsage.java
uima/sandbox/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/agent/metrics/swap/DuccProcessSwapSpaceUsage.java
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java?rev=1469537&r1=1469536&r2=1469537&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java Thu Apr 18 19:41:46 2013
@@ -139,7 +139,7 @@ public class NodeAgent extends AbstractD
public int shareQuantum;
- public boolean virtualAgent = true;
+ public boolean virtualAgent = false;
/**
* Ctor used exclusively for black-box testing of this class.
@@ -194,7 +194,10 @@ public class NodeAgent extends AbstractD
exclusionParser.parse(exclusionFile);
excludeNodeFromCGroups = exclusionParser.cgroupsExcluded();
excludeAPs = exclusionParser.apExcluded();
-
+ if ( excludeNodeFromCGroups ) {
+ logger.info("nodeAgent", null,
+ "------- Node Explicitly Excluded From Using CGroups. Check File:"+exclusionFile);
+ }
System.out.println("excludeNodeFromCGroups="+excludeNodeFromCGroups+" excludeAPs="+excludeAPs);
} else {
System.out.println("Running with No exclusion File");
@@ -220,6 +223,13 @@ public class NodeAgent extends AbstractD
useCgroups = true;
logger.info("nodeAgent", null,
"------- Agent Running with CGroups Enabled");
+ try {
+ // remove stale CGroups
+ cgroupsManager.cleanupOnStartup();
+ } catch( Exception e) {
+ logger.error("nodeAgent", null,e);
+
+ }
} else {
logger.info("nodeAgent", null,
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsManager.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsManager.java?rev=1469537&r1=1469536&r2=1469537&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsManager.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsManager.java Thu Apr 18 19:41:46 2013
@@ -2,9 +2,16 @@ package org.apache.uima.ducc.agent.launc
import java.io.BufferedReader;
import java.io.File;
+import java.io.FileReader;
+import java.io.InputStream;
import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.HashSet;
import java.util.LinkedHashSet;
+import java.util.List;
import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import org.apache.uima.ducc.common.utils.DuccLogger;
import org.apache.uima.ducc.common.utils.Utils;
@@ -12,110 +19,307 @@ import org.apache.uima.ducc.common.utils
/**
* Manages cgroup container on a node
*
- * Supported operations:
- * - cgcreate - creates cgroup container
- * - cgset - sets max memory limit for an existing container
- *
- *
+ * Supported operations: - cgcreate - creates cgroup container - cgset - sets
+ * max memory limit for an existing container
+ *
+ *
*/
public class CGroupsManager {
private DuccLogger agentLogger = null;
-
+
private Set<String> containerIds = new LinkedHashSet<String>();
private String cgroupBaseDir = "";
- private String cgroupSubsystems = ""; // comma separated list of subsystems eg. memory,cpu
+ private String cgroupSubsystems = ""; // comma separated list of subsystems
+ // eg. memory,cpu
+
/**
* @param args
*/
public static void main(String[] args) {
try {
-
- CGroupsManager cgMgr = new CGroupsManager("/cgroup/ducc", "memory", null);
- System.out.println("Cgroups Installed:"+cgMgr.cgroupExists("/cgroup/ducc"));
+
+ CGroupsManager cgMgr = new CGroupsManager("/cgroup/ducc", "memory",
+ null);
+ System.out.println("Cgroups Installed:"
+ + cgMgr.cgroupExists("/cgroup/ducc"));
Set<String> containers = cgMgr.collectExistingContainers();
- for ( String containerId : containers ) {
- System.out.println("Existing CGroup Container ID:"+containerId);
+ for (String containerId : containers) {
+ System.out.println("Existing CGroup Container ID:"
+ + containerId);
}
cgMgr.createContainer(args[0], args[2], true);
- cgMgr.setContainerMaxMemoryLimit(args[0], args[2], true, Long.parseLong(args[1]));
- synchronized( cgMgr ) {
- cgMgr.wait(60000);
- }
- cgMgr.destroyContainer(args[0]);
-
- } catch( Exception e) {
+ cgMgr.setContainerMaxMemoryLimit(args[0], args[2], true,
+ Long.parseLong(args[1]));
+ synchronized (cgMgr) {
+ cgMgr.wait(60000);
+ }
+ cgMgr.destroyContainer(args[0]);
+
+ } catch (Exception e) {
e.printStackTrace();
}
}
-
- public CGroupsManager(String cgroupBaseDir, String cgroupSubsystems, DuccLogger agentLogger ) {
+
+ public CGroupsManager(String cgroupBaseDir, String cgroupSubsystems,
+ DuccLogger agentLogger) {
this.cgroupBaseDir = cgroupBaseDir;
this.cgroupSubsystems = cgroupSubsystems;
this.agentLogger = agentLogger;
}
- /**
- * Creates cgroup container with a given id and owner.
- *
- * @param containerId - new cgroup container id
- * @param userId - owner of the cgroup container
- * @param useDuccSpawn - use duccling to run 'cgcreate' command
- *
- * @return - true on success, false otherwise
- *
- * @throws Exception
- */
- public boolean createContainer(String containerId, String userId, boolean useDuccSpawn ) throws Exception {
+
+ /**
+  * Reads a file that lists one PID per line and returns the PIDs found.
+  * Lines are trimmed and blank lines are skipped.
+  *
+  * @param f
+  *            - file to read; cleanupOnStartup() passes a cgroup's
+  *            cgroup.procs file
+  * @return - PIDs in file order; an empty array (never null) when the file
+  *         lists none
+  *
+  * @throws Exception
+  *             - if the file cannot be opened or read
+  */
+ private String[] readPids(File f) throws Exception {
+     List<String> pids = new ArrayList<String>();
+     BufferedReader br = new BufferedReader(new FileReader(f));
+     try {
+         String line;
+         while ((line = br.readLine()) != null) {
+             String pid = line.trim();
+             // a blank line is not a PID; callers poll until this array is empty
+             if (pid.length() > 0) {
+                 pids.add(pid);
+             }
+         }
+     } finally {
+         // release the file handle even if readLine() fails
+         br.close();
+     }
+     return pids.toArray(new String[pids.size()]);
+ }
+ /**
+  * Finds stale cgroups under the cgroup base directory and removes them.
+  * Only folders whose name contains ducc's cgroup naming convention
+  * {@code <number>.<number>.<number>} are considered.
+  * For each such cgroup the file {@code <cgroupBaseDir>/<id>/cgroup.procs}
+  * is read to obtain PIDs of processes still attached to the cgroup. Every
+  * listed PID that also appears in the node's process list is killed via
+  * kill(), the method then waits for cgroup.procs to become empty, and
+  * finally the cgroup folder is removed. A failure while handling one
+  * cgroup is logged and does not stop the cleanup of the remaining ones.
+  * If the calling thread is interrupted while waiting, the interrupt flag
+  * is restored and the cleanup is abandoned.
+  *
+  * @throws Exception
+  *             - declared by getProcessesOnNode(), which is called first
+  */
+ public void cleanupOnStartup() throws Exception {
+
+     Set<NodeProcessInfo> processes = getProcessesOnNode();
+     // Match any folder under the base dir that has syntax
+     // <number>.<number>.<number>
+     // This syntax is assigned by ducc to each cgroup
+     // NOTE(review): find() also accepts names that merely contain the
+     // pattern - confirm whether a full match is intended
+     Pattern p = Pattern.compile("((\\d+)\\.(\\d+)\\.(\\d+))");
+
+     File cgroupsFolder = new File(cgroupBaseDir);
+     String[] files = cgroupsFolder.list();
+     if (files == null) {
+         // list() returns null when the path is not a directory or
+         // cannot be read; there is nothing to clean up in that case
+         agentLogger.info("cleanupOnStartup", null,
+                 "--- CGroup Base Dir:" + cgroupBaseDir
+                         + " Is Not a Readable Directory. Nothing To Clean Up");
+         return;
+     }
+     for (String cgroupFolder : files) {
+         Matcher m = p.matcher(cgroupFolder);
+         // only look at ducc's cgroups
+         if (!m.find()) {
+             continue;
+         }
+         try {
+             // procs file lists PIDs of processes still running in the cgroup
+             File f = new File(cgroupBaseDir + "/" + cgroupFolder
+                     + "/cgroup.procs");
+             String[] pids = readPids(f);
+             if (pids.length > 0) {
+                 // Got cgroup processes still running. Kill each one via -9
+                 for (String pid : pids) {
+                     for (NodeProcessInfo proc : processes) {
+                         if (proc.getPid().equals(pid)) {
+                             kill(proc.getUserid(), proc.getPid());
+                         }
+                     }
+                 }
+                 if (!waitUntilCGroupIsEmpty(cgroupFolder, f)) {
+                     // interrupted - stop the cleanup, flag already restored
+                     return;
+                 }
+             }
+
+             // rmdir may still fail; only claim success when it did not
+             if (destroyContainer(cgroupFolder)) {
+                 agentLogger.info("cleanupOnStartup", null,
+                         "--- Agent Removed Empty CGroup:" + cgroupFolder);
+             } else {
+                 agentLogger.error("cleanupOnStartup", null,
+                         "--- Agent Unable To Remove CGroup:" + cgroupFolder);
+             }
+         } catch (Exception e) {
+             agentLogger.error("cleanupOnStartup", null, e);
+         }
+     }
+ }
+
+ /**
+  * Polls a cgroup's procs file every 200 ms until it lists no PIDs. It may
+  * take some time for cgroups to update accounting after processes are
+  * killed.
+  *
+  * @param cgroupFolder
+  *            - cgroup folder name, used for logging only
+  * @param procsFile
+  *            - the cgroup's cgroup.procs file
+  * @return - true when the file became empty, false when the calling thread
+  *         was interrupted while waiting
+  *
+  * @throws Exception
+  *             - if the procs file cannot be read
+  */
+ private boolean waitUntilCGroupIsEmpty(String cgroupFolder, File procsFile)
+         throws Exception {
+     while (readPids(procsFile).length > 0) {
+         try {
+             synchronized (this) {
+                 agentLogger.info("cleanupOnStartup", null,
+                         "--- CGroup:" + cgroupFolder+ " procs file still showing processes running. Wait until CGroups updates acccounting");
+                 wait(200);
+             }
+         } catch (InterruptedException ee) {
+             // keep the interrupt visible to the caller instead of spinning
+             Thread.currentThread().interrupt();
+             return false;
+         }
+     }
+     return true;
+ }
+
+ /**
+  * Forcibly terminates a process and logs the outcome. On Windows the
+  * command is 'taskkill /PID pid'; otherwise it is '/bin/kill -9 pid'.
+  * On non-Windows systems, when system property
+  * ducc.agent.launcher.use.ducc_spawn is "true" (case-insensitive) and the
+  * launcher path from ducc.agent.launcher.ducc_spawn_path resolves to a
+  * non-null value, the kill command is run through the launcher as the
+  * given user. Failures are logged, never thrown. When no agent logger was
+  * supplied, output goes to stdout instead.
+  *
+  * @param user
+  *            - owner of the process; used when running through the launcher
+  *            and in log messages
+  * @param pid
+  *            - id of the process to kill
+  */
+ public void kill(final String user, final String pid) {
+     final String methodName = "kill";
+     BufferedReader reader = null;
+     try {
+         String c_launcher_path = Utils.resolvePlaceholderIfExists(
+                 System.getProperty("ducc.agent.launcher.ducc_spawn_path"),
+                 System.getProperties());
+         String cmdLine;
+         String arg;
+         boolean useDuccling = false;
+         if (Utils.isWindows()) {
+             cmdLine = "taskkill";
+             arg = "/PID";
+         } else {
+             // parseBoolean is null-safe and case-insensitive
+             useDuccling = Boolean.parseBoolean(System
+                     .getProperty("ducc.agent.launcher.use.ducc_spawn"));
+             cmdLine = "/bin/kill";
+             arg = "-9";
+         }
+         String[] duccling_nolog;
+         // same guard as launchCommand(): a null launcher path must not end
+         // up in the command array
+         if (useDuccling && c_launcher_path != null) {
+             duccling_nolog = new String[] { c_launcher_path, "-u", user,
+                     "--", cmdLine, arg, pid };
+         } else {
+             duccling_nolog = new String[] { cmdLine, arg, pid };
+         }
+
+         ProcessBuilder pb = new ProcessBuilder(duccling_nolog);
+         pb.redirectErrorStream(true);
+         java.lang.Process killedProcess = pb.start();
+         reader = new BufferedReader(new InputStreamReader(
+                 killedProcess.getInputStream()));
+         while (reader.readLine() != null) {
+             // dont care about the output, just drain the buffers
+         }
+         // reap the kill command and learn whether it actually succeeded
+         int exitCode = killedProcess.waitFor();
+
+         StringBuilder sb = new StringBuilder();
+         for (String part : duccling_nolog) {
+             sb.append(part).append(" ");
+         }
+         if (exitCode == 0) {
+             if (agentLogger == null) {
+                 System.out.println("--------- Killed Process:" + pid
+                         + " Owned by:" + user + " Command:" + sb.toString());
+             } else {
+                 agentLogger.info(methodName, null,
+                         "--------- Killed CGroup Process:" + pid + " Owned by:" + user
+                                 + " Command:" + sb.toString());
+             }
+         } else {
+             String failure = "--------- Unable To Kill Process:" + pid
+                     + " Owned by:" + user + " Exit Code:" + exitCode
+                     + " Command:" + sb.toString();
+             if (agentLogger == null) {
+                 System.out.println(failure);
+             } else {
+                 agentLogger.error(methodName, null, failure);
+             }
+         }
+
+     } catch (InterruptedException e) {
+         // keep the interrupt visible to the caller
+         Thread.currentThread().interrupt();
+         if (agentLogger == null) {
+             e.printStackTrace();
+         } else {
+             agentLogger.error(methodName, null, e);
+         }
+     } catch (Exception e) {
+         // the logger is optional (see main()), so guard against null here too
+         if (agentLogger == null) {
+             e.printStackTrace();
+         } else {
+             agentLogger.error(methodName, null, e);
+         }
+     } finally {
+         if (reader != null) {
+             try {
+                 reader.close();
+             } catch (Exception ignored) {
+                 // nothing useful can be done if close fails
+             }
+         }
+     }
+ }
+
+ /**
+ * Creates cgroup container with a given id and owner.
+ *
+ * @param containerId
+ * - new cgroup container id
+ * @param userId
+ * - owner of the cgroup container
+ * @param useDuccSpawn
+ * - use duccling to run 'cgcreate' command
+ *
+ * @return - true on success, false otherwise
+ *
+ * @throws Exception
+ */
+ public boolean createContainer(String containerId, String userId,
+ boolean useDuccSpawn) throws Exception {
+
try {
- String [] command = new String[] {"/usr/bin/cgcreate","-g", cgroupSubsystems+":ducc/"+containerId};
- int retCode = launchCommand(command, useDuccSpawn, userId, containerId);
- if ( retCode == 0 ) {
+ String[] command = new String[] { "/usr/bin/cgcreate", "-t",
+ "ducc", "-a", "ducc", "-g",
+ cgroupSubsystems + ":ducc/" + containerId };
+ int retCode = launchCommand(command, useDuccSpawn, "ducc",
+ containerId);
+ if (retCode == 0) {
containerIds.add(containerId);
+ agentLogger.info("createContainer", null, ">>>>"
+ + "SUCCESS - Created CGroup Container:" + containerId);
+
return true;
} else {
+ agentLogger.info("createContainer", null, ">>>>"
+ + "FAILURE - Unable To Create CGroup Container:"
+ + containerId);
+
return false;
}
- } catch ( Exception e ) {
+ } catch (Exception e) {
+ agentLogger.error("createContainer", null, ">>>>"
+ + "FAILURE - Unable To Create CGroup Container:"
+ + containerId, e);
+
return false;
}
}
+
/**
- * Sets the max memory use for an existing cgroup container.
+ * Sets the max memory use for an existing cgroup container.
*
- * @param containerId - existing container id for which limit will be set
- * @param userId - container owner
- * @param useDuccSpawn - run 'cgset' command as a user
- * @param containerMaxSize - max memory limit
+ * @param containerId
+ * - existing container id for which limit will be set
+ * @param userId
+ * - container owner
+ * @param useDuccSpawn
+ * - run 'cgset' command as a user
+ * @param containerMaxSize
+ * - max memory limit
*
* @return - true on success, false otherwise
*
* @throws Exception
*/
- public boolean setContainerMaxMemoryLimit( String containerId, String userId, boolean useDuccSpawn, long containerMaxSize) throws Exception {
+ public boolean setContainerMaxMemoryLimit(String containerId,
+ String userId, boolean useDuccSpawn, long containerMaxSize)
+ throws Exception {
try {
- String [] command = new String[] {"/usr/bin/cgset","-r", "memory.limit_in_bytes="+containerMaxSize, "ducc/"+containerId};
- int retCode = launchCommand(command, useDuccSpawn, userId, containerId);
- return retCode == 0 ? true : false;
- } catch ( Exception e ) {
+ String[] command = new String[] { "/usr/bin/cgset", "-r",
+ "memory.limit_in_bytes=" + containerMaxSize,
+ "ducc/" + containerId };
+ int retCode = launchCommand(command, useDuccSpawn, "ducc",
+ containerId);
+ if (retCode == 0) {
+ agentLogger.info("setContainerMaxMemoryLimit", null, ">>>>"
+ + "SUCCESS - Created CGroup Limit on Container:"
+ + containerId);
+ return true;
+ } else {
+ agentLogger.info("setContainerMaxMemoryLimit", null, ">>>>"
+ + "FAILURE - Unable To Create CGroup Container:"
+ + containerId);
+ return false;
+ }
+ } catch (Exception e) {
+ agentLogger.error("setContainerMaxMemoryLimit", null, ">>>>"
+ + "FAILURE - Unable To Set Limit On CGroup Container:"
+ + containerId, e);
return false;
}
}
-
+
/**
- * Removes cgroup container with a given id. Cgroups are implemented as
- * a virtual file system. All is needed here is just rmdir.
+ * Removes cgroup container with a given id. Cgroups are implemented as a
+ * virtual file system. All is needed here is just rmdir.
*
- * @param containerId - cgroup to remove
+ * @param containerId
+ * - cgroup to remove
* @return - true on success, false otherwise
*
* @throws Exception
*/
public boolean destroyContainer(String containerId) throws Exception {
try {
- if ( cgroupExists(cgroupBaseDir+"/"+containerId)) {
- String [] command = new String[] {"/bin/rmdir", cgroupBaseDir+"/"+containerId};
+ if (cgroupExists(cgroupBaseDir + "/" + containerId)) {
+ String[] command = new String[] { "/bin/rmdir",
+ cgroupBaseDir + "/" + containerId };
int retCode = launchCommand(command, false, "ducc", containerId);
- if ( retCode == 0 ) {
+ if (retCode == 0) {
containerIds.remove(containerId);
return true;
} else {
@@ -123,33 +327,34 @@ public class CGroupsManager {
}
}
return true; // nothing to do, cgroup does not exist
- } catch ( Exception e ) {
+ } catch (Exception e) {
return false;
}
}
-
- private int launchCommand(String[] command, boolean useDuccSpawn, String userId, String containerId) throws Exception {
+
+ private int launchCommand(String[] command, boolean useDuccSpawn,
+ String userId, String containerId) throws Exception {
String[] commandLine = null;
try {
- //
- // Use ducc_ling (c code) as a launcher for the actual process. The ducc_ling
- // allows the process to run as a specified user in order to write out logs in
- // user's space as oppose to ducc space.
- String c_launcher_path =
- Utils.resolvePlaceholderIfExists(
- System.getProperty("ducc.agent.launcher.ducc_spawn_path"),System.getProperties());
-
-
+ //
+ // Use ducc_ling (c code) as a launcher for the actual process. The
+ // ducc_ling
+ // allows the process to run as a specified user in order to write
+ // out logs in
+ // user's space as oppose to ducc space.
+ String c_launcher_path = Utils.resolvePlaceholderIfExists(
+ System.getProperty("ducc.agent.launcher.ducc_spawn_path"),
+ System.getProperties());
- if ( useDuccSpawn && c_launcher_path != null ) {
- commandLine = new String[4+command.length];
+ if (useDuccSpawn && c_launcher_path != null) {
+ commandLine = new String[4 + command.length];
commandLine[0] = c_launcher_path;
commandLine[1] = "-u";
commandLine[2] = userId;
commandLine[3] = "--";
-
- int j=0;
- for(int i=4; i < commandLine.length;i++) {
+
+ int j = 0;
+ for (int i = 4; i < commandLine.length; i++) {
commandLine[i] = command[j++];
}
} else {
@@ -158,38 +363,43 @@ public class CGroupsManager {
ProcessBuilder processLauncher = new ProcessBuilder();
processLauncher.command(commandLine);
processLauncher.redirectErrorStream();
-
+
java.lang.Process process = processLauncher.start();
-
- InputStreamReader in = new InputStreamReader(process.getInputStream());
+
+ InputStreamReader in = new InputStreamReader(
+ process.getInputStream());
BufferedReader reader = new BufferedReader(in);
String line;
while ((line = reader.readLine()) != null) {
- System.out.println(">>>>"+line);
+ agentLogger.info("launchCommand", null, ">>>>" + line);
}
int retCode = process.waitFor();
return retCode;
-
- } catch( Exception e) {
+
+ } catch (Exception e) {
StringBuffer sb = new StringBuffer();
- if ( commandLine != null ) {
- for ( String cmdPart : commandLine ) {
- sb.append(cmdPart).append(" ");
- }
- }
- if ( agentLogger != null ) {
- agentLogger.error("launchCommand", null, "Unable to Launch Command:"+sb.toString(),e);
- } else {
- System.out.println("CGroupsManager.launchCommand()- Unable to Launch Command:"+sb.toString());
- e.printStackTrace();
- }
+ if (commandLine != null) {
+ for (String cmdPart : commandLine) {
+ sb.append(cmdPart).append(" ");
+ }
+ }
+ if (agentLogger != null) {
+ agentLogger.error("launchCommand", null,
+ "Unable to Launch Command:" + sb.toString(), e);
+ } else {
+ System.out
+ .println("CGroupsManager.launchCommand()- Unable to Launch Command:"
+ + sb.toString());
+ e.printStackTrace();
+ }
- }
- return -1; // failure
+ }
+ return -1; // failure
}
+
/**
- * Return a Set of existing cgroup Ids found in the filesystem identified
- * by 'cgroupBaseDir'.
+ * Return a Set of existing cgroup Ids found in the filesystem identified by
+ * 'cgroupBaseDir'.
*
* @return - set of cgroup ids
*
@@ -197,24 +407,96 @@ public class CGroupsManager {
*/
public Set<String> collectExistingContainers() throws Exception {
File duccCGroupBaseDir = new File(cgroupBaseDir);
- if ( duccCGroupBaseDir.exists()) {
+ if (duccCGroupBaseDir.exists()) {
File[] existingCGroups = duccCGroupBaseDir.listFiles();
- for (File cgroup : existingCGroups ) {
- if ( cgroup.isDirectory() ) {
+ for (File cgroup : existingCGroups) {
+ if (cgroup.isDirectory()) {
containerIds.add(cgroup.getName());
}
}
- }
+ }
return containerIds;
}
+
public String getDuccCGroupBaseDir() {
return cgroupBaseDir;
}
+
public String getSubsystems() {
return cgroupSubsystems;
}
+
public boolean cgroupExists(String cgroup) throws Exception {
File duccCGroupBaseDir = new File(cgroup);
return duccCGroupBaseDir.exists();
}
+
+ /**
+  * Collects the processes currently running on this node by spawning
+  * 'ps -Ao user:12,pid,ppid,args --no-heading' and scraping its output.
+  * Each output line is split on whitespace; the first three fields are
+  * taken as user, pid and ppid. Lines with fewer than three fields are
+  * skipped. Failures are logged (or printed when no agent logger was
+  * supplied) and whatever was collected so far is returned.
+  *
+  * @return - set of processes found; possibly empty, never null
+  *
+  * @throws Exception
+  *             - declared for callers; failures of the ps command are
+  *             handled internally
+  */
+ public Set<NodeProcessInfo> getProcessesOnNode() throws Exception {
+     String location = "getProcessesOnNode";
+     Set<NodeProcessInfo> processList = new HashSet<NodeProcessInfo>();
+     BufferedReader reader = null;
+     try {
+
+         ProcessBuilder pb = new ProcessBuilder("ps", "-Ao",
+                 "user:12,pid,ppid,args", "--no-heading");
+         pb.redirectErrorStream(true);
+         java.lang.Process proc = pb.start();
+         // spawn ps command and scrape the output
+         InputStream stream = proc.getInputStream();
+         reader = new BufferedReader(new InputStreamReader(stream));
+         String line;
+         String regex = "\\s+";
+
+         // read the next line from ps output
+         while ((line = reader.readLine()) != null) {
+             // trim first so leading blanks cannot shift the fields
+             String tokens[] = line.trim().split(regex);
+             // need user, pid and ppid; one short line must not abort the scan
+             if (tokens.length < 3) {
+                 continue;
+             }
+             String user = tokens[0];
+             String pid = tokens[1];
+             String ppid = tokens[2];
+             processList.add(new NodeProcessInfo(pid, ppid, user));
+         }
+     } catch (Exception e) {
+         if (agentLogger == null) {
+             e.printStackTrace();
+         } else {
+             agentLogger.error(location, null, e);
+         }
+     } finally {
+         if (reader != null) {
+             try {
+                 reader.close();
+             } catch (Exception ignored) {
+                 // nothing useful can be done if close fails
+             }
+         }
+     }
+     return processList;
+
+ }
+
+ /**
+  * Holds the owner, pid and parent pid of a single process, as scraped from
+  * ps output by getProcessesOnNode().
+  */
+ public class NodeProcessInfo {
+     // owner of the process; the only field that can be changed after creation
+     private String userid;
+     // process id, kept as the text token read from ps
+     private String pid;
+     // parent process id, kept as the text token read from ps
+     private String ppid;
+
+     /**
+      * @param processId
+      *            - process id
+      * @param parentProcessId
+      *            - parent process id
+      * @param owner
+      *            - user owning the process
+      */
+     NodeProcessInfo(String processId, String parentProcessId, String owner) {
+         this.userid = owner;
+         this.pid = processId;
+         this.ppid = parentProcessId;
+     }
+
+     /** @return - user owning the process */
+     public String getUserid() {
+         return this.userid;
+     }
+
+     /**
+      * @param userid
+      *            - new owner of the process
+      */
+     public void setUserid(String userid) {
+         this.userid = userid;
+     }
+
+     /** @return - process id */
+     public String getPid() {
+         return this.pid;
+     }
+
+     /** @return - parent process id */
+     public String getPpid() {
+         return this.ppid;
+     }
+
+ }
}
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/DuccCommandExecutor.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/DuccCommandExecutor.java?rev=1469537&r1=1469536&r2=1469537&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/DuccCommandExecutor.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/DuccCommandExecutor.java Thu Apr 18 19:41:46 2013
@@ -29,6 +29,8 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
+import javax.management.RuntimeErrorException;
+
import org.apache.uima.ducc.agent.NodeAgent;
import org.apache.uima.ducc.common.utils.DuccLogger;
import org.apache.uima.ducc.common.utils.TimeStamp;
@@ -143,18 +145,24 @@ public class DuccCommandExecutor extends
String containerId = getContainerId();
logger.info(methodName, null, "Creating CGroup with ID:"+containerId);
if ( !agent.cgroupsManager.cgroupExists(agent.cgroupsManager.getDuccCGroupBaseDir()+"/"+containerId) ) {
-
+ boolean failed = false;
// create cgroup container for JDs
try {
if ( createCGroupContainer(duccProcess, containerId, ((ManagedProcess)super.managedProcess).getOwner()) ) {
logger.info(methodName, null, "Created CGroup with ID:"+containerId+" With Memory Limit="+((ManagedProcess)super.managedProcess).getDuccProcess().getCGroup().getMaxMemoryLimit()+" Bytes");
} else {
logger.info(methodName, null, "Failed To Create CGroup with ID:"+containerId);
+ duccProcess.setProcessState(ProcessState.Failed);
+ duccProcess.setReasonForStoppingProcess("CGroupsPermissionDenied");
+ failed = true;
}
} catch( Exception e) {
logger.error(methodName, null, e);
}
+ if ( failed ) {
+ throw new RuntimeException("The Agent is Unable To Create A CGroup with Container ID: "+containerId+". Rejecting Deployment of Process with ID:"+duccProcess.getDuccId());
+ }
} else {
logger.info(methodName, null, "CGroup Exists with ID:"+containerId);
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/processors/LinuxProcessMetricsProcessor.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/processors/LinuxProcessMetricsProcessor.java?rev=1469537&r1=1469536&r2=1469537&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/processors/LinuxProcessMetricsProcessor.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/processors/LinuxProcessMetricsProcessor.java Thu Apr 18 19:41:46 2013
@@ -55,8 +55,8 @@ implements ProcessMetricsProcessor {
private DuccLogger logger;
private ManagedProcess managedProcess;
private NodeAgent agent;
- private int fudgeFactor = 5; // default is 5%
- private int logCounter=0;
+ private int fudgeFactor = 5; // default is 5%
+ //private int logCounter=0;
public LinuxProcessMetricsProcessor(DuccLogger logger, IDuccProcess process, NodeAgent agent,String statmFilePath, String nodeStatFilePath, String processStatFilePath, ManagedProcess managedProcess) throws FileNotFoundException{
this.logger = logger;
statmFile = new RandomAccessFile(statmFilePath, "r");
@@ -64,7 +64,7 @@ implements ProcessMetricsProcessor {
processStatFile = new RandomAccessFile(processStatFilePath, "r");
this.managedProcess = managedProcess;
this.agent = agent;
- pool = Executors.newFixedThreadPool(3);
+ pool = Executors.newFixedThreadPool(30);
this.process = process;
gcStatsCollector = new DuccGarbageStatsCollector(logger, process);
// read the block size from ducc.properties
@@ -113,8 +113,9 @@ implements ProcessMetricsProcessor {
String DUCC_HOME = Utils.findDuccHome();
// executes script DUCC_HOME/admin/ducc_get_process_swap_usage.sh which sums up swap used by a process
DuccProcessSwapSpaceUsage processSwapSpaceUsage =
- new DuccProcessSwapSpaceUsage(process.getPID(),DUCC_HOME+"/admin/ducc_get_process_swap_usage.sh", logger);
-
+ new DuccProcessSwapSpaceUsage(process.getPID(), managedProcess.getOwner(), DUCC_HOME+"/admin/ducc_get_process_swap_usage.sh", logger);
+
+
logger.trace("process", null, "----------- PID:"+process.getPID()+" Cumulative CPU Time (jiffies):"+processCpuUsage.get().getTotalJiffies());
// Publish cumulative CPU usage
process.setCpuTime(processCpuUsage.get().getTotalJiffies());
@@ -122,16 +123,17 @@ implements ProcessMetricsProcessor {
// collects process Major faults (swap in memory)
process.setMajorFaults(majorFaults);
// Current Process Swap Usage in bytes
+ long st = System.currentTimeMillis();
long processSwapUsage = processSwapSpaceUsage.getSwapUsage()*1024;
// collects swap usage from /proc/<PID>/smaps file via a script DUCC_HOME/admin/collect_process_swap_usage.sh
process.setSwapUsage(processSwapUsage);
- if ( (logCounter % 100 ) == 0 ) {
- logger.info("process", null, "----------- PID:"+process.getPID()+" Major Faults:"+majorFaults+" Process Swap Usage:"+processSwapUsage);
- }
- logCounter++;
+ // if ( (logCounter % 2 ) == 0 ) {
+ logger.info("process", null, "----------- PID:"+process.getPID()+" Major Faults:"+majorFaults+" Process Swap Usage:"+processSwapUsage+" Max Swap Usage Allowed:"+managedProcess.getMaxSwapThreshold()+" Time to Collect Swap Usage:"+ (System.currentTimeMillis()-st));
+ //}
+ //logCounter++;
if (processSwapUsage > 0 && processSwapUsage > managedProcess.getMaxSwapThreshold()) {
- logger.error("process", null, "\n\n********************************************************\n\tProcess with PID:"+managedProcess.getPid()+ " Exceeded its max swap usage assignment of "+ managedProcess.getMaxSwapThreshold()+" MBs. This Process Swap Usage is: "+processSwapUsage+" MBs .Killing process ...\n********************************************************\n\n" );
+ logger.error("process", null, "\n\n********************************************************\n\tProcess with PID:"+managedProcess.getPid()+ " Exceeded its Max Swap Usage Threshold of "+ (managedProcess.getMaxSwapThreshold()/1024)/1024+" MBs. The Current Swap Usage is: "+(processSwapUsage/1024)/1024+" MBs .Killing process ...\n********************************************************\n\n" );
try {
managedProcess.kill(); // mark it for death
process.setReasonForStoppingProcess(ReasonForStoppingProcess.ExceededSwapThreshold.toString());
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/agent/metrics/swap/DuccProcessMemoryPageLoadUsage.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/agent/metrics/swap/DuccProcessMemoryPageLoadUsage.java?rev=1469537&r1=1469536&r2=1469537&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/agent/metrics/swap/DuccProcessMemoryPageLoadUsage.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/agent/metrics/swap/DuccProcessMemoryPageLoadUsage.java Thu Apr 18 19:41:46 2013
@@ -5,7 +5,7 @@ import org.apache.uima.ducc.common.node.
public class DuccProcessMemoryPageLoadUsage extends ByteBufferParser implements
ProcessMemoryPageLoadUsage {
private static final long serialVersionUID = 1L;
- public static final int MAJORFAULTSFLD=12;
+ public static final int MAJORFAULTSFLD=11;
public DuccProcessMemoryPageLoadUsage(byte[] memInfoBuffer,
int[] memInfoFieldOffsets, int[] memInfoFiledLengths) {
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/agent/metrics/swap/DuccProcessSwapSpaceUsage.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/agent/metrics/swap/DuccProcessSwapSpaceUsage.java?rev=1469537&r1=1469536&r2=1469537&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/agent/metrics/swap/DuccProcessSwapSpaceUsage.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/agent/metrics/swap/DuccProcessSwapSpaceUsage.java Thu Apr 18 19:41:46 2013
@@ -3,17 +3,25 @@ package org.apache.uima.ducc.common.agen
import java.io.BufferedReader;
import java.io.InputStreamReader;
+
import org.apache.uima.ducc.common.utils.DuccLogger;
+import org.apache.uima.ducc.common.utils.Utils;
public class DuccProcessSwapSpaceUsage implements ProcessSwapSpaceUsage {
String pid=null;
String execScript=null;
DuccLogger logger=null;
+ String[] command;
- public DuccProcessSwapSpaceUsage( String pid, String execScript, DuccLogger logger) {
+ public DuccProcessSwapSpaceUsage( String pid, String owner, String execScript, DuccLogger logger) {
this.pid = pid;
this.execScript = execScript;
this.logger = logger;
+ String c_launcher_path =
+ Utils.resolvePlaceholderIfExists(
+ System.getProperty("ducc.agent.launcher.ducc_spawn_path"),System.getProperties());
+ command = new String[] { c_launcher_path,
+ "-u", owner, "--", execScript, pid };
}
public long getSwapUsage() {
long swapusage=0;
@@ -21,17 +29,29 @@ public class DuccProcessSwapSpaceUsage i
InputStreamReader in = null;
try {
ProcessBuilder pb = new ProcessBuilder();
- String[] command = {execScript,pid};
- pb.command(command);
+ //String[] command = {execScript,pid};
+ pb.command(command); //command);
+ String cmd = "";
+ for( String c : command) {
+ cmd += " "+ c;
+ }
+ //logger.info("------------ getSwapUsage-", null, cmd);
pb.redirectErrorStream(true);
Process swapCollectorProcess = pb.start();
in = new InputStreamReader(swapCollectorProcess.getInputStream());
BufferedReader reader = new BufferedReader(in);
String line=null;
-
- while ((line = reader.readLine()) != null && line.trim().length() > 0 ) {
+ boolean skip = true;
+ while ((line = reader.readLine()) != null) {
try {
- swapusage = Long.parseLong(line.trim());
+ if ( line.startsWith("1001")) {
+ skip = false;
+ continue;
+ }
+ if (!skip) {
+ swapusage = Long.parseLong(line.trim());
+ logger.info("getSwapUsage-",null, "PID:"+pid+" Swap Usage:"+line);
+ }
} catch( NumberFormatException e) {
logger.error("getSwapUsage", null, line);
}