Posted to yarn-commits@hadoop.apache.org by ac...@apache.org on 2013/02/06 16:22:58 UTC

svn commit: r1443014 - in /hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project: ./ hadoop-yarn/dev-support/ hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/ hadoop-yarn/hadoop-yarn-common/src/main/resources/ hadoop-ya...

Author: acmurthy
Date: Wed Feb  6 15:22:57 2013
New Revision: 1443014

URL: http://svn.apache.org/viewvc?rev=1443014&view=rev
Log:
Merge -c 1423706 from trunk to branch-2.0.3-alpha to fix YARN-3. Add support for CPU isolation/monitoring of containers. Contributed by Andrew Ferguson.

Added:
    hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/CgroupsLCEResourcesHandler.java
      - copied unchanged from r1423706, hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/CgroupsLCEResourcesHandler.java
    hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/DefaultLCEResourcesHandler.java
      - copied unchanged from r1423706, hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/DefaultLCEResourcesHandler.java
    hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/LCEResourcesHandler.java
      - copied unchanged from r1423706, hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/LCEResourcesHandler.java
Modified:
    hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/CHANGES.txt
    hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml
    hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
    hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
    hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
    hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.c
    hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.h
    hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c
    hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h
    hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c
    hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c
    hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java

Modified: hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/CHANGES.txt?rev=1443014&r1=1443013&r2=1443014&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/CHANGES.txt Wed Feb  6 15:22:57 2013
@@ -8,6 +8,9 @@ Release 2.0.3-alpha - 2013-02-06 
 
     YARN-145. Add a Web UI to the fair share scheduler. (Sandy Ryza via tomwhite)
 
+    YARN-3. Add support for CPU isolation/monitoring of containers. 
+    (adferguson via tucu)
+
     YARN-230. RM Restart phase 1 - includes support for saving/restarting all
     applications on an RM bounce. (Bikas Saha via acmurthy)
 

Modified: hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml?rev=1443014&r1=1443013&r2=1443014&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml (original)
+++ hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml Wed Feb  6 15:22:57 2013
@@ -242,4 +242,11 @@
     <Bug pattern="EI_EXPOSE_REP2" />
   </Match>
   
+   <!-- /proc/mounts is always in the same place -->
+  <Match>
+    <Class name="org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler" />
+    <Method name="parseMtab" />
+    <Bug pattern="DMI_HARDCODED_ABSOLUTE_FILENAME" />
+  </Match>
+
 </FindBugsFilter>

Modified: hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java?rev=1443014&r1=1443013&r2=1443014&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java (original)
+++ hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java Wed Feb  6 15:22:57 2013
@@ -514,6 +514,24 @@ public class YarnConfiguration extends C
   public static final String NM_LINUX_CONTAINER_GROUP =
     NM_PREFIX + "linux-container-executor.group";
   
+  /** The type of resource enforcement to use with the
+   *  linux container executor.
+   */
+  public static final String NM_LINUX_CONTAINER_RESOURCES_HANDLER = 
+  NM_PREFIX + "linux-container-executor.resources-handler.class";
+  
+  /** The path the linux container executor should use for cgroups */
+  public static final String NM_LINUX_CONTAINER_CGROUPS_HIERARCHY =
+    NM_PREFIX + "linux-container-executor.cgroups.hierarchy";
+  
+  /** Whether the linux container executor should mount cgroups if not found */
+  public static final String NM_LINUX_CONTAINER_CGROUPS_MOUNT =
+    NM_PREFIX + "linux-container-executor.cgroups.mount";
+  
+  /** Where the linux container executor should mount cgroups if not found */
+  public static final String NM_LINUX_CONTAINER_CGROUPS_MOUNT_PATH =
+    NM_PREFIX + "linux-container-executor.cgroups.mount-path";
+  
   /** T-file compression types used to compress aggregated logs.*/
   public static final String NM_LOG_AGG_COMPRESSION_TYPE = 
     NM_PREFIX + "log-aggregation.compression-type";

Modified: hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml?rev=1443014&r1=1443013&r2=1443014&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml (original)
+++ hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml Wed Feb  6 15:22:57 2013
@@ -535,6 +535,39 @@
   </property>
 
   <property>
+    <description>The class which should help the LCE handle resources.</description>
+    <name>yarn.nodemanager.linux-container-executor.resources-handler.class</name>
+    <value>org.apache.hadoop.yarn.server.nodemanager.util.DefaultLCEResourcesHandler</value>
+    <!-- <value>org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler</value> -->
+  </property>
+
+  <property>
+    <description>The cgroups hierarchy under which to place YARN processes (cannot contain commas).
+    If yarn.nodemanager.linux-container-executor.cgroups.mount is false (that is, if cgroups have
+    been pre-configured), then this cgroups hierarchy must already exist and be writable by the
+    NodeManager user, otherwise the NodeManager may fail.
+    Only used when the LCE resources handler is set to the CgroupsLCEResourcesHandler.</description>
+    <name>yarn.nodemanager.linux-container-executor.cgroups.hierarchy</name>
+    <value>/hadoop-yarn</value>
+  </property>
+
+  <property>
+    <description>Whether the LCE should attempt to mount cgroups if not found.
+    Only used when the LCE resources handler is set to the CgroupsLCEResourcesHandler.</description>
+    <name>yarn.nodemanager.linux-container-executor.cgroups.mount</name>
+    <value>false</value>
+  </property>
+
+  <property>
+    <description>Where the LCE should attempt to mount cgroups if not found. Common locations
+    include /sys/fs/cgroup and /cgroup; the default location can vary depending on the Linux
+    distribution in use. This path must exist before the NodeManager is launched.
+    Only used when the LCE resources handler is set to the CgroupsLCEResourcesHandler, and
+    yarn.nodemanager.linux-container-executor.cgroups.mount is true.</description>
+    <name>yarn.nodemanager.linux-container-executor.cgroups.mount-path</name>
+  </property>
+
+  <property>
     <description>T-file compression types used to compress aggregated logs.</description>
     <name>yarn.nodemanager.log-aggregation.compression-type</name>
     <value>none</value>
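
[Editor's note: taken together, the new yarn-default.xml entries above and the constants added to YarnConfiguration describe how an administrator opts into cgroups-based CPU isolation. A minimal, illustrative sketch of selecting the cgroups handler programmatically follows; the property names come from this patch, the mount path is an example rather than a default.]

    import org.apache.hadoop.yarn.conf.YarnConfiguration;

    public class CgroupsConfigExample {
      public static void main(String[] args) {
        YarnConfiguration conf = new YarnConfiguration();
        // Switch the LinuxContainerExecutor from the default handler to the
        // cgroups-based handler added by this patch.
        conf.set(YarnConfiguration.NM_LINUX_CONTAINER_RESOURCES_HANDLER,
            "org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler");
        // Hierarchy created under each controller mount; defaults to /hadoop-yarn.
        conf.set(YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_HIERARCHY, "/hadoop-yarn");
        // Ask the executor to mount cgroups itself; /sys/fs/cgroup is an example path.
        conf.setBoolean(YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_MOUNT, true);
        conf.set(YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_MOUNT_PATH, "/sys/fs/cgroup");
      }
    }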

Modified: hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java?rev=1443014&r1=1443013&r2=1443014&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java (original)
+++ hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java Wed Feb  6 15:22:57 2013
@@ -29,6 +29,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.Shell.ExitCodeException;
 import org.apache.hadoop.util.Shell.ShellCommandExecutor;
 import org.apache.hadoop.util.StringUtils;
@@ -38,6 +39,8 @@ import org.apache.hadoop.yarn.conf.YarnC
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerDiagnosticsUpdateEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer;
+import org.apache.hadoop.yarn.server.nodemanager.util.DefaultLCEResourcesHandler;
+import org.apache.hadoop.yarn.server.nodemanager.util.LCEResourcesHandler;
 import org.apache.hadoop.yarn.util.ConverterUtils;
 
 public class LinuxContainerExecutor extends ContainerExecutor {
@@ -46,11 +49,18 @@ public class LinuxContainerExecutor exte
       .getLog(LinuxContainerExecutor.class);
 
   private String containerExecutorExe;
+  private LCEResourcesHandler resourcesHandler;
+  
   
   @Override
   public void setConf(Configuration conf) {
     super.setConf(conf);
     containerExecutorExe = getContainerExecutorExecutablePath(conf);
+    
+    resourcesHandler = ReflectionUtils.newInstance(
+            conf.getClass(YarnConfiguration.NM_LINUX_CONTAINER_RESOURCES_HANDLER,
+              DefaultLCEResourcesHandler.class, LCEResourcesHandler.class), conf);
+    resourcesHandler.setConf(conf);
   }
 
   /**
@@ -81,7 +91,8 @@ public class LinuxContainerExecutor exte
     UNABLE_TO_EXECUTE_CONTAINER_SCRIPT(7),
     INVALID_CONTAINER_PID(9),
     INVALID_CONTAINER_EXEC_PERMISSIONS(22),
-    INVALID_CONFIG_FILE(24);
+    INVALID_CONFIG_FILE(24),
+    WRITE_CGROUP_FAILED(27);
 
     private final int value;
     ResultCode(int value) {
@@ -124,6 +135,8 @@ public class LinuxContainerExecutor exte
       throw new IOException("Linux container executor not configured properly"
           + " (error=" + exitCode + ")", e);
     }
+   
+    resourcesHandler.init(this);
   }
   
   @Override
@@ -188,6 +201,11 @@ public class LinuxContainerExecutor exte
 
     ContainerId containerId = container.getContainerID();
     String containerIdStr = ConverterUtils.toString(containerId);
+    
+    resourcesHandler.preExecute(containerId,
+            container.getLaunchContext().getResource());
+    String resourcesOptions = resourcesHandler.getResourcesOption(
+            containerId);
 
     ShellCommandExecutor shExec = null;
 
@@ -202,7 +220,8 @@ public class LinuxContainerExecutor exte
             nmPrivateTokensPath.toUri().getPath().toString(),
             pidFilePath.toString(),
             StringUtils.join(",", localDirs),
-            StringUtils.join(",", logDirs)));
+            StringUtils.join(",", logDirs),
+            resourcesOptions));
         String[] commandArray = command.toArray(new String[command.size()]);
         shExec = new ShellCommandExecutor(commandArray, null, // NM's cwd
             container.getLaunchContext().getEnvironment()); // sanitized env
@@ -241,7 +260,7 @@ public class LinuxContainerExecutor exte
       }
       return exitCode;
     } finally {
-      ; //
+      resourcesHandler.postExecute(containerId);
     }
     if (LOG.isDebugEnabled()) {
       LOG.debug("Output from LinuxContainerExecutor's launchContainer follows:");
@@ -316,4 +335,27 @@ public class LinuxContainerExecutor exte
       }
     }
   }
+  
+  public void mountCgroups(List<String> cgroupKVs, String hierarchy)
+         throws IOException {
+    List<String> command = new ArrayList<String>(
+            Arrays.asList(containerExecutorExe, "--mount-cgroups", hierarchy));
+    command.addAll(cgroupKVs);
+    
+    String[] commandArray = command.toArray(new String[command.size()]);
+    ShellCommandExecutor shExec = new ShellCommandExecutor(commandArray);
+
+    if (LOG.isDebugEnabled()) {
+        LOG.debug("mountCgroups: " + Arrays.toString(commandArray));
+    }
+
+    try {
+        shExec.execute();
+    } catch (IOException e) {
+        int ret_code = shExec.getExitCode();
+        logOutput(shExec.getOutput());
+        throw new IOException("Problem mounting cgroups " + cgroupKVs + 
+                  "; exit code = " + ret_code, e);
+    }
+  }  
 }
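
[Editor's note: the call sites above (setConf, init, preExecute, getResourcesOption, postExecute) outline the lifecycle of an LCEResourcesHandler. The interface itself is added by this commit but not reproduced in this mail, so the sketch below only infers its shape from those calls; the default handler evidently behaves like this no-op variant, returning "cgroups=none" (see the mock-executor test changes at the end of this message).]

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.yarn.api.records.ContainerId;
    import org.apache.hadoop.yarn.api.records.Resource;
    import org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor;
    import org.apache.hadoop.yarn.server.nodemanager.util.LCEResourcesHandler;

    // Shape inferred from the call sites in LinuxContainerExecutor; the real
    // interface lives in LCEResourcesHandler.java (added by this commit).
    public class NoopResourcesHandler implements LCEResourcesHandler {
      private Configuration conf;

      public void setConf(Configuration conf) { this.conf = conf; }
      public Configuration getConf() { return conf; }

      // Called once from LinuxContainerExecutor.init(); a cgroups handler would
      // mount or verify its hierarchy here.
      public void init(LinuxContainerExecutor lce) throws IOException { }

      // Called before the container process is launched, with the container's
      // requested Resource (e.g. to translate cores into cpu.shares).
      public void preExecute(ContainerId id, Resource resource) throws IOException { }

      // The string handed to the native container-executor as its "resources"
      // argument; the tests below expect "cgroups=none" from the default handler.
      public String getResourcesOption(ContainerId id) { return "cgroups=none"; }

      // Called after the container exits, to clean up any per-container cgroup.
      public void postExecute(ContainerId id) { }
    }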

Modified: hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.c
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.c?rev=1443014&r1=1443013&r2=1443014&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.c (original)
+++ hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.c Wed Feb  6 15:22:57 2013
@@ -308,7 +308,7 @@ char ** extract_values(char *value) {
       tempTok = strtok_r(NULL, ",", &tempstr);
     }
   }
-  if (size > 0) {
+  if (toPass != NULL) {
     toPass[size] = NULL;
   }
   return toPass;
@@ -323,3 +323,52 @@ void free_values(char** values) {
     free(values);
   }
 }
+
+/**
+ * If str is a string of the form key=val, find 'key'
+ */
+int get_kv_key(const char *input, char *out, size_t out_len) {
+
+  if (input == NULL)
+    return -EINVAL;
+
+  char *split = strchr(input, '=');
+
+  if (split == NULL)
+    return -EINVAL;
+
+  int key_len = split - input;
+
+  if (out_len < (key_len + 1) || out == NULL)
+    return -ENAMETOOLONG;
+
+  memcpy(out, input, key_len);
+  out[key_len] = '\0';
+
+  return 0;
+}
+
+/**
+ * If str is a string of the form key=val, find 'val'
+ */
+int get_kv_value(const char *input, char *out, size_t out_len) {
+
+  if (input == NULL)
+    return -EINVAL;
+
+  char *split = strchr(input, '=');
+
+  if (split == NULL)
+    return -EINVAL;
+
+  split++; // advance past '=' to the value
+  int val_len = (input + strlen(input)) - split;
+
+  if (out_len < (val_len + 1) || out == NULL)
+    return -ENAMETOOLONG;
+
+  memcpy(out, split, val_len);
+  out[val_len] = '\0';
+
+  return 0;
+}

Modified: hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.h
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.h?rev=1443014&r1=1443013&r2=1443014&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.h (original)
+++ hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.h Wed Feb  6 15:22:57 2013
@@ -16,6 +16,8 @@
  * limitations under the License.
  */
 
+#include <stddef.h>
+
 /**
  * Ensure that the configuration file and all of the containing directories
  * are only writable by root. Otherwise, an attacker can change the 
@@ -50,3 +52,28 @@ void free_values(char** values);
 //method to free allocated configuration
 void free_configurations();
 
+/**
+ * If str is a string of the form key=val, find 'key'
+ * 
+ * @param input    The input string
+ * @param out      Where to put the output string.
+ * @param out_len  The length of the output buffer.
+ *
+ * @return         -ENAMETOOLONG if out_len is not long enough;
+ *                 -EINVAL if there is no equals sign in the input;
+ *                 0 on success
+ */
+int get_kv_key(const char *input, char *out, size_t out_len);
+
+/**
+ * If str is a string of the form key=val, find 'val'
+ * 
+ * @param input    The input string
+ * @param out      Where to put the output string.
+ * @param out_len  The length of the output buffer.
+ *
+ * @return         -ENAMETOOLONG if out_len is not long enough;
+ *                 -EINVAL if there is no equals sign in the input;
+ *                 0 on success
+ */
+int get_kv_value(const char *input, char *out, size_t out_len);

Modified: hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c?rev=1443014&r1=1443013&r2=1443014&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c (original)
+++ hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c Wed Feb  6 15:22:57 2013
@@ -31,6 +31,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <sys/stat.h>
+#include <sys/mount.h>
 
 static const int DEFAULT_MIN_USERID = 1000;
 
@@ -151,6 +152,44 @@ static int change_effective_user(uid_t u
 }
 
 /**
+ * Write the pid of the current process to the cgroup file.
+ * cgroup_file: Path to cgroup file where pid needs to be written to.
+ */
+static int write_pid_to_cgroup_as_root(const char* cgroup_file, pid_t pid) {
+  uid_t user = geteuid();
+  gid_t group = getegid();
+  if (change_effective_user(0, 0) != 0) {
+    return -1;
+  }
+
+  // open
+  int cgroup_fd = open(cgroup_file, O_WRONLY | O_APPEND, 0);
+  if (cgroup_fd == -1) {
+    fprintf(LOGFILE, "Can't open file %s as node manager - %s\n", cgroup_file,
+           strerror(errno));
+    return -1;
+  }
+
+  // write pid
+  char pid_buf[21];
+  snprintf(pid_buf, sizeof(pid_buf), "%d", pid);
+  ssize_t written = write(cgroup_fd, pid_buf, strlen(pid_buf));
+  close(cgroup_fd);
+  if (written == -1) {
+    fprintf(LOGFILE, "Failed to write pid to file %s - %s\n",
+       cgroup_file, strerror(errno));
+    return -1;
+  }
+
+  // Revert back to the calling user.
+  if (change_effective_user(user, group)) {
+    return -1;
+  }
+
+  return 0;
+}
+
+/**
  * Write the pid of the current process into the pid file.
  * pid_file: Path to pid file where pid needs to be written to
  */
@@ -810,7 +849,8 @@ int launch_container_as_user(const char 
                    const char *container_id, const char *work_dir,
                    const char *script_name, const char *cred_file,
                    const char* pid_file, char* const* local_dirs,
-                   char* const* log_dirs) {
+                   char* const* log_dirs, const char *resources_key,
+                   char* const* resources_values) {
   int exit_code = -1;
   char *script_file_dest = NULL;
   char *cred_file_dest = NULL;
@@ -849,7 +889,22 @@ int launch_container_as_user(const char 
       || write_pid_to_file_as_nm(pid_file, pid) != 0) {
     exit_code = WRITE_PIDFILE_FAILED;
     goto cleanup;
-  }  
+  }
+
+  // cgroups-based resource enforcement
+  if (resources_key != NULL && ! strcmp(resources_key, "cgroups")) {
+
+    // write pid to cgroups
+    char* const* cgroup_ptr;
+    for (cgroup_ptr = resources_values; cgroup_ptr != NULL && 
+         *cgroup_ptr != NULL; ++cgroup_ptr) {
+      if (strcmp(*cgroup_ptr, "none") != 0 &&
+            write_pid_to_cgroup_as_root(*cgroup_ptr, pid) != 0) {
+        exit_code = WRITE_CGROUP_FAILED;
+        goto cleanup;
+      }
+    }
+  }
 
   // give up root privs
   if (change_user(user_detail->pw_uid, user_detail->pw_gid) != 0) {
@@ -1108,4 +1163,73 @@ int delete_as_user(const char *user,
   return ret;
 }
 
+void chown_dir_contents(const char *dir_path, uid_t uid, gid_t gid) {
+  DIR *dp;
+  struct dirent *ep;
+
+  char *path_tmp = malloc(strlen(dir_path) + NAME_MAX + 2);
+  if (path_tmp == NULL) {
+    return;
+  }
+
+  char *buf = stpncpy(path_tmp, dir_path, strlen(dir_path));
+  *buf++ = '/';
+     
+  dp = opendir(dir_path);
+  if (dp != NULL) {
+    while (ep = readdir(dp)) {
+      stpncpy(buf, ep->d_name, strlen(ep->d_name));
+      buf[strlen(ep->d_name)] = '\0';
+      change_owner(path_tmp, uid, gid);
+    }
+    closedir(dp);
+  }
+
+  free(path_tmp);
+}
+
+/**
+ * Mount a cgroup controller at the requested mount point and create
+ * a hierarchy for the Hadoop NodeManager to manage.
+ * pair: a key-value pair of the form "controller=mount-path"
+ * hierarchy: the top directory of the hierarchy for the NM
+ */
+int mount_cgroup(const char *pair, const char *hierarchy) {
+  char *controller = malloc(strlen(pair));
+  char *mount_path = malloc(strlen(pair));
+  char hier_path[PATH_MAX];
+  int result = 0;
+
+  if (get_kv_key(pair, controller, strlen(pair)) < 0 ||
+      get_kv_value(pair, mount_path, strlen(pair)) < 0) {
+    fprintf(LOGFILE, "Failed to mount cgroup controller; invalid option: %s\n",
+              pair);
+    result = -1; 
+  } else {
+    if (mount("none", mount_path, "cgroup", 0, controller) == 0) {
+      char *buf = stpncpy(hier_path, mount_path, strlen(mount_path));
+      *buf++ = '/';
+      snprintf(buf, PATH_MAX - (buf - hier_path), "%s", hierarchy);
+
+      // create hierarchy as 0750 and chown to Hadoop NM user
+      const mode_t perms = S_IRWXU | S_IRGRP | S_IXGRP;
+      if (mkdirs(hier_path, perms) == 0) {
+        change_owner(hier_path, nm_uid, nm_gid);
+        chown_dir_contents(hier_path, nm_uid, nm_gid);
+      }
+    } else {
+      fprintf(LOGFILE, "Failed to mount cgroup controller %s at %s - %s\n",
+                controller, mount_path, strerror(errno));
+      // if controller is already mounted, don't stop trying to mount others
+      if (errno != EBUSY) {
+        result = -1;
+      }
+    }
+  }
+
+  free(controller);
+  free(mount_path);
+
+  return result;
+}
 

Modified: hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h?rev=1443014&r1=1443013&r2=1443014&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h (original)
+++ hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h Wed Feb  6 15:22:57 2013
@@ -53,7 +53,8 @@ enum errorcodes {
   // PREPARE_JOB_LOGS_FAILED (NOT USED) 23
   INVALID_CONFIG_FILE =  24,
   SETSID_OPER_FAILED = 25,
-  WRITE_PIDFILE_FAILED = 26
+  WRITE_PIDFILE_FAILED = 26,
+  WRITE_CGROUP_FAILED = 27
 };
 
 #define NM_GROUP_KEY "yarn.nodemanager.linux-container-executor.group"
@@ -111,13 +112,16 @@ int initialize_app(const char *user, con
  * @param pid_file file where pid of process should be written to
  * @param local_dirs nodemanager-local-directories to be used
  * @param log_dirs nodemanager-log-directories to be used
+ * @param resources_key type of resource enforcement (none, cgroups)
+ * @param resources_value values needed to apply resource enforcement
  * @return -1 or errorcode enum value on error (should never return on success).
  */
 int launch_container_as_user(const char * user, const char *app_id,
                      const char *container_id, const char *work_dir,
                      const char *script_name, const char *cred_file,
                      const char *pid_file, char* const* local_dirs,
-                     char* const* log_dirs);
+                     char* const* log_dirs, const char *resources_key,
+                     char* const* resources_value);
 
 /**
  * Function used to signal a container launched by the user.
@@ -196,3 +200,5 @@ int initialize_user(const char *user, ch
 int create_directory_for_user(const char* path);
 
 int change_user(uid_t user, gid_t group);
+
+int mount_cgroup(const char *pair, const char *hierarchy);
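
[Editor's note: on the Java side, LinuxContainerExecutor.mountCgroups() (added earlier in this change) drives this entry point by invoking "container-executor --mount-cgroups <hierarchy> controller=path ...". A purely illustrative call, assuming the cpu controller and an example mount location:]

    import java.util.Arrays;
    import org.apache.hadoop.yarn.conf.YarnConfiguration;
    import org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor;

    public class MountCgroupsExample {
      public static void main(String[] args) throws Exception {
        LinuxContainerExecutor lce = new LinuxContainerExecutor();
        // setConf() resolves the path to the setuid container-executor binary
        // (and instantiates the configured resources handler).
        lce.setConf(new YarnConfiguration());
        // Each entry is a "controller=mount-path" pair; the native mount_cgroup()
        // mounts the controller there and creates <mount-path>/hadoop-yarn owned
        // by the NodeManager user. Paths here are examples, not defaults.
        lce.mountCgroups(Arrays.asList("cpu=/sys/fs/cgroup/cpu"), "/hadoop-yarn");
      }
    }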

Modified: hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c?rev=1443014&r1=1443013&r2=1443014&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c (original)
+++ hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c Wed Feb  6 15:22:57 2013
@@ -46,13 +46,16 @@ void display_usage(FILE *stream) {
   fprintf(stream,
           "Usage: container-executor --checksetup\n");
   fprintf(stream,
+          "Usage: container-executor --mount-cgroups "\
+          "hierarchy controller=path...\n");
+  fprintf(stream,
       "Usage: container-executor user command command-args\n");
   fprintf(stream, "Commands:\n");
   fprintf(stream, "   initialize container: %2d appid tokens " \
    "nm-local-dirs nm-log-dirs cmd app...\n", INITIALIZE_CONTAINER);
   fprintf(stream,
       "   launch container:    %2d appid containerid workdir "\
-      "container-script tokens pidfile nm-local-dirs nm-log-dirs\n",
+      "container-script tokens pidfile nm-local-dirs nm-log-dirs resources\n",
 	  LAUNCH_CONTAINER);
   fprintf(stream, "   signal container:    %2d container-pid signal\n",
 	  SIGNAL_CONTAINER);
@@ -63,14 +66,21 @@ void display_usage(FILE *stream) {
 int main(int argc, char **argv) {
   int invalid_args = 0; 
   int do_check_setup = 0;
+  int do_mount_cgroups = 0;
   
   LOGFILE = stdout;
   ERRORFILE = stderr;
 
+  if (argc > 1) {
+    if (strcmp("--mount-cgroups", argv[1]) == 0) {
+      do_mount_cgroups = 1;
+    }
+  }
+
   // Minimum number of arguments required to run 
   // the std. container-executor commands is 4
   // 4 args not needed for checksetup option
-  if (argc < 4) {
+  if (argc < 4 && !do_mount_cgroups) {
     invalid_args = 1;
     if (argc == 2) {
       const char *arg1 = argv[1];
@@ -103,6 +113,7 @@ int main(int argc, char **argv) {
   char *orig_conf_file = HADOOP_CONF_DIR "/" CONF_FILENAME;
   char *conf_file = resolve_config_path(orig_conf_file, argv[0]);
   char *local_dirs, *log_dirs;
+  char *resources, *resources_key, *resources_value;
 
   if (conf_file == NULL) {
     fprintf(ERRORFILE, "Configuration file %s not found.\n", orig_conf_file);
@@ -145,6 +156,18 @@ int main(int argc, char **argv) {
     return 0;
   }
 
+  if (do_mount_cgroups) {
+    optind++;
+    char *hierarchy = argv[optind++];
+    int result = 0;
+
+    while (optind < argc && result == 0) {
+      result = mount_cgroup(argv[optind++], hierarchy);
+    }
+
+    return result;
+  }
+
   //checks done for user name
   if (argv[optind] == NULL) {
     fprintf(ERRORFILE, "Invalid user name.\n");
@@ -180,8 +203,8 @@ int main(int argc, char **argv) {
                                extract_values(log_dirs), argv + optind);
     break;
   case LAUNCH_CONTAINER:
-    if (argc != 11) {
-      fprintf(ERRORFILE, "Too few arguments (%d vs 11) for launch container\n",
+    if (argc != 12) {
+      fprintf(ERRORFILE, "Wrong number of arguments (%d vs 12) for launch container\n",
 	      argc);
       fflush(ERRORFILE);
       return INVALID_ARGUMENT_NUMBER;
@@ -194,10 +217,26 @@ int main(int argc, char **argv) {
     pid_file = argv[optind++];
     local_dirs = argv[optind++];// good local dirs as a comma separated list
     log_dirs = argv[optind++];// good log dirs as a comma separated list
+    resources = argv[optind++];// key,value pair describing resources
+    char *resources_key = malloc(strlen(resources));
+    char *resources_value = malloc(strlen(resources));
+    if (get_kv_key(resources, resources_key, strlen(resources)) < 0 ||
+        get_kv_value(resources, resources_value, strlen(resources)) < 0) {
+        fprintf(ERRORFILE, "Invalid arguments for cgroups resources: %s",
+                           resources);
+        fflush(ERRORFILE);
+        free(resources_key);
+        free(resources_value);
+        return INVALID_ARGUMENT_NUMBER;
+    }
+    char** resources_values = extract_values(resources_value);
     exit_code = launch_container_as_user(user_detail->pw_name, app_id,
                     container_id, current_dir, script_file, cred_file,
                     pid_file, extract_values(local_dirs),
-                    extract_values(log_dirs));
+                    extract_values(log_dirs), resources_key,
+                    resources_values);
+    free(resources_key);
+    free(resources_value);
     break;
   case SIGNAL_CONTAINER:
     if (argc != 5) {

Modified: hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c?rev=1443014&r1=1443013&r2=1443014&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c (original)
+++ hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c Wed Feb  6 15:22:57 2013
@@ -39,6 +39,7 @@
 static char* username = NULL;
 static char* local_dirs = NULL;
 static char* log_dirs = NULL;
+static char* resources = NULL;
 
 /**
  * Run the command using the effective user id.
@@ -610,9 +611,17 @@ void test_run_container() {
 	   strerror(errno));
     exit(1);
   } else if (child == 0) {
+    char *key = malloc(strlen(resources));
+    char *value = malloc(strlen(resources));
+    if (get_kv_key(resources, key, strlen(resources)) < 0 ||
+        get_kv_value(resources, value, strlen(resources)) < 0) {
+        printf("FAIL: resources failed - %s\n", resources);
+        exit(1);
+    }
     if (launch_container_as_user(username, "app_4", "container_1", 
           container_dir, script_name, TEST_ROOT "/creds.txt", pid_file,
-          extract_values(local_dirs), extract_values(log_dirs)) != 0) {
+          extract_values(local_dirs), extract_values(log_dirs),
+          key, extract_values(value)) != 0) {
       printf("FAIL: failed in child\n");
       exit(42);
     }

Modified: hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java?rev=1443014&r1=1443013&r2=1443014&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java (original)
+++ hadoop/common/branches/branch-2.0.3-alpha/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java Wed Feb  6 15:22:57 2013
@@ -126,7 +126,7 @@ public class TestLinuxContainerExecutorW
     assertEquals(Arrays.asList(appSubmitter, cmd, appId, containerId,
         workDir.toString(), "/bin/echo", "/dev/null", pidFile.toString(),
         StringUtils.join(",", dirsHandler.getLocalDirs()),
-        StringUtils.join(",", dirsHandler.getLogDirs())),
+        StringUtils.join(",", dirsHandler.getLogDirs()), "cgroups=none"),
         readMockParams());
     
   }
@@ -211,7 +211,8 @@ public class TestLinuxContainerExecutorW
     assertEquals(Arrays.asList(appSubmitter, cmd, appId, containerId,
         workDir.toString(), "/bin/echo", "/dev/null", pidFile.toString(),
         StringUtils.join(",", dirsHandler.getLocalDirs()),
-        StringUtils.join(",", dirsHandler.getLogDirs())), readMockParams());
+        StringUtils.join(",", dirsHandler.getLogDirs()),
+        "cgroups=none"), readMockParams());
 
   }