You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2016/07/06 01:53:41 UTC

[1/2] mesos git commit: Added Nvidia devices as default devices to inject into every container.

Repository: mesos
Updated Branches:
  refs/heads/master fbaa52c4a -> 57b52a0ee


Added Nvidia devices as default devices to inject into every container.

This is a temporary hack to make Nvidia devices available to a
container when file system isolation is enabled. Ideally, we would
have a generic mechanism of injecting new devices into a container
from an isolator, but this feature is currently lacking so we need to
resort to hard-coding them for now.

In essence, this commit adds all Nvidia devices to the default list of
devices on machines that have Nvidia GPUs. Without the 'gpu/nvidia'
isolator enabled, this means that containers have free rein to use
these devices however they want. However, with the 'gpu/nvidia'
isolator enabled, they will have restricted access to only those GPUs
they have been allocated (though they will be able to see all of them
on the file system). These are similar semantics to the current
support for Nvidia GPUs without filesystem isolation.

In the future, we will restrict injecting these devices to only occur
when the 'gpu/nvidia' isolator is enabled.

Review: https://reviews.apache.org/r/49668/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/fdd2fc1c
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/fdd2fc1c
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/fdd2fc1c

Branch: refs/heads/master
Commit: fdd2fc1c1b61f51cd8073933a69c428301039916
Parents: fbaa52c
Author: Kevin Klues <kl...@gmail.com>
Authored: Tue Jul 5 18:39:06 2016 -0700
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Tue Jul 5 18:39:06 2016 -0700

----------------------------------------------------------------------
 src/linux/fs.cpp | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/fdd2fc1c/src/linux/fs.cpp
----------------------------------------------------------------------
diff --git a/src/linux/fs.cpp b/src/linux/fs.cpp
index 3fe8203..27d322c 100644
--- a/src/linux/fs.cpp
+++ b/src/linux/fs.cpp
@@ -21,6 +21,8 @@
 #include <linux/limits.h>
 #include <linux/unistd.h>
 
+#include <list>
+
 #include <stout/adaptor.hpp>
 #include <stout/check.hpp>
 #include <stout/error.hpp>
@@ -37,6 +39,7 @@
 
 #include "linux/fs.hpp"
 
+using std::list;
 using std::string;
 using std::vector;
 
@@ -512,8 +515,26 @@ Try<Nothing> createStandardDevices(const string& root)
     "zero"
   };
 
+  // Glob all Nvidia GPU devices on the system and add them to the
+  // list of devices injected into the chroot environment.
+  //
+  // TODO(klueska): Only inject these devices if the 'gpu/nvidia'
+  // isolator is enabled.
+  Try<list<string>> nvidia = os::glob("/dev/nvidia*");
+  if (nvidia.isError()) {
+    return Error("Failed to glob /dev/nvidia* on the host filesystem:"
+                 " " + nvidia.error());
+  }
+
+  foreach (const string& device, nvidia.get()) {
+    if (os::exists(device)) {
+      devices.push_back(Path(device).basename());
+    }
+  }
+
+  // Inject each device into the chroot environment. Copy both the
+  // mode and the device itself from the corresponding host device.
   foreach (const string& device, devices) {
-    // Copy the mode and device from the corresponding host device.
     Try<Nothing> copy = copyDeviceNode(
         path::join("/",  "dev", device),
         path::join(root, "dev", device));


[2/2] mesos git commit: Inject Nvidia libraries for Docker images in mesos containerizer.

Posted by bm...@apache.org.
Inject Nvidia libraries for Docker images in mesos containerizer.

When Docker images have the matching Nvidia label, we will inject
the volume which contains the Nvidia libraries / binaries.

Similar support will be added for the Docker containerizer.

Review: https://reviews.apache.org/r/49669/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/57b52a0e
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/57b52a0e
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/57b52a0e

Branch: refs/heads/master
Commit: 57b52a0ee4bc9016f6b3de29606015c1216fb5b1
Parents: fdd2fc1
Author: Kevin Klues <kl...@gmail.com>
Authored: Tue Jul 5 18:42:52 2016 -0700
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Tue Jul 5 18:42:52 2016 -0700

----------------------------------------------------------------------
 .../mesos/isolators/gpu/isolator.cpp            | 63 +++++++++++++++++++-
 .../mesos/isolators/gpu/isolator.hpp            |  6 ++
 2 files changed, 66 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/57b52a0e/src/slave/containerizer/mesos/isolators/gpu/isolator.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/gpu/isolator.cpp b/src/slave/containerizer/mesos/isolators/gpu/isolator.cpp
index d407cab..c7e8cf0 100644
--- a/src/slave/containerizer/mesos/isolators/gpu/isolator.cpp
+++ b/src/slave/containerizer/mesos/isolators/gpu/isolator.cpp
@@ -48,6 +48,8 @@
 
 using cgroups::devices::Entry;
 
+using docker::spec::v1::ImageManifest;
+
 using mesos::slave::ContainerConfig;
 using mesos::slave::ContainerLaunchInfo;
 using mesos::slave::ContainerLimitation;
@@ -73,10 +75,12 @@ NvidiaGpuIsolatorProcess::NvidiaGpuIsolatorProcess(
     const Flags& _flags,
     const string& _hierarchy,
     const NvidiaGpuAllocator& _allocator,
+    const NvidiaVolume& _volume,
     const map<Path, cgroups::devices::Entry>& _controlDeviceEntries)
   : flags(_flags),
     hierarchy(_hierarchy),
     allocator(_allocator),
+    volume(_volume),
     controlDeviceEntries(_controlDeviceEntries) {}
 
 
@@ -167,6 +171,7 @@ Try<Isolator*> NvidiaGpuIsolatorProcess::create(
           flags,
           hierarchy.get(),
           components.allocator,
+          components.volume,
           deviceEntries));
 
   return new MesosIsolator(process);
@@ -266,9 +271,61 @@ Future<Option<ContainerLaunchInfo>> NvidiaGpuIsolatorProcess::prepare(
   }
 
   return update(containerId, containerConfig.executor_info().resources())
-    .then([]() -> Future<Option<ContainerLaunchInfo>> {
-      return None();
-    });
+    .then(defer(PID<NvidiaGpuIsolatorProcess>(this),
+                &NvidiaGpuIsolatorProcess::_prepare,
+                containerConfig));
+}
+
+
+// If our `ContainerConfig` specifies a different `rootfs` than the
+// host file system, then we need to prepare a script to inject our
+// `NvidiaVolume` into the container (if required).
+Future<Option<ContainerLaunchInfo>> NvidiaGpuIsolatorProcess::_prepare(
+    const mesos::slave::ContainerConfig& containerConfig)
+{
+  if (!containerConfig.has_rootfs()) {
+     return None();
+  }
+
+  // We only support docker containers at the moment.
+  if (!containerConfig.has_docker()) {
+    // TODO(klueska): Once ContainerConfig has
+    // a type, include that in the error message.
+    return Failure("Nvidia GPU isolator does not support non-Docker images");
+  }
+
+  ContainerLaunchInfo launchInfo;
+  launchInfo.set_namespaces(CLONE_NEWNS);
+
+  // Inject the Nvidia volume into the container.
+  //
+  // TODO(klueska): Inject the Nvidia devices here as well once we
+  // have a way to pass them to `fs::enter()` instead of hardcoding
+  // them in `fs::createStandardDevices()`.
+  if (!containerConfig.docker().has_manifest()) {
+     return Failure("The 'ContainerConfig' for docker is missing a manifest");
+  }
+
+  ImageManifest manifest = containerConfig.docker().manifest();
+
+  if (volume.shouldInject(manifest)) {
+    const string target = path::join(
+        containerConfig.rootfs(),
+        volume.CONTAINER_PATH());
+
+    Try<Nothing> mkdir = os::mkdir(target);
+    if (mkdir.isError()) {
+      return Failure(
+          "Failed to create the container directory at"
+          " '" + target + "': " + mkdir.error());
+    }
+
+    launchInfo.add_pre_exec_commands()->set_value(
+      "mount --no-mtab --rbind --read-only " +
+      volume.HOST_PATH() + " " + target);
+  }
+
+  return launchInfo;
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/57b52a0e/src/slave/containerizer/mesos/isolators/gpu/isolator.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/gpu/isolator.hpp b/src/slave/containerizer/mesos/isolators/gpu/isolator.hpp
index ceb3795..d94f6cf 100644
--- a/src/slave/containerizer/mesos/isolators/gpu/isolator.hpp
+++ b/src/slave/containerizer/mesos/isolators/gpu/isolator.hpp
@@ -35,6 +35,7 @@
 
 #include "slave/containerizer/mesos/isolators/gpu/allocator.hpp"
 #include "slave/containerizer/mesos/isolators/gpu/components.hpp"
+#include "slave/containerizer/mesos/isolators/gpu/volume.hpp"
 
 namespace mesos {
 namespace internal {
@@ -105,8 +106,12 @@ private:
       const Flags& _flags,
       const std::string& hierarchy,
       const NvidiaGpuAllocator& _allocator,
+      const NvidiaVolume& _volume,
       const map<Path, cgroups::devices::Entry>& _controlDeviceEntries);
 
+  virtual process::Future<Option<mesos::slave::ContainerLaunchInfo>> _prepare(
+      const mesos::slave::ContainerConfig& containerConfig);
+
   process::Future<Nothing> _update(
       const ContainerID& containerId,
       const std::set<Gpu>& allocation);
@@ -130,6 +135,7 @@ private:
   hashmap<ContainerID, Info*> infos;
 
   NvidiaGpuAllocator allocator;
+  NvidiaVolume volume;
 
   const map<Path, cgroups::devices::Entry> controlDeviceEntries;
 };