You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by kl...@apache.org on 2017/03/13 00:32:31 UTC

mesos git commit: Added ability to dynamically load '/dev/nvidia-uvm' in GPU isolator.

Repository: mesos
Updated Branches:
  refs/heads/master 3593dd76e -> ec319931c


Added ability to dynamically load '/dev/nvidia-uvm' in GPU isolator.

Review: https://reviews.apache.org/r/57539/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/ec319931
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/ec319931
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/ec319931

Branch: refs/heads/master
Commit: ec319931cd6ab4af2f9d32a1d0985f6b4af24648
Parents: 3593dd7
Author: Kevin Klues <kl...@gmail.com>
Authored: Sun Mar 12 17:25:40 2017 -0700
Committer: Kevin Klues <kl...@gmail.com>
Committed: Sun Mar 12 17:25:40 2017 -0700

----------------------------------------------------------------------
 .../containerizer/mesos/isolators/gpu/isolator.cpp | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/ec319931/src/slave/containerizer/mesos/isolators/gpu/isolator.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/gpu/isolator.cpp b/src/slave/containerizer/mesos/isolators/gpu/isolator.cpp
index a3b9fe7..d4ab539 100644
--- a/src/slave/containerizer/mesos/isolators/gpu/isolator.cpp
+++ b/src/slave/containerizer/mesos/isolators/gpu/isolator.cpp
@@ -157,6 +157,23 @@ Try<Isolator*> NvidiaGpuIsolatorProcess::create(
 
   deviceEntries[Path("/dev/nvidiactl")] = entry;
 
+  // The `nvidia-uvm` module is not typically loaded by default on
+  // systems that have Nvidia GPU drivers installed. Instead,
+  // applications that require this module use `nvidia-modprobe` to
+  // load it dynamically on first use. `nvidia-modprobe` both loads
+  // the `nvidia-uvm` kernel module and creates the corresponding
+  // `/dev/nvidia-uvm` device that the module controls.
+  //
+  // We call `nvidia-modprobe` here to ensure that `/dev/nvidia-uvm`
+  // is properly created so we can inject it into any containers that
+  // may require it.
+  if (!os::exists("/dev/nvidia-uvm")) {
+    Try<string> modprobe = os::shell("nvidia-modprobe -u -c 0");
+    if (modprobe.isError()) {
+      return Error("Failed to load '/dev/nvidia-uvm': " + modprobe.error());
+    }
+  }
+
   device = os::stat::rdev("/dev/nvidia-uvm");
   if (device.isError()) {
     return Error("Failed to obtain device ID for '/dev/nvidia-uvm': " +