You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2016/03/30 02:34:20 UTC

[2/2] mesos git commit: Added GPUs as an explicit resource in the agent.

Added GPUs as an explicit resource in the agent.

Currently, we enforce that the number of GPUs specified in the 'gpus'
resource parameter equals the number of GPUs passed in via the
--nvidia_gpu_devices flag. In the future, we will generalize this via
autodiscovery of GPUs and support for GPU types other than Nvidia.

Review: https://reviews.apache.org/r/44366/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/77fae968
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/77fae968
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/77fae968

Branch: refs/heads/master
Commit: 77fae968ff80e8516d42ead384956952d5132d46
Parents: a04b8b6
Author: Kevin Klues <kl...@gmail.com>
Authored: Tue Mar 29 17:19:49 2016 -0700
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Tue Mar 29 17:33:44 2016 -0700

----------------------------------------------------------------------
 include/mesos/resources.hpp               |  1 +
 include/mesos/v1/resources.hpp            |  1 +
 src/common/resources.cpp                  | 14 ++++++++++-
 src/slave/containerizer/containerizer.cpp | 32 ++++++++++++++++++++++++++
 src/v1/resources.cpp                      | 14 ++++++++++-
 5 files changed, 60 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/77fae968/include/mesos/resources.hpp
----------------------------------------------------------------------
diff --git a/include/mesos/resources.hpp b/include/mesos/resources.hpp
index bb343ad..a557e97 100644
--- a/include/mesos/resources.hpp
+++ b/include/mesos/resources.hpp
@@ -324,6 +324,7 @@ public:
   // TODO(vinod): Fix this when we make these types as first class
   // protobufs.
   Option<double> cpus() const;
+  Option<double> gpus() const;
   Option<Bytes> mem() const;
   Option<Bytes> disk() const;
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/77fae968/include/mesos/v1/resources.hpp
----------------------------------------------------------------------
diff --git a/include/mesos/v1/resources.hpp b/include/mesos/v1/resources.hpp
index 719110f..a5ba8fe 100644
--- a/include/mesos/v1/resources.hpp
+++ b/include/mesos/v1/resources.hpp
@@ -324,6 +324,7 @@ public:
   // TODO(vinod): Fix this when we make these types as first class
   // protobufs.
   Option<double> cpus() const;
+  Option<double> gpus() const;
   Option<Bytes> mem() const;
   Option<Bytes> disk() const;
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/77fae968/src/common/resources.cpp
----------------------------------------------------------------------
diff --git a/src/common/resources.cpp b/src/common/resources.cpp
index 818eb8b..f6ff92b 100644
--- a/src/common/resources.cpp
+++ b/src/common/resources.cpp
@@ -1101,9 +1101,10 @@ Try<Resources> Resources::apply(const Offer::Operation& operation) const
   // The following are sanity checks to ensure the amount of each type of
   // resource does not change.
   // TODO(jieyu): Currently, we only check known resource types like
-  // cpus, mem, disk, ports, etc. We should generalize this.
+  // cpus, gpus, mem, disk, ports, etc. We should generalize this.
 
   CHECK(result.cpus() == cpus());
+  CHECK(result.gpus() == gpus());
   CHECK(result.mem() == mem());
   CHECK(result.disk() == disk());
   CHECK(result.ports() == ports());
@@ -1227,6 +1228,17 @@ Option<double> Resources::cpus() const
 }
 
 
+Option<double> Resources::gpus() const
+{
+  Option<Value::Scalar> value = get<Value::Scalar>("gpus");
+  if (value.isSome()) {
+    return value->value();
+  } else {
+    return None();
+  }
+}
+
+
 Option<Bytes> Resources::mem() const
 {
   Option<Value::Scalar> value = get<Value::Scalar>("mem");

http://git-wip-us.apache.org/repos/asf/mesos/blob/77fae968/src/slave/containerizer/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/containerizer.cpp b/src/slave/containerizer/containerizer.cpp
index 3556040..d0cae79 100644
--- a/src/slave/containerizer/containerizer.cpp
+++ b/src/slave/containerizer/containerizer.cpp
@@ -22,6 +22,7 @@
 
 #include <stout/fs.hpp>
 #include <stout/hashmap.hpp>
+#include <stout/numify.hpp>
 #include <stout/os.hpp>
 #include <stout/stringify.hpp>
 #include <stout/strings.hpp>
@@ -92,6 +93,37 @@ Try<Resources> Containerizer::resources(const Flags& flags)
         flags.default_role).get();
   }
 
+  // GPU resource.
+  // We currently do not support GPU discovery, so we require that
+  // GPUs are explicitly specified in `--resources`. When Nvidia GPU
+  // support is enabled, we also require the GPU devices to be
+  // specified in `--nvidia_gpu_devices`.
+  if (strings::contains(flags.resources.getOrElse(""), "gpus")) {
+    // Make sure that the value of `gpus` is actually an integer and
+    // not a fractional amount. We take advantage of the fact that we
+    // know the value of `gpus` is only precise up to 3 decimals.
+    long long millis = static_cast<long long>(resources.gpus().get() * 1000);
+    if ((millis % 1000) != 0) {
+      return Error("The `gpus` resource must specified as an unsigned integer");
+    }
+
+#ifdef ENABLE_NVIDIA_GPU_SUPPORT
+    // Verify that the number of GPUs in `--nvidia_gpu_devices`
+    // matches the number of GPUs specified as a resource. In the
+    // future we will do discovery of GPUs, which will make the
+    // `--nvidia_gpu_devices` flag optional.
+    if (!flags.nvidia_gpu_devices.isSome()) {
+      return Error("When specifying the `gpus` resource, you must also specify"
+                   " a list of GPUs via the `--nvidia_gpu_devices` flag");
+    }
+
+    if (flags.nvidia_gpu_devices->size() != resources.gpus().get())
+      return Error("The number of GPUs passed in the '--nvidia_gpu_devices'"
+                   " flag must match the number of GPUs specified in the 'gpus'"
+                   " resource");
+#endif // ENABLE_NVIDIA_GPU_SUPPORT
+  }
+
   // Memory resource.
   if (!strings::contains(flags.resources.getOrElse(""), "mem")) {
     // No memory specified so probe OS or resort to DEFAULT_MEM.

http://git-wip-us.apache.org/repos/asf/mesos/blob/77fae968/src/v1/resources.cpp
----------------------------------------------------------------------
diff --git a/src/v1/resources.cpp b/src/v1/resources.cpp
index 4907040..8c3f2d1 100644
--- a/src/v1/resources.cpp
+++ b/src/v1/resources.cpp
@@ -1104,9 +1104,10 @@ Try<Resources> Resources::apply(const Offer::Operation& operation) const
   // The following are sanity checks to ensure the amount of each type of
   // resource does not change.
   // TODO(jieyu): Currently, we only check known resource types like
-  // cpus, mem, disk, ports, etc. We should generalize this.
+  // cpus, gpus, mem, disk, ports, etc. We should generalize this.
 
   CHECK(result.cpus() == cpus());
+  CHECK(result.gpus() == gpus());
   CHECK(result.mem() == mem());
   CHECK(result.disk() == disk());
   CHECK(result.ports() == ports());
@@ -1230,6 +1231,17 @@ Option<double> Resources::cpus() const
 }
 
 
+Option<double> Resources::gpus() const
+{
+  Option<Value::Scalar> value = get<Value::Scalar>("gpus");
+  if (value.isSome()) {
+    return value->value();
+  } else {
+    return None();
+  }
+}
+
+
 Option<Bytes> Resources::mem() const
 {
   Option<Value::Scalar> value = get<Value::Scalar>("mem");