You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2013/04/09 00:51:00 UTC

svn commit: r1465819 - in /incubator/mesos/trunk: src/slave/cgroups_isolator.cpp src/slave/cgroups_isolator.hpp src/slave/flags.hpp third_party/libprocess/third_party/stout/include/stout/multihashmap.hpp

Author: vinodkone
Date: Mon Apr  8 22:51:00 2013
New Revision: 1465819

URL: http://svn.apache.org/r1465819
Log:
Added support for CPU hard limits via CFS Bandwidth Control.

CFS is unique relative to existing Mesos cgroups support in that it
is a "subfeature" of an already supported cgroups subsystem, cpu.
Also, there are two "tunables" for configuring CFS bandwidth
limiting.

There are 4 approaches one could take:
1) Use the CFS bandwidth limiting if the feature is present.
2) Expose as a separate flag, e.g., "cpu,cfs,memory,freezer".
3) Add feature flag support to subsystems via an additional delimiter,
 e.g., "cpu+cfs,memory,freezer".
4) Add an additional control flag via some other means.

Option 2's downside is that it breaks the 1:1 mapping between cgroups
subsystems and a cgroups resource flag.

Option 3's downside is that it greatly increases the complexity of parsing
cgroups subsystem flags.

This diff takes option 4, adding a separate 'cgroups_enable_cfs' slave flag.

From: David Mackey <td...@booleanhaiku.com>
Review: https://reviews.apache.org/r/9464

Modified:
    incubator/mesos/trunk/src/slave/cgroups_isolator.cpp
    incubator/mesos/trunk/src/slave/cgroups_isolator.hpp
    incubator/mesos/trunk/src/slave/flags.hpp
    incubator/mesos/trunk/third_party/libprocess/third_party/stout/include/stout/multihashmap.hpp

Modified: incubator/mesos/trunk/src/slave/cgroups_isolator.cpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/slave/cgroups_isolator.cpp?rev=1465819&r1=1465818&r2=1465819&view=diff
==============================================================================
--- incubator/mesos/trunk/src/slave/cgroups_isolator.cpp (original)
+++ incubator/mesos/trunk/src/slave/cgroups_isolator.cpp Mon Apr  8 22:51:00 2013
@@ -34,6 +34,7 @@
 #include <process/dispatch.hpp>
 
 #include <stout/bytes.hpp>
+#include <stout/duration.hpp>
 #include <stout/error.hpp>
 #include <stout/exit.hpp>
 #include <stout/foreach.hpp>
@@ -79,8 +80,13 @@ using state::FrameworkState;
 using state::ExecutorState;
 using state::RunState;
 
+// CPU subsystem constants.
 const size_t CPU_SHARES_PER_CPU = 1024;
 const size_t MIN_CPU_SHARES = 10;
+const Duration CPU_CFS_PERIOD = Milliseconds(100.0); // Linux default.
+const Duration MIN_CPU_CFS_QUOTA = Milliseconds(1.0);
+
+// Memory subsystem constants.
 const size_t MIN_MEMORY_MB = 32 * Megabyte;
 
 
@@ -252,6 +258,7 @@ void CgroupsIsolator::initialize(
 
   // Check if the hierarchy is already mounted, and if not, mount it.
   Try<bool> mounted = cgroups::mounted(hierarchy);
+
   if (mounted.isError()) {
     LOG(FATAL) << "Failed to determine if " << hierarchy
                << " is already mounted: " << mounted.error();
@@ -259,6 +266,7 @@ void CgroupsIsolator::initialize(
     // Make sure that all the desired subsystems are attached to the
     // already mounted hierarchy.
     Try<set<string> > attached = cgroups::subsystems(hierarchy);
+
     if (attached.isError()) {
       LOG(FATAL) << "Failed to determine the attached subsystems "
                  << "for the cgroup hierarchy at " << hierarchy << ": "
@@ -394,6 +402,7 @@ void CgroupsIsolator::initialize(
           numify<unsigned int>(strings::trim(startEnd[0]));
         Try<unsigned int> end =
           numify<unsigned int>(strings::trim(startEnd[1]));
+
         CHECK(start.isSome() && end.isSome())
           << "Failed to parse cpu range '" << range
           << "' from cpuset.cpus '" << cpuset.get() << "'";
@@ -404,9 +413,11 @@ void CgroupsIsolator::initialize(
       } else {
         // Case id (e.g. 7 in 0-2,7,12-14).
         Try<unsigned int> cpuId = numify<unsigned int>(range);
+
         CHECK_SOME(cpuId)
           << "Failed to parse cpu '" << range << "' from cpuset.cpus '"
           << cpuset.get()  << "'";
+
         cgroupCpus.insert(cpuId.get());
       }
     }
@@ -421,7 +432,9 @@ void CgroupsIsolator::initialize(
 
     // Initialize our cpu allocations.
     Try<list<proc::CPU> > cpus = proc::cpus();
+
     CHECK_SOME(cpus) << "Failed to extract CPUs from /proc/cpuinfo";
+
     foreach (const proc::CPU& cpu, cpus.get()) {
       if (this->cpus.size() >= cpusResource.value()) {
         break;
@@ -440,6 +453,30 @@ void CgroupsIsolator::initialize(
     handlers["mem"] = &CgroupsIsolator::memChanged;
   }
 
+  // Add handlers for optional subsystem features.
+  if (flags.cgroups_enable_cfs) {
+    // Verify dependent subsystem is present and kernel supports CFS controls.
+    if (!subsystems.contains("cpu")) {
+      EXIT(1) << "The 'cfs' cgroups feature flag is dependent on the 'cpu' "
+              << "subsystem.\n"
+              << "Please enable the cpu subsystem to use the cfs feature.";
+    }
+
+    exists = cgroups::exists(hierarchy, flags.cgroups_root, "cpu.cfs_quota_us");
+
+    CHECK_SOME(exists)
+      << "Failed to determine if 'cpu.cfs_quota_us' control exists";
+
+    if (!exists.get()) {
+      EXIT(1) << "Failed to find 'cpu.cfs_quota_us'. Your kernel "
+              << "might be too old to use the CFS cgroups feature";
+    }
+
+    // Make "cfsChanged" the cpu resource handler.
+    // TODO(tdmackey): Allow multiple handlers per resource.
+    handlers["cpus"] = &CgroupsIsolator::cfsChanged;
+  }
+
   initialized = true;
 }
 
@@ -494,6 +531,7 @@ void CgroupsIsolator::launchExecutor(
 
   // Create a new cgroup for the executor.
   Try<Nothing> create = cgroups::create(hierarchy, info->name());
+
   if (create.isError()) {
     LOG(FATAL) << "Failed to create cgroup for executor " << executorId
                << " of framework " << frameworkId
@@ -568,6 +606,7 @@ void CgroupsIsolator::launchExecutor(
     // at 0. For more details, refer to
     // http://www.kernel.org/doc/Documentation/cgroups/memory.txt
     Try<Nothing> assign = cgroups::assign(hierarchy, info->name(), ::getpid());
+
     if (assign.isError()) {
       EXIT(1) << "Failed to assign executor '" << executorId
               << "' of framework " << frameworkId
@@ -636,6 +675,7 @@ void CgroupsIsolator::resourcesChanged(
   foreach (const Resource& resource, resources) {
     if (handlers.contains(resource.name())) {
       Try<Nothing> result = (this->*handlers[resource.name()])(info, resource);
+
       if (result.isError()) {
         LOG(ERROR) << result.error();
       }
@@ -788,16 +828,17 @@ Try<Nothing> CgroupsIsolator::cpusChange
   }
 
   double cpus = resource.scalar().value();
-  size_t cpuShares =
-    std::max((size_t)(CPU_SHARES_PER_CPU * cpus), MIN_CPU_SHARES);
+  size_t shares =
+    std::max((size_t) (CPU_SHARES_PER_CPU * cpus), MIN_CPU_SHARES);
 
   Try<Nothing> write = cgroups::write(
-      hierarchy, info->name(), "cpu.shares", stringify(cpuShares));
+      hierarchy, info->name(), "cpu.shares", stringify(shares));
+
   if (write.isError()) {
     return Error("Failed to update 'cpu.shares': " + write.error());
   }
 
-  LOG(INFO) << "Updated 'cpu.shares' to " << cpuShares
+  LOG(INFO) << "Updated 'cpu.shares' to " << shares
             << " for executor " << info->executorId
             << " of framework " << info->frameworkId;
 
@@ -811,10 +852,7 @@ Try<Nothing> CgroupsIsolator::cpusetChan
 {
   CHECK_NOTNULL(info->cpuset);
   CHECK(resource.name() == "cpus");
-
-  if (resource.type() != Value::SCALAR) {
-    return Error("Expecting resource 'cpus' to be a scalar");
-  }
+  CHECK(resource.type() == Value::SCALAR);
 
   double delta = resource.scalar().value() - info->cpuset->usage();
 
@@ -834,6 +872,7 @@ Try<Nothing> CgroupsIsolator::cpusetChan
 
   Try<Nothing> write = cgroups::write(
       hierarchy, info->name(), "cpuset.cpus", stringify(*(info->cpuset)));
+
   if (write.isError()) {
     return Error("Failed to update 'cpuset.cpus': " + write.error());
   }
@@ -846,6 +885,44 @@ Try<Nothing> CgroupsIsolator::cpusetChan
 }
 
 
+Try<Nothing> CgroupsIsolator::cfsChanged(
+    CgroupInfo* info,
+    const Resource& resource)
+{
+  CHECK(resource.name() == "cpus");
+  CHECK(resource.type() == Value::SCALAR);
+
+  Try<Nothing> write = cgroups::write(
+      hierarchy, info->name(), "cpu.cfs_period_us", stringify(CPU_CFS_PERIOD.us()));
+
+  if (write.isError()) {
+    return Error("Failed to update 'cpu.cfs_period_us': " + write.error());
+  }
+
+  double cpus = resource.scalar().value();
+  size_t quota =
+    std::max(CPU_CFS_PERIOD.us() * cpus, MIN_CPU_CFS_QUOTA.us());
+
+  write = cgroups::write(
+      hierarchy, info->name(), "cpu.cfs_quota_us", stringify(quota));
+
+  if (write.isError()) {
+    return Error("Failed to update 'cpu.cfs_quota_us': " + write.error());
+  }
+
+  LOG(INFO) << "Updated 'cpu.cfs_period_us' to " << CPU_CFS_PERIOD.us()
+            << " and 'cpu.cfs_quota_us' to " << quota
+            << " for executor " << info->executorId
+            << " of framework " << info->frameworkId;
+
+  // Set cpu.shares as well.
+  // TODO(tdmackey): Allow multiple handlers per resource.
+  cpusChanged(info, resource);
+
+  return Nothing();
+}
+
+
 Try<Nothing> CgroupsIsolator::memChanged(
     CgroupInfo* info,
     const Resource& resource)
@@ -869,7 +946,7 @@ Try<Nothing> CgroupsIsolator::memChanged
   // might not be able to decrease 'memory.limit_in_bytes' if too much
   // memory is being used. This is probably okay if the machine has
   // available resources; TODO(benh): Introduce a MemoryWatcherProcess
-  // which monitors the descrepancy between usage and soft limit and
+  // which monitors the discrepancy between usage and soft limit and
   // introduces a "manual oom" if necessary.
   string control = "memory.limit_in_bytes";
 
@@ -891,6 +968,7 @@ Try<Nothing> CgroupsIsolator::memChanged
 
   Try<Nothing> write = cgroups::write(
       hierarchy, info->name(), control, stringify(limitInBytes));
+
   if (write.isError()) {
     return Error("Failed to update '" + control + "': " + write.error());
   }

Modified: incubator/mesos/trunk/src/slave/cgroups_isolator.hpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/slave/cgroups_isolator.hpp?rev=1465819&r1=1465818&r2=1465819&view=diff
==============================================================================
--- incubator/mesos/trunk/src/slave/cgroups_isolator.hpp (original)
+++ incubator/mesos/trunk/src/slave/cgroups_isolator.hpp Mon Apr  8 22:51:00 2013
@@ -201,6 +201,15 @@ private:
       CgroupInfo* info,
       const Resource& resource);
 
+  // The callback which will be invoked when "cpus" resource has changed,
+  // and the cfs cgroups feature flag is enabled.
+  // @param   info          The Cgroup information.
+  // @param   resources     The handle for the resources.
+  // @return  Whether the operation succeeds.
+  Try<Nothing> cfsChanged(
+      CgroupInfo* info,
+      const Resource& resource);
+
   // The callback which will be invoked when "mem" resource has changed.
   // @param   info          The Cgroup information.
   // @param   resources     The handle for the resources.

Modified: incubator/mesos/trunk/src/slave/flags.hpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/slave/flags.hpp?rev=1465819&r1=1465818&r2=1465819&view=diff
==============================================================================
--- incubator/mesos/trunk/src/slave/flags.hpp (original)
+++ incubator/mesos/trunk/src/slave/flags.hpp Mon Apr  8 22:51:00 2013
@@ -159,6 +159,12 @@ public:
         "cgroups_subsystems",
         "List of subsystems to enable (e.g., 'cpu,freezer')\n",
         "cpu,memory,freezer");
+
+    add(&Flags::cgroups_enable_cfs,
+        "cgroups_enable_cfs",
+        "Cgroups feature flag to enable hard limits on CPU resources\n"
+        "via the CFS bandwidth limiting subfeature.\n",
+        false);
 #endif
   }
 
@@ -182,6 +188,7 @@ public:
   std::string cgroups_hierarchy;
   std::string cgroups_root;
   std::string cgroups_subsystems;
+  bool cgroups_enable_cfs;
 #endif
 };
 

Modified: incubator/mesos/trunk/third_party/libprocess/third_party/stout/include/stout/multihashmap.hpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/third_party/libprocess/third_party/stout/include/stout/multihashmap.hpp?rev=1465819&r1=1465818&r2=1465819&view=diff
==============================================================================
--- incubator/mesos/trunk/third_party/libprocess/third_party/stout/include/stout/multihashmap.hpp (original)
+++ incubator/mesos/trunk/third_party/libprocess/third_party/stout/include/stout/multihashmap.hpp Mon Apr  8 22:51:00 2013
@@ -82,7 +82,7 @@ bool multihashmap<K, V>::remove(const K&
 template <typename K, typename V>
 bool multihashmap<K, V>::contains(const K& key) const
 {
-  return count(key) > 0;
+  return multihashmap<K, V>::count(key) > 0;
 }