You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2013/04/09 00:51:00 UTC
svn commit: r1465819 - in /incubator/mesos/trunk:
src/slave/cgroups_isolator.cpp src/slave/cgroups_isolator.hpp
src/slave/flags.hpp
third_party/libprocess/third_party/stout/include/stout/multihashmap.hpp
Author: vinodkone
Date: Mon Apr 8 22:51:00 2013
New Revision: 1465819
URL: http://svn.apache.org/r1465819
Log:
Added support for CPU hard limits via CFS Bandwidth Control.
CFS is unique relative to existing Mesos cgroups support in that it
is a "subfeature" of an already supported cgroups subsystem, cpu.
Also, there are two "tunables" for configuring CFS bandwidth
limiting.
There are 4 approaches one could take:
1) Use the CFS bandwidth limiting if the feature is present.
2) Expose it as a separate flag, e.g., "cpu,cfs,memory,freezer".
3) Add feature flag support to subsystems via an additional delimiter,
e.g., "cpu+cfs,memory,freezer".
4) Add an additional control flag via some other means.
Option 2's downside is that it breaks the 1:1 mapping between cgroups
subsystems and a cgroups resource flag.
Option 3's downside is that it greatly increases the complexity of
parsing cgroups subsystem flags.
This diff takes option 1.
From: David Mackey <td...@booleanhaiku.com>
Review: https://reviews.apache.org/r/9464
Modified:
incubator/mesos/trunk/src/slave/cgroups_isolator.cpp
incubator/mesos/trunk/src/slave/cgroups_isolator.hpp
incubator/mesos/trunk/src/slave/flags.hpp
incubator/mesos/trunk/third_party/libprocess/third_party/stout/include/stout/multihashmap.hpp
Modified: incubator/mesos/trunk/src/slave/cgroups_isolator.cpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/slave/cgroups_isolator.cpp?rev=1465819&r1=1465818&r2=1465819&view=diff
==============================================================================
--- incubator/mesos/trunk/src/slave/cgroups_isolator.cpp (original)
+++ incubator/mesos/trunk/src/slave/cgroups_isolator.cpp Mon Apr 8 22:51:00 2013
@@ -34,6 +34,7 @@
#include <process/dispatch.hpp>
#include <stout/bytes.hpp>
+#include <stout/duration.hpp>
#include <stout/error.hpp>
#include <stout/exit.hpp>
#include <stout/foreach.hpp>
@@ -79,8 +80,13 @@ using state::FrameworkState;
using state::ExecutorState;
using state::RunState;
+// CPU subsystem constants.
const size_t CPU_SHARES_PER_CPU = 1024;
const size_t MIN_CPU_SHARES = 10;
+const Duration CPU_CFS_PERIOD = Milliseconds(100.0); // Linux default.
+const Duration MIN_CPU_CFS_QUOTA = Milliseconds(1.0);
+
+// Memory subsystem constants.
const size_t MIN_MEMORY_MB = 32 * Megabyte;
@@ -252,6 +258,7 @@ void CgroupsIsolator::initialize(
// Check if the hierarchy is already mounted, and if not, mount it.
Try<bool> mounted = cgroups::mounted(hierarchy);
+
if (mounted.isError()) {
LOG(FATAL) << "Failed to determine if " << hierarchy
<< " is already mounted: " << mounted.error();
@@ -259,6 +266,7 @@ void CgroupsIsolator::initialize(
// Make sure that all the desired subsystems are attached to the
// already mounted hierarchy.
Try<set<string> > attached = cgroups::subsystems(hierarchy);
+
if (attached.isError()) {
LOG(FATAL) << "Failed to determine the attached subsystems "
<< "for the cgroup hierarchy at " << hierarchy << ": "
@@ -394,6 +402,7 @@ void CgroupsIsolator::initialize(
numify<unsigned int>(strings::trim(startEnd[0]));
Try<unsigned int> end =
numify<unsigned int>(strings::trim(startEnd[1]));
+
CHECK(start.isSome() && end.isSome())
<< "Failed to parse cpu range '" << range
<< "' from cpuset.cpus '" << cpuset.get() << "'";
@@ -404,9 +413,11 @@ void CgroupsIsolator::initialize(
} else {
// Case id (e.g. 7 in 0-2,7,12-14).
Try<unsigned int> cpuId = numify<unsigned int>(range);
+
CHECK_SOME(cpuId)
<< "Failed to parse cpu '" << range << "' from cpuset.cpus '"
<< cpuset.get() << "'";
+
cgroupCpus.insert(cpuId.get());
}
}
@@ -421,7 +432,9 @@ void CgroupsIsolator::initialize(
// Initialize our cpu allocations.
Try<list<proc::CPU> > cpus = proc::cpus();
+
CHECK_SOME(cpus) << "Failed to extract CPUs from /proc/cpuinfo";
+
foreach (const proc::CPU& cpu, cpus.get()) {
if (this->cpus.size() >= cpusResource.value()) {
break;
@@ -440,6 +453,30 @@ void CgroupsIsolator::initialize(
handlers["mem"] = &CgroupsIsolator::memChanged;
}
+ // Add handlers for optional subsystem features.
+ if (flags.cgroups_enable_cfs) {
+ // Verify dependent subsystem is present and kernel supports CFS controls.
+ if (!subsystems.contains("cpu")) {
+ EXIT(1) << "The 'cfs' cgroups feature flag is dependent on the 'cpu' "
+ << "subsystem.\n"
+ << "Please enable the cpu subsystem to use the cfs feature.";
+ }
+
+ exists = cgroups::exists(hierarchy, flags.cgroups_root, "cpu.cfs_quota_us");
+
+ CHECK_SOME(exists)
+ << "Failed to determine if 'cpu.cfs_quota_us' control exists";
+
+ if (!exists.get()) {
+ EXIT(1) << "Failed to find 'cpu.cfs_quota_us'. Your kernel "
+ << "might be too old to use the CFS cgroups feature";
+ }
+
+ // Make "cfsChanged" the cpu resource handler.
+ // TODO(tdmackey): Allow multiple handlers per resource.
+ handlers["cpus"] = &CgroupsIsolator::cfsChanged;
+ }
+
initialized = true;
}
@@ -494,6 +531,7 @@ void CgroupsIsolator::launchExecutor(
// Create a new cgroup for the executor.
Try<Nothing> create = cgroups::create(hierarchy, info->name());
+
if (create.isError()) {
LOG(FATAL) << "Failed to create cgroup for executor " << executorId
<< " of framework " << frameworkId
@@ -568,6 +606,7 @@ void CgroupsIsolator::launchExecutor(
// at 0. For more details, refer to
// http://www.kernel.org/doc/Documentation/cgroups/memory.txt
Try<Nothing> assign = cgroups::assign(hierarchy, info->name(), ::getpid());
+
if (assign.isError()) {
EXIT(1) << "Failed to assign executor '" << executorId
<< "' of framework " << frameworkId
@@ -636,6 +675,7 @@ void CgroupsIsolator::resourcesChanged(
foreach (const Resource& resource, resources) {
if (handlers.contains(resource.name())) {
Try<Nothing> result = (this->*handlers[resource.name()])(info, resource);
+
if (result.isError()) {
LOG(ERROR) << result.error();
}
@@ -788,16 +828,17 @@ Try<Nothing> CgroupsIsolator::cpusChange
}
double cpus = resource.scalar().value();
- size_t cpuShares =
- std::max((size_t)(CPU_SHARES_PER_CPU * cpus), MIN_CPU_SHARES);
+ size_t shares =
+ std::max((size_t) (CPU_SHARES_PER_CPU * cpus), MIN_CPU_SHARES);
Try<Nothing> write = cgroups::write(
- hierarchy, info->name(), "cpu.shares", stringify(cpuShares));
+ hierarchy, info->name(), "cpu.shares", stringify(shares));
+
if (write.isError()) {
return Error("Failed to update 'cpu.shares': " + write.error());
}
- LOG(INFO) << "Updated 'cpu.shares' to " << cpuShares
+ LOG(INFO) << "Updated 'cpu.shares' to " << shares
<< " for executor " << info->executorId
<< " of framework " << info->frameworkId;
@@ -811,10 +852,7 @@ Try<Nothing> CgroupsIsolator::cpusetChan
{
CHECK_NOTNULL(info->cpuset);
CHECK(resource.name() == "cpus");
-
- if (resource.type() != Value::SCALAR) {
- return Error("Expecting resource 'cpus' to be a scalar");
- }
+ CHECK(resource.type() == Value::SCALAR);
double delta = resource.scalar().value() - info->cpuset->usage();
@@ -834,6 +872,7 @@ Try<Nothing> CgroupsIsolator::cpusetChan
Try<Nothing> write = cgroups::write(
hierarchy, info->name(), "cpuset.cpus", stringify(*(info->cpuset)));
+
if (write.isError()) {
return Error("Failed to update 'cpuset.cpus': " + write.error());
}
@@ -846,6 +885,44 @@ Try<Nothing> CgroupsIsolator::cpusetChan
}
+// Handler for the "cpus" resource when the CFS feature flag
+// (cgroups_enable_cfs) is enabled. Writes 'cpu.cfs_period_us' and
+// 'cpu.cfs_quota_us' for the executor's cgroup and then delegates to
+// cpusChanged() so that 'cpu.shares' is updated as well.
+// @param info The Cgroup information.
+// @param resource The "cpus" resource; must be a scalar.
+// @return Nothing on success, or an Error if any of the control
+//     writes fails (including the 'cpu.shares' update).
+Try<Nothing> CgroupsIsolator::cfsChanged(
+    CgroupInfo* info,
+    const Resource& resource)
+{
+  CHECK(resource.name() == "cpus");
+  CHECK(resource.type() == Value::SCALAR);
+
+  // Write the period first; the quota written below is expressed
+  // relative to this period.
+  Try<Nothing> write = cgroups::write(
+      hierarchy,
+      info->name(),
+      "cpu.cfs_period_us",
+      stringify(CPU_CFS_PERIOD.us()));
+
+  if (write.isError()) {
+    return Error("Failed to update 'cpu.cfs_period_us': " + write.error());
+  }
+
+  // The quota is the period scaled by the number of cpus, but never
+  // less than MIN_CPU_CFS_QUOTA.
+  double cpus = resource.scalar().value();
+  size_t quota =
+    std::max(CPU_CFS_PERIOD.us() * cpus, MIN_CPU_CFS_QUOTA.us());
+
+  write = cgroups::write(
+      hierarchy, info->name(), "cpu.cfs_quota_us", stringify(quota));
+
+  if (write.isError()) {
+    return Error("Failed to update 'cpu.cfs_quota_us': " + write.error());
+  }
+
+  LOG(INFO) << "Updated 'cpu.cfs_period_us' to " << CPU_CFS_PERIOD.us()
+            << " and 'cpu.cfs_quota_us' to " << quota
+            << " for executor " << info->executorId
+            << " of framework " << info->frameworkId;
+
+  // Set cpu.shares as well. Return the result so that a failed
+  // 'cpu.shares' update is reported to the caller instead of being
+  // silently dropped.
+  // TODO(tdmackey): Allow multiple handlers per resource.
+  return cpusChanged(info, resource);
+}
+
+
Try<Nothing> CgroupsIsolator::memChanged(
CgroupInfo* info,
const Resource& resource)
@@ -869,7 +946,7 @@ Try<Nothing> CgroupsIsolator::memChanged
// might not be able to decrease 'memory.limit_in_bytes' if too much
// memory is being used. This is probably okay if the machine has
// available resources; TODO(benh): Introduce a MemoryWatcherProcess
- // which monitors the descrepancy between usage and soft limit and
+ // which monitors the discrepancy between usage and soft limit and
// introduces a "manual oom" if necessary.
string control = "memory.limit_in_bytes";
@@ -891,6 +968,7 @@ Try<Nothing> CgroupsIsolator::memChanged
Try<Nothing> write = cgroups::write(
hierarchy, info->name(), control, stringify(limitInBytes));
+
if (write.isError()) {
return Error("Failed to update '" + control + "': " + write.error());
}
Modified: incubator/mesos/trunk/src/slave/cgroups_isolator.hpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/slave/cgroups_isolator.hpp?rev=1465819&r1=1465818&r2=1465819&view=diff
==============================================================================
--- incubator/mesos/trunk/src/slave/cgroups_isolator.hpp (original)
+++ incubator/mesos/trunk/src/slave/cgroups_isolator.hpp Mon Apr 8 22:51:00 2013
@@ -201,6 +201,15 @@ private:
CgroupInfo* info,
const Resource& resource);
+ // The callback which will be invoked when "cpus" resource has changed,
+ // and the cfs cgroups feature flag is enabled.
+ // @param info The Cgroup information.
+ // @param resource The handle for the resource.
+ // @return Whether the operation succeeds.
+ Try<Nothing> cfsChanged(
+ CgroupInfo* info,
+ const Resource& resource);
+
// The callback which will be invoked when "mem" resource has changed.
// @param info The Cgroup information.
// @param resources The handle for the resources.
Modified: incubator/mesos/trunk/src/slave/flags.hpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/slave/flags.hpp?rev=1465819&r1=1465818&r2=1465819&view=diff
==============================================================================
--- incubator/mesos/trunk/src/slave/flags.hpp (original)
+++ incubator/mesos/trunk/src/slave/flags.hpp Mon Apr 8 22:51:00 2013
@@ -159,6 +159,12 @@ public:
"cgroups_subsystems",
"List of subsystems to enable (e.g., 'cpu,freezer')\n",
"cpu,memory,freezer");
+
+ add(&Flags::cgroups_enable_cfs,
+ "cgroups_enable_cfs",
+ "Cgroups feature flag to enable hard limits on CPU resources\n"
+ "via the CFS bandwidth limiting subfeature.\n",
+ false);
#endif
}
@@ -182,6 +188,7 @@ public:
std::string cgroups_hierarchy;
std::string cgroups_root;
std::string cgroups_subsystems;
+ bool cgroups_enable_cfs;
#endif
};
Modified: incubator/mesos/trunk/third_party/libprocess/third_party/stout/include/stout/multihashmap.hpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/third_party/libprocess/third_party/stout/include/stout/multihashmap.hpp?rev=1465819&r1=1465818&r2=1465819&view=diff
==============================================================================
--- incubator/mesos/trunk/third_party/libprocess/third_party/stout/include/stout/multihashmap.hpp (original)
+++ incubator/mesos/trunk/third_party/libprocess/third_party/stout/include/stout/multihashmap.hpp Mon Apr 8 22:51:00 2013
@@ -82,7 +82,7 @@ bool multihashmap<K, V>::remove(const K&
template <typename K, typename V>
bool multihashmap<K, V>::contains(const K& key) const
{
- return count(key) > 0;
+ return multihashmap<K, V>::count(key) > 0;
}