You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2014/02/12 02:31:07 UTC
[01/11] git commit: Revise os::killtree semantics if process not found.
Updated Branches:
refs/heads/master 7fc36c053 -> 0f3f8f35a
Revise os::killtree semantics if process not found.
Change semantics so it is not an error if the process is not found.
Review: https://reviews.apache.org/r/17953
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/413947c8
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/413947c8
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/413947c8
Branch: refs/heads/master
Commit: 413947c809daf944b135d291aa1ba2345bf683d7
Parents: 7fc36c0
Author: Ian Downes <ia...@gmail.com>
Authored: Tue Feb 11 16:32:55 2014 -0800
Committer: Vinod Kone <vi...@twitter.com>
Committed: Tue Feb 11 16:33:43 2014 -0800
----------------------------------------------------------------------
3rdparty/libprocess/3rdparty/stout/include/stout/os/killtree.hpp | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/413947c8/3rdparty/libprocess/3rdparty/stout/include/stout/os/killtree.hpp
----------------------------------------------------------------------
diff --git a/3rdparty/libprocess/3rdparty/stout/include/stout/os/killtree.hpp b/3rdparty/libprocess/3rdparty/stout/include/stout/os/killtree.hpp
index c5917e7..1f45897 100644
--- a/3rdparty/libprocess/3rdparty/stout/include/stout/os/killtree.hpp
+++ b/3rdparty/libprocess/3rdparty/stout/include/stout/os/killtree.hpp
@@ -63,7 +63,9 @@ inline Try<std::list<ProcessTree> > killtree(
Result<Process> process = os::process(pid, processes.get());
if (process.isNone()) {
- return Error("Failed to find process " + stringify(pid));
+ // We do not consider it an error if the process is not present since it
+ // can exit at any time.
+ return std::list<ProcessTree>();
}
struct {
[11/11] git commit: Containerizer - updated tests (part 5).
Posted by vi...@apache.org.
Containerizer - updated tests (part 5).
A few isolator-specific tests haven't been updated.
The tests require a different cgroup mount configuration; please see
https://issues.apache.org/jira/browse/MESOS-926 for preliminary
documentation.
Review: https://reviews.apache.org/r/17013
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/0f3f8f35
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/0f3f8f35
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/0f3f8f35
Branch: refs/heads/master
Commit: 0f3f8f35a73dd5f9b35d657b3912449f570243d3
Parents: d0db1ba
Author: Ian Downes <ia...@gmail.com>
Authored: Tue Feb 11 16:31:22 2014 -0800
Committer: Vinod Kone <vi...@twitter.com>
Committed: Tue Feb 11 16:59:51 2014 -0800
----------------------------------------------------------------------
src/Makefile.am | 1 +
src/examples/balloon_framework.cpp | 7 +
src/slave/containerizer/mesos_containerizer.cpp | 3 +-
src/tests/allocator_tests.cpp | 8 +-
src/tests/balloon_framework_test.sh | 11 +-
src/tests/cgroups_isolator_tests.cpp | 271 +----------
src/tests/cgroups_tests.cpp | 161 ++++---
src/tests/cluster.hpp | 60 ++-
src/tests/containerizer.cpp | 195 ++++++++
src/tests/containerizer.hpp | 112 +++++
src/tests/environment.cpp | 13 +-
src/tests/fault_tolerance_tests.cpp | 30 +-
src/tests/gc_tests.cpp | 34 +-
src/tests/isolator.hpp | 211 ---------
src/tests/isolator_tests.cpp | 417 ++++++++++++----
src/tests/master_contender_detector_tests.cpp | 1 -
src/tests/master_tests.cpp | 124 ++---
src/tests/mesos.cpp | 187 +++++---
src/tests/mesos.hpp | 41 +-
src/tests/monitor_tests.cpp | 61 +--
src/tests/paths_tests.cpp | 14 +-
src/tests/slave_recovery_tests.cpp | 471 +++++++++++--------
src/tests/test_framework_test.sh | 2 +
23 files changed, 1334 insertions(+), 1101 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/0f3f8f35/src/Makefile.am
----------------------------------------------------------------------
diff --git a/src/Makefile.am b/src/Makefile.am
index 8a6a976..aa8bb2b 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -886,6 +886,7 @@ mesos_tests_LDADD = ../$(LIBPROCESS)/3rdparty/libgmock.la libmesos.la
mesos_tests_DEPENDENCIES = # Initialized to allow += below.
if OS_LINUX
+ mesos_tests_SOURCES += tests/cgroups_isolator_tests.cpp
mesos_tests_SOURCES += tests/cgroups_tests.cpp
mesos_tests_SOURCES += tests/fs_tests.cpp
endif
http://git-wip-us.apache.org/repos/asf/mesos/blob/0f3f8f35/src/examples/balloon_framework.cpp
----------------------------------------------------------------------
diff --git a/src/examples/balloon_framework.cpp b/src/examples/balloon_framework.cpp
index d7abf1f..400764d 100644
--- a/src/examples/balloon_framework.cpp
+++ b/src/examples/balloon_framework.cpp
@@ -105,6 +105,13 @@ public:
resource->set_type(Value::SCALAR);
resource->mutable_scalar()->set_value(mem - EXECUTOR_MEMORY_MB);
+ // And all the CPU.
+ double cpus = getScalarResource(offer, "cpus");
+ resource = task.add_resources();
+ resource->set_name("cpus");
+ resource->set_type(Value::SCALAR);
+ resource->mutable_scalar()->set_value(cpus);
+
tasks.push_back(task);
driver->launchTasks(offer.id(), tasks);
http://git-wip-us.apache.org/repos/asf/mesos/blob/0f3f8f35/src/slave/containerizer/mesos_containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos_containerizer.cpp b/src/slave/containerizer/mesos_containerizer.cpp
index c61739b..04b9fa1 100644
--- a/src/slave/containerizer/mesos_containerizer.cpp
+++ b/src/slave/containerizer/mesos_containerizer.cpp
@@ -235,7 +235,8 @@ Future<Nothing> MesosContainerizerProcess::_recover(
void asyncSafeFatal(const char* message)
{
// Ignore the return value from write() to silence compiler warning.
- (void) write(STDERR_FILENO, message, strlen(message));
+ while (write(STDERR_FILENO, message, strlen(message)) == -1 &&
+ errno == EINTR);
_exit(1);
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/0f3f8f35/src/tests/allocator_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/allocator_tests.cpp b/src/tests/allocator_tests.cpp
index 751a6c4..31cc836 100644
--- a/src/tests/allocator_tests.cpp
+++ b/src/tests/allocator_tests.cpp
@@ -35,7 +35,7 @@
#include "master/hierarchical_allocator_process.hpp"
#include "master/master.hpp"
-#include "tests/isolator.hpp"
+#include "tests/containerizer.hpp"
#include "tests/mesos.hpp"
using namespace mesos;
@@ -1089,11 +1089,11 @@ TYPED_TEST(AllocatorTest, FrameworkExited)
MockExecutor exec1(executor1.executor_id());
MockExecutor exec2(executor2.executor_id());
- map<ExecutorID, Executor*> execs;
+ hashmap<ExecutorID, Executor*> execs;
execs[executor1.executor_id()] = &exec1;
execs[executor2.executor_id()] = &exec2;
- TestingIsolator isolator(execs);
+ TestContainerizer containerizer(execs);
slave::Flags flags = this->CreateSlaveFlags();
@@ -1101,7 +1101,7 @@ TYPED_TEST(AllocatorTest, FrameworkExited)
EXPECT_CALL(this->allocator, slaveAdded(_, _, _));
- Try<PID<Slave> > slave = this->StartSlave(&isolator, flags);
+ Try<PID<Slave> > slave = this->StartSlave(&containerizer, flags);
ASSERT_SOME(slave);
MockScheduler sched1;
http://git-wip-us.apache.org/repos/asf/mesos/blob/0f3f8f35/src/tests/balloon_framework_test.sh
----------------------------------------------------------------------
diff --git a/src/tests/balloon_framework_test.sh b/src/tests/balloon_framework_test.sh
index e7bf4e6..11e9167 100755
--- a/src/tests/balloon_framework_test.sh
+++ b/src/tests/balloon_framework_test.sh
@@ -6,8 +6,13 @@
source ${MESOS_SOURCE_DIR}/support/colors.sh
source ${MESOS_SOURCE_DIR}/support/atexit.sh
-# TODO(benh): Look for an existing hierarchy first.
-TEST_CGROUP_HIERARCHY=/tmp/mesos_test_cgroup
+EXISTING_FREEZER_HIERARCHY=$(cat /proc/mounts | grep ^freezer | cut -f 2 -d ' ')
+if [[ -n ${EXISTING_FREEZER_HIERARCHY} ]]; then
+ # Strip off the subsystem component.
+ TEST_CGROUP_HIERARCHY=${EXISTING_FREEZER_HIERARCHY%/*}
+else
+ TEST_CGROUP_HIERARCHY=/tmp/mesos_test_cgroup
+fi
TEST_CGROUP_ROOT=mesos_test
# Check if the hierarchy exists. If it doesn't, we want to make sure we
@@ -63,7 +68,7 @@ BALLOON_FRAMEWORK=${MESOS_BUILD_DIR}/src/balloon-framework
# to correspond to flags, so we unset these here.
unset MESOS_BUILD_DIR
unset MESOS_SOURCE_DIR
-unset MESOS_LAUNCHER_DIR
+#unset MESOS_LAUNCHER_DIR # leave this so we can find mesos-fetcher.
unset MESOS_VERBOSE
# Launch master.
http://git-wip-us.apache.org/repos/asf/mesos/blob/0f3f8f35/src/tests/cgroups_isolator_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/cgroups_isolator_tests.cpp b/src/tests/cgroups_isolator_tests.cpp
index 1f5ce76..5a9704d 100644
--- a/src/tests/cgroups_isolator_tests.cpp
+++ b/src/tests/cgroups_isolator_tests.cpp
@@ -25,7 +25,7 @@
#include <stout/proc.hpp>
#include <stout/stringify.hpp>
-#include "slave/cgroups_isolator.hpp"
+#include "slave/containerizer/mesos_containerizer.hpp"
#include "tests/script.hpp"
@@ -35,272 +35,7 @@ using namespace mesos::internal::slave;
using std::map;
-// Run the balloon framework under the cgroups isolator.
-TEST_SCRIPT(CgroupsIsolatorTest,
+// Run the balloon framework under a mesos containerizer.
+TEST_SCRIPT(ContainerizerTest,
ROOT_CGROUPS_BalloonFramework,
"balloon_framework_test.sh")
-
-
-#define GROW_USAGE(delta, cpuset, usage) \
- ({ \
- const map<proc::CPU, double>& allocation = cpuset.grow(delta, usage); \
- foreachpair (const proc::CPU& cpu, double allocated, allocation) { \
- usage[cpu] += allocated; \
- ASSERT_LT(usage[cpu], 1.001); \
- } \
- })
-
-
-#define SHRINK_USAGE(delta, cpuset, usage) \
- ({ \
- const map<proc::CPU, double>& deallocation = cpuset.shrink(delta); \
- foreachpair (const proc::CPU& cpu, double deallocated, deallocation) { \
- usage[cpu] -= deallocated; \
- ASSERT_GT(usage[cpu], -0.001); \
- } \
- })
-
-
-TEST(CgroupsCpusetTest, OneCPUOneCpuset)
-{
- Cpuset cpuset;
-
- map<proc::CPU, double> usage;
- // NOTE: Using the [] operator here led to a warning with gcc 4.4.3.
- usage.insert(std::make_pair(proc::CPU(0, 0, 0), 0.0));
-
- // Saturate the CPU.
- GROW_USAGE(0.2, cpuset, usage);
- GROW_USAGE(0.1, cpuset, usage);
- GROW_USAGE(0.2, cpuset, usage);
- GROW_USAGE(0.5, cpuset, usage);
-
- ASSERT_NEAR(usage[proc::CPU(0,0,0)], 1.0, 0.001);
- ASSERT_NEAR(cpuset.usage(), 1.0, 0.001);
-
- ASSERT_EQ(stringify(cpuset), "0");
-
- // Empty the CPU.
- SHRINK_USAGE(0.5, cpuset, usage);
- SHRINK_USAGE(0.2, cpuset, usage);
- SHRINK_USAGE(0.1, cpuset, usage);
- SHRINK_USAGE(0.2, cpuset, usage);
-
- ASSERT_NEAR(usage[proc::CPU(0,0,0)], 0.0, 0.001);
- ASSERT_NEAR(cpuset.usage(), 0.0, 0.001);
-
- ASSERT_EQ(stringify(cpuset), "");
-}
-
-
-TEST(CgroupsCpusetTest, OneCPUManyCpusets)
-{
- Cpuset cpuset1, cpuset2, cpuset3;
-
- map<proc::CPU, double> usage;
- // NOTE: Using the [] operator here led to a warning with gcc 4.4.3.
- usage.insert(std::make_pair(proc::CPU(0, 0, 0), 0.0));
-
- // Saturate the CPU.
- GROW_USAGE(0.2, cpuset1, usage);
- GROW_USAGE(0.1, cpuset2, usage);
- GROW_USAGE(0.2, cpuset3, usage);
- GROW_USAGE(0.5, cpuset1, usage);
-
- ASSERT_NEAR(usage[proc::CPU(0,0,0)], 1.0, 0.001);
- ASSERT_NEAR(cpuset1.usage(), 0.7, 0.001);
- ASSERT_NEAR(cpuset2.usage(), 0.1, 0.001);
- ASSERT_NEAR(cpuset3.usage(), 0.2, 0.001);
-
- ASSERT_EQ(stringify(cpuset1), "0");
- ASSERT_EQ(stringify(cpuset2), "0");
- ASSERT_EQ(stringify(cpuset3), "0");
-
- // Empty the CPU.
- SHRINK_USAGE(0.5, cpuset1, usage);
- SHRINK_USAGE(0.2, cpuset3, usage);
- SHRINK_USAGE(0.1, cpuset2, usage);
- SHRINK_USAGE(0.2, cpuset1, usage);
-
- ASSERT_NEAR(usage[proc::CPU(0,0,0)], 0.0, 0.001);
- ASSERT_NEAR(cpuset1.usage(), 0.0, 0.001);
- ASSERT_NEAR(cpuset2.usage(), 0.0, 0.001);
- ASSERT_NEAR(cpuset3.usage(), 0.0, 0.001);
-
- ASSERT_EQ(stringify(cpuset1), "");
- ASSERT_EQ(stringify(cpuset2), "");
- ASSERT_EQ(stringify(cpuset3), "");
-}
-
-
-TEST(CgroupsCpusetTest, ManyCPUOneCpuset)
-{
- Cpuset cpuset;
-
- map<proc::CPU, double> usage;
- // NOTE: Using the [] operator here led to a warning with gcc 4.4.3.
- usage.insert(std::make_pair(proc::CPU(0, 0, 0), 0.0));
- usage.insert(std::make_pair(proc::CPU(1, 0, 0), 0.0));
- usage.insert(std::make_pair(proc::CPU(2, 0, 0), 0.0));
-
- // Saturate the first CPU.
- GROW_USAGE(0.2, cpuset, usage);
- GROW_USAGE(0.1, cpuset, usage);
- GROW_USAGE(0.2, cpuset, usage);
- GROW_USAGE(0.5, cpuset, usage);
-
- ASSERT_NEAR(usage[proc::CPU(0,0,0)], 1.0, 0.001);
- ASSERT_NEAR(cpuset.usage(), 1.0, 0.001);
-
- ASSERT_EQ(stringify(cpuset), "0");
-
- // Saturate the second CPU.
- GROW_USAGE(0.6, cpuset, usage);
- GROW_USAGE(0.4, cpuset, usage);
-
- ASSERT_NEAR(usage[proc::CPU(0,0,0)], 1.0, 0.001);
- ASSERT_NEAR(usage[proc::CPU(1,0,0)], 1.0, 0.001);
- ASSERT_NEAR(cpuset.usage(), 2.0, 0.001);
-
- ASSERT_EQ(stringify(cpuset), "0,1");
-
- // Partial third CPU.
- GROW_USAGE(0.1, cpuset, usage);
-
- ASSERT_NEAR(usage[proc::CPU(2,0,0)], 0.1, 0.001);
- ASSERT_NEAR(cpuset.usage(), 2.1, 0.001);
-
- ASSERT_EQ(stringify(cpuset), "0,1,2");
-
- // Empty the CPU.
- SHRINK_USAGE(0.5, cpuset, usage);
- SHRINK_USAGE(0.2, cpuset, usage);
- SHRINK_USAGE(0.1, cpuset, usage);
- SHRINK_USAGE(0.1, cpuset, usage);
- SHRINK_USAGE(0.2, cpuset, usage);
- SHRINK_USAGE(0.4, cpuset, usage);
- SHRINK_USAGE(0.6, cpuset, usage);
-
- ASSERT_NEAR(usage[proc::CPU(0,0,0)], 0.0, 0.001);
- ASSERT_NEAR(cpuset.usage(), 0.0, 0.001);
-
- ASSERT_EQ(stringify(cpuset), "");
-}
-
-
-TEST(CgroupsCpusetTest, ManyCPUManyCpusets)
-{
- Cpuset cpuset1, cpuset2, cpuset3;
-
- map<proc::CPU, double> usage;
- // NOTE: Using the [] operator here led to a warning with gcc 4.4.3.
- usage.insert(std::make_pair(proc::CPU(0, 0, 0), 0.0));
- usage.insert(std::make_pair(proc::CPU(1, 0, 0), 0.0));
- usage.insert(std::make_pair(proc::CPU(2, 0, 0), 0.0));
-
- // Saturate the first CPU.
- GROW_USAGE(0.2, cpuset1, usage);
- GROW_USAGE(0.1, cpuset2, usage);
- GROW_USAGE(0.2, cpuset3, usage);
- GROW_USAGE(0.5, cpuset1, usage);
-
- ASSERT_NEAR(usage[proc::CPU(0,0,0)], 1.0, 0.001);
- ASSERT_NEAR(cpuset1.usage(), 0.7, 0.001);
- ASSERT_NEAR(cpuset2.usage(), 0.1, 0.001);
- ASSERT_NEAR(cpuset3.usage(), 0.2, 0.001);
-
- ASSERT_EQ(stringify(cpuset1), "0");
- ASSERT_EQ(stringify(cpuset2), "0");
- ASSERT_EQ(stringify(cpuset3), "0");
-
- // Saturate the second CPU.
- GROW_USAGE(0.6, cpuset3, usage);
- GROW_USAGE(0.4, cpuset2, usage);
-
- ASSERT_NEAR(usage[proc::CPU(0,0,0)], 1.0, 0.001);
- ASSERT_NEAR(usage[proc::CPU(1,0,0)], 1.0, 0.001);
- ASSERT_NEAR(cpuset2.usage(), 0.5, 0.001);
- ASSERT_NEAR(cpuset3.usage(), 0.8, 0.001);
-
- ASSERT_EQ(stringify(cpuset2), "0,1");
- ASSERT_EQ(stringify(cpuset3), "0,1");
-
- // Partial third CPU.
- GROW_USAGE(0.1, cpuset2, usage);
-
- ASSERT_NEAR(usage[proc::CPU(2,0,0)], 0.1, 0.001);
- ASSERT_NEAR(cpuset2.usage(), 0.6, 0.001);
-
- ASSERT_EQ(stringify(cpuset2), "0,1,2");
-
- // Empty the CPU.
- SHRINK_USAGE(0.5, cpuset1, usage);
- SHRINK_USAGE(0.2, cpuset1, usage);
- SHRINK_USAGE(0.1, cpuset2, usage);
- SHRINK_USAGE(0.1, cpuset2, usage);
- SHRINK_USAGE(0.2, cpuset3, usage);
- SHRINK_USAGE(0.4, cpuset2, usage);
- SHRINK_USAGE(0.6, cpuset3, usage);
-
- ASSERT_NEAR(usage[proc::CPU(0,0,0)], 0.0, 0.001);
- ASSERT_NEAR(usage[proc::CPU(1,0,0)], 0.0, 0.001);
- ASSERT_NEAR(usage[proc::CPU(2,0,0)], 0.0, 0.001);
-
- ASSERT_NEAR(cpuset1.usage(), 0.0, 0.001);
- ASSERT_NEAR(cpuset2.usage(), 0.0, 0.001);
- ASSERT_NEAR(cpuset3.usage(), 0.0, 0.001);
-
- ASSERT_EQ(stringify(cpuset1), "");
- ASSERT_EQ(stringify(cpuset2), "");
- ASSERT_EQ(stringify(cpuset3), "");
-}
-
-
-TEST(CgroupsCpusetTest, IntegerAllocations)
-{
- // Ensure no fragmentation occurs.
- Cpuset cpuset1, cpuset2, cpuset3;
-
- map<proc::CPU, double> usage;
- // NOTE: Using the [] operator here led to a warning with gcc 4.4.3.
- usage.insert(std::make_pair(proc::CPU(0, 0, 0), 0.0));
- usage.insert(std::make_pair(proc::CPU(1, 0, 0), 0.0));
- usage.insert(std::make_pair(proc::CPU(2, 0, 0), 0.0));
- usage.insert(std::make_pair(proc::CPU(3, 0, 0), 0.0));
-
- // Saturate the CPUs.
- GROW_USAGE(1.0, cpuset1, usage);
- GROW_USAGE(2.0, cpuset2, usage);
- GROW_USAGE(1.0, cpuset3, usage);
-
- ASSERT_NEAR(usage[proc::CPU(0,0,0)], 1.0, 0.001);
- ASSERT_NEAR(usage[proc::CPU(1,0,0)], 1.0, 0.001);
- ASSERT_NEAR(usage[proc::CPU(2,0,0)], 1.0, 0.001);
- ASSERT_NEAR(usage[proc::CPU(3,0,0)], 1.0, 0.001);
-
- ASSERT_NEAR(cpuset1.usage(), 1.0, 0.001);
- ASSERT_NEAR(cpuset2.usage(), 2.0, 0.001);
- ASSERT_NEAR(cpuset3.usage(), 1.0, 0.001);
-
- ASSERT_EQ(stringify(cpuset1), "0");
- ASSERT_EQ(stringify(cpuset2), "1,2");
- ASSERT_EQ(stringify(cpuset3), "3");
-
- // Empty the CPU.
- SHRINK_USAGE(1.0, cpuset1, usage);
- SHRINK_USAGE(2.0, cpuset2, usage);
- SHRINK_USAGE(1.0, cpuset3, usage);
-
- ASSERT_NEAR(usage[proc::CPU(0,0,0)], 0.0, 0.001);
- ASSERT_NEAR(usage[proc::CPU(1,0,0)], 0.0, 0.001);
- ASSERT_NEAR(usage[proc::CPU(2,0,0)], 0.0, 0.001);
- ASSERT_NEAR(usage[proc::CPU(3,0,0)], 0.0, 0.001);
-
- ASSERT_NEAR(cpuset1.usage(), 0.0, 0.001);
- ASSERT_NEAR(cpuset2.usage(), 0.0, 0.001);
- ASSERT_NEAR(cpuset3.usage(), 0.0, 0.001);
-
- ASSERT_EQ(stringify(cpuset1), "");
- ASSERT_EQ(stringify(cpuset2), "");
- ASSERT_EQ(stringify(cpuset3), "");
-}
http://git-wip-us.apache.org/repos/asf/mesos/blob/0f3f8f35/src/tests/cgroups_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/cgroups_tests.cpp b/src/tests/cgroups_tests.cpp
index 0e9316d..f0dead7 100644
--- a/src/tests/cgroups_tests.cpp
+++ b/src/tests/cgroups_tests.cpp
@@ -109,55 +109,65 @@ public:
protected:
virtual void SetUp()
{
- Result<std::string> hierarchy_ = cgroups::hierarchy(subsystems);
- ASSERT_FALSE(hierarchy_.isError());
- if (hierarchy_.isNone()) {
- // Try to mount a hierarchy for testing, retrying as necessary since the
- // previous unmount might not have taken effect yet due to a bug in
- // Ubuntu 12.04.
- ASSERT_SOME(cgroups::mount(TEST_CGROUPS_HIERARCHY, subsystems, 10))
- << "-------------------------------------------------------------\n"
- << "We cannot run any cgroups tests that require\n"
- << "a hierarchy with subsystems '" << subsystems << "'\n"
- << "because we failed to find an existing hierarchy\n"
- << "or create a new one. You can either remove all existing\n"
- << "hierarchies, or disable this test case\n"
- << "(i.e., --gtest_filter=-"
- << ::testing::UnitTest::GetInstance()
- ->current_test_info()
- ->test_case_name() << ".*).\n"
- << "-------------------------------------------------------------";
-
- hierarchy = TEST_CGROUPS_HIERARCHY;
- } else {
- hierarchy = hierarchy_.get();
+ foreach (const std::string& subsystem, strings::tokenize(subsystems, ",")) {
+ // Establish the base hierarchy if this is the first subsystem checked.
+ if (baseHierarchy.empty()) {
+ Result<std::string> hierarchy = cgroups::hierarchy(subsystem);
+ ASSERT_FALSE(hierarchy.isError());
+
+ if (hierarchy.isNone()) {
+ baseHierarchy = TEST_CGROUPS_HIERARCHY;
+ } else {
+ // Strip the subsystem to get the base hierarchy.
+ baseHierarchy = strings::remove(
+ hierarchy.get(),
+ subsystem,
+ strings::SUFFIX);
+ }
+ }
+
+ // Mount the subsystem if necessary.
+ std::string hierarchy = path::join(baseHierarchy, subsystem);
+ Try<bool> mounted = cgroups::mounted(hierarchy, subsystem);
+ ASSERT_SOME(mounted);
+ if (!mounted.get()) {
+ ASSERT_SOME(cgroups::mount(hierarchy, subsystem))
+ << "-------------------------------------------------------------\n"
+ << "We cannot run any cgroups tests that require\n"
+ << "a hierarchy with subsystem '" << subsystem << "'\n"
+ << "because we failed to find an existing hierarchy\n"
+ << "or create a new one (tried '" << hierarchy << "').\n"
+ << "You can either remove all existing\n"
+ << "hierarchies, or disable this test case\n"
+ << "(i.e., --gtest_filter=-"
+ << ::testing::UnitTest::GetInstance()
+ ->current_test_info()
+ ->test_case_name() << ".*).\n"
+ << "-------------------------------------------------------------";
+ }
}
-
- // Create a cgroup (removing first if necessary) for the tests to use.
- Try<bool> exists = cgroups::exists(hierarchy, TEST_CGROUPS_ROOT);
- ASSERT_SOME(exists);
- if (exists.get()) {
- AWAIT_READY(cgroups::destroy(hierarchy, TEST_CGROUPS_ROOT));
- }
- ASSERT_SOME(cgroups::create(hierarchy, TEST_CGROUPS_ROOT));
}
virtual void TearDown()
{
// Remove all *our* cgroups.
- Try<bool> exists = cgroups::exists(hierarchy, TEST_CGROUPS_ROOT);
- ASSERT_SOME(exists);
- if (exists.get()) {
- AWAIT_READY(cgroups::destroy(hierarchy, TEST_CGROUPS_ROOT));
+ foreach (const std::string& subsystem, strings::tokenize(subsystems, ",")) {
+ std::string hierarchy = path::join(baseHierarchy, subsystem);
+
+ Try<std::vector<std::string> > cgroups = cgroups::get(hierarchy);
+ CHECK_SOME(cgroups);
+
+ foreach (const std::string& cgroup, cgroups.get()) {
+ // Remove the cgroup whose name is exactly TEST_CGROUPS_ROOT (not a prefix match).
+ if (cgroup == TEST_CGROUPS_ROOT) {
+ AWAIT_READY(cgroups::destroy(hierarchy, cgroup));
+ }
+ }
}
-
- // And cleanup TEST_CGROUPS_HIERARCHY in the event it is needed
- // to be created.
- AWAIT_READY(cgroups::cleanup(TEST_CGROUPS_HIERARCHY));
}
const std::string subsystems; // Subsystems required to run tests.
- std::string hierarchy; // Path to the hierarchy being used.
+ std::string baseHierarchy; // Path to the hierarchy being used.
};
@@ -227,7 +237,9 @@ TEST_F(CgroupsAnyHierarchyTest, ROOT_CGROUPS_Subsystems)
TEST_F(CgroupsAnyHierarchyWithCpuMemoryTest, ROOT_CGROUPS_SubsystemsHierarchy)
{
- Try<std::set<std::string> > names = cgroups::subsystems(hierarchy);
+ std::string cpuHierarchy = path::join(baseHierarchy, "cpu");
+
+ Try<std::set<std::string> > names = cgroups::subsystems(cpuHierarchy);
ASSERT_SOME(names);
Option<std::string> cpu;
@@ -241,6 +253,22 @@ TEST_F(CgroupsAnyHierarchyWithCpuMemoryTest, ROOT_CGROUPS_SubsystemsHierarchy)
}
EXPECT_SOME(cpu);
+ EXPECT_NONE(memory);
+
+ std::string memoryHierarchy = path::join(baseHierarchy, "memory");
+ names = cgroups::subsystems(memoryHierarchy);
+ ASSERT_SOME(names);
+
+ cpu = None();
+ memory = None();
+ foreach (const std::string& name, names.get()) {
+ if (name == "cpu") {
+ cpu = name;
+ } else if (name == "memory") {
+ memory = name;
+ }
+ }
+ EXPECT_NONE(cpu);
EXPECT_SOME(memory);
}
@@ -264,9 +292,8 @@ TEST_F(CgroupsAnyHierarchyTest, ROOT_CGROUPS_Mounted)
{
EXPECT_SOME_FALSE(cgroups::mounted("/tmp-nonexist"));
EXPECT_SOME_FALSE(cgroups::mounted("/tmp"));
- EXPECT_SOME_FALSE(cgroups::mounted(hierarchy + "/not_expected"));
- EXPECT_SOME_TRUE(cgroups::mounted(hierarchy));
- EXPECT_SOME_TRUE(cgroups::mounted(hierarchy + "/"));
+ EXPECT_SOME_FALSE(cgroups::mounted(baseHierarchy + "/not_expected"));
+ EXPECT_SOME_TRUE(cgroups::mounted(baseHierarchy + "/cpu"));
}
@@ -276,25 +303,30 @@ TEST_F(CgroupsAnyHierarchyWithCpuMemoryTest, ROOT_CGROUPS_MountedSubsystems)
EXPECT_SOME_FALSE(cgroups::mounted("/tmp", "cpu,memory"));
EXPECT_SOME_FALSE(cgroups::mounted("/tmp", "cpu"));
EXPECT_SOME_FALSE(cgroups::mounted("/tmp", "invalid"));
- EXPECT_SOME_TRUE(cgroups::mounted(hierarchy, "cpu,memory"));
- EXPECT_SOME_TRUE(cgroups::mounted(hierarchy, "memory"));
- EXPECT_SOME_FALSE(cgroups::mounted(hierarchy, "invalid"));
- EXPECT_SOME_FALSE(cgroups::mounted(hierarchy + "/not_expected", "cpu"));
+ EXPECT_SOME_TRUE(cgroups::mounted(path::join(baseHierarchy, "cpu"), "cpu"));
+ EXPECT_SOME_TRUE(cgroups::mounted(
+ path::join(baseHierarchy, "memory"), "memory"));
+ EXPECT_SOME_FALSE(cgroups::mounted(baseHierarchy, "invalid"));
+ EXPECT_SOME_FALSE(cgroups::mounted(baseHierarchy + "/not_expected", "cpu"));
}
TEST_F(CgroupsAnyHierarchyWithCpuMemoryTest, ROOT_CGROUPS_CreateRemove)
{
EXPECT_ERROR(cgroups::create("/tmp", "test"));
- EXPECT_ERROR(cgroups::create(hierarchy, "mesos_test_missing/1"));
- ASSERT_SOME(cgroups::create(hierarchy, "mesos_test_missing"));
- EXPECT_ERROR(cgroups::remove(hierarchy, "invalid"));
- ASSERT_SOME(cgroups::remove(hierarchy, "mesos_test_missing"));
+ EXPECT_ERROR(cgroups::create(baseHierarchy, "mesos_test_missing/1"));
+ ASSERT_SOME(cgroups::create(
+ path::join(baseHierarchy, "cpu"), "mesos_test_missing"));
+ EXPECT_ERROR(cgroups::remove(baseHierarchy, "invalid"));
+ ASSERT_SOME(cgroups::remove(
+ path::join(baseHierarchy, "cpu"), "mesos_test_missing"));
}
TEST_F(CgroupsAnyHierarchyTest, ROOT_CGROUPS_Get)
{
+ std::string hierarchy = path::join(baseHierarchy, "cpu");
+
ASSERT_SOME(cgroups::create(hierarchy, "mesos_test1"));
ASSERT_SOME(cgroups::create(hierarchy, "mesos_test2"));
@@ -303,7 +335,6 @@ TEST_F(CgroupsAnyHierarchyTest, ROOT_CGROUPS_Get)
EXPECT_EQ(cgroups.get()[0], "mesos_test2");
EXPECT_EQ(cgroups.get()[1], "mesos_test1");
- EXPECT_EQ(cgroups.get()[2], TEST_CGROUPS_ROOT);
ASSERT_SOME(cgroups::remove(hierarchy, "mesos_test1"));
ASSERT_SOME(cgroups::remove(hierarchy, "mesos_test2"));
@@ -312,6 +343,8 @@ TEST_F(CgroupsAnyHierarchyTest, ROOT_CGROUPS_Get)
TEST_F(CgroupsAnyHierarchyTest, ROOT_CGROUPS_NestedCgroups)
{
+ std::string hierarchy = path::join(baseHierarchy, "cpu");
+ ASSERT_SOME(cgroups::create(hierarchy, TEST_CGROUPS_ROOT));
ASSERT_SOME(cgroups::create(hierarchy, path::join(TEST_CGROUPS_ROOT, "1")))
<< "-------------------------------------------------------------\n"
<< "We cannot run this test because it appears you do not have\n"
@@ -338,6 +371,7 @@ TEST_F(CgroupsAnyHierarchyTest, ROOT_CGROUPS_NestedCgroups)
TEST_F(CgroupsAnyHierarchyTest, ROOT_CGROUPS_Tasks)
{
+ std::string hierarchy = path::join(baseHierarchy, "cpu");
Try<std::set<pid_t> > pids = cgroups::processes(hierarchy, "/");
ASSERT_SOME(pids);
EXPECT_NE(0u, pids.get().count(1));
@@ -347,6 +381,7 @@ TEST_F(CgroupsAnyHierarchyTest, ROOT_CGROUPS_Tasks)
TEST_F(CgroupsAnyHierarchyTest, ROOT_CGROUPS_Read)
{
+ std::string hierarchy = path::join(baseHierarchy, "cpu");
EXPECT_ERROR(cgroups::read(hierarchy, TEST_CGROUPS_ROOT, "invalid"));
std::string pid = stringify(::getpid());
@@ -359,12 +394,15 @@ TEST_F(CgroupsAnyHierarchyTest, ROOT_CGROUPS_Read)
TEST_F(CgroupsAnyHierarchyTest, ROOT_CGROUPS_Write)
{
+ std::string hierarchy = path::join(baseHierarchy, "cpu");
EXPECT_ERROR(
cgroups::write(hierarchy, TEST_CGROUPS_ROOT, "invalid", "invalid"));
pid_t pid = ::fork();
ASSERT_NE(-1, pid);
+ ASSERT_SOME(cgroups::create(hierarchy, TEST_CGROUPS_ROOT));
+
if (pid > 0) {
// In parent process.
ASSERT_SOME(
@@ -405,17 +443,19 @@ public:
TEST_F(CgroupsAnyHierarchyWithCpuAcctMemoryTest, ROOT_CGROUPS_Stat)
{
- EXPECT_ERROR(cgroups::stat(hierarchy, TEST_CGROUPS_ROOT, "invalid"));
+ EXPECT_ERROR(cgroups::stat(baseHierarchy, TEST_CGROUPS_ROOT, "invalid"));
Try<hashmap<std::string, uint64_t> > result =
- cgroups::stat(hierarchy, "/", "cpuacct.stat");
+ cgroups::stat(
+ path::join(baseHierarchy, "cpuacct"), "/", "cpuacct.stat");
ASSERT_SOME(result);
EXPECT_TRUE(result.get().contains("user"));
EXPECT_TRUE(result.get().contains("system"));
EXPECT_GT(result.get()["user"], 0llu);
EXPECT_GT(result.get()["system"], 0llu);
- result = cgroups::stat(hierarchy, "/", "memory.stat");
+ result = cgroups::stat(
+ path::join(baseHierarchy, "memory"), "/", "memory.stat");
ASSERT_SOME(result);
EXPECT_TRUE(result.get().contains("rss"));
EXPECT_GT(result.get()["rss"], 0llu);
@@ -424,6 +464,8 @@ TEST_F(CgroupsAnyHierarchyWithCpuAcctMemoryTest, ROOT_CGROUPS_Stat)
TEST_F(CgroupsAnyHierarchyWithCpuMemoryTest, ROOT_CGROUPS_Listen)
{
+ std::string hierarchy = path::join(baseHierarchy, "memory");
+ ASSERT_SOME(cgroups::create(hierarchy, TEST_CGROUPS_ROOT));
ASSERT_SOME(
cgroups::exists(hierarchy, TEST_CGROUPS_ROOT, "memory.oom_control"))
<< "-------------------------------------------------------------\n"
@@ -519,6 +561,9 @@ TEST_F(CgroupsAnyHierarchyWithCpuMemoryFreezerTest, ROOT_CGROUPS_Freeze)
int dummy;
ASSERT_NE(-1, ::pipe(pipes));
+ std::string hierarchy = path::join(baseHierarchy, "freezer");
+ ASSERT_SOME(cgroups::create(hierarchy, TEST_CGROUPS_ROOT));
+
pid_t pid = ::fork();
ASSERT_NE(-1, pid);
@@ -586,6 +631,9 @@ TEST_F(CgroupsAnyHierarchyWithCpuMemoryFreezerTest, ROOT_CGROUPS_Kill)
int dummy;
ASSERT_NE(-1, ::pipe(pipes));
+ std::string hierarchy = path::join(baseHierarchy, "freezer");
+ ASSERT_SOME(cgroups::create(hierarchy, TEST_CGROUPS_ROOT));
+
pid_t pid = ::fork();
ASSERT_NE(-1, pid);
@@ -649,6 +697,9 @@ TEST_F(CgroupsAnyHierarchyWithCpuMemoryFreezerTest, ROOT_CGROUPS_Destroy)
int dummy;
ASSERT_NE(-1, ::pipe(pipes));
+ std::string hierarchy = path::join(baseHierarchy, "freezer");
+ ASSERT_SOME(cgroups::create(hierarchy, TEST_CGROUPS_ROOT));
+
pid_t pid = ::fork();
ASSERT_NE(-1, pid);
http://git-wip-us.apache.org/repos/asf/mesos/blob/0f3f8f35/src/tests/cluster.hpp
----------------------------------------------------------------------
diff --git a/src/tests/cluster.hpp b/src/tests/cluster.hpp
index 1334800..5100959 100644
--- a/src/tests/cluster.hpp
+++ b/src/tests/cluster.hpp
@@ -44,8 +44,7 @@
#include "master/registrar.hpp"
#include "slave/flags.hpp"
-#include "slave/isolator.hpp"
-#include "slave/process_isolator.hpp"
+#include "slave/containerizer/containerizer.hpp"
#include "slave/slave.hpp"
#include "state/leveldb.hpp"
@@ -142,7 +141,7 @@ public:
// The isolator is expected to outlive the launched slave (i.e.,
// until it is stopped via Slaves::stop).
Try<process::PID<slave::Slave> > start(
- slave::Isolator* isolator,
+ slave::Containerizer* containerizer,
const slave::Flags& flags = slave::Flags());
// Start and manage a new slave injecting the specified Master
@@ -153,7 +152,7 @@ public:
const slave::Flags& flags = slave::Flags());
Try<process::PID<slave::Slave> > start(
- slave::Isolator* isolator,
+ slave::Containerizer* containerizer,
process::Owned<MasterDetector> detector,
const slave::Flags& flags = slave::Flags());
@@ -176,13 +175,13 @@ public:
struct Slave
{
Slave()
- : isolator(NULL),
+ : containerizer(NULL),
slave(NULL),
detector(NULL) {}
- // Only register the isolator here if it is created within the
+ // Only register the containerizer here if it is created within the
// Cluster.
- slave::Isolator* isolator;
+ slave::Containerizer* containerizer;
slave::Slave* slave;
process::Owned<MasterDetector> detector;
};
@@ -424,15 +423,18 @@ inline Try<process::PID<slave::Slave> > Cluster::Slaves::start(
Slave slave;
- // Create a new process isolator for this slave.
- slave.isolator = new slave::ProcessIsolator();
- process::spawn(slave.isolator);
+ // Create a new containerizer for this slave.
+ Try<slave::Containerizer*> containerizer =
+ slave::Containerizer::create(flags, true);
+ CHECK_SOME(containerizer);
+
+ slave.containerizer = containerizer.get();
// Get a detector for the master(s).
slave.detector = masters->detector();
- slave.slave = new slave::Slave(flags, true, slave.detector.get(), slave.isolator,
- &cluster->files);
+ slave.slave = new slave::Slave(
+ flags, slave.detector.get(), slave.containerizer, &cluster->files);
process::PID<slave::Slave> pid = process::spawn(slave.slave);
slaves[pid] = slave;
@@ -442,10 +444,10 @@ inline Try<process::PID<slave::Slave> > Cluster::Slaves::start(
inline Try<process::PID<slave::Slave> > Cluster::Slaves::start(
- slave::Isolator* isolator,
+ slave::Containerizer* containerizer,
const slave::Flags& flags)
{
- return start(isolator, masters->detector(), flags);
+ return start(containerizer, masters->detector(), flags);
}
@@ -453,12 +455,32 @@ inline Try<process::PID<slave::Slave> > Cluster::Slaves::start(
process::Owned<MasterDetector> detector,
const slave::Flags& flags)
{
- return start(new slave::ProcessIsolator(), detector, flags);
+ // TODO(benh): Create a work directory if using the default.
+
+ Slave slave;
+
+ // Create a new containerizer for this slave.
+ Try<slave::Containerizer*> containerizer =
+ slave::Containerizer::create(flags, true);
+ CHECK_SOME(containerizer);
+
+ slave.containerizer = containerizer.get();
+
+ // Get a detector for the master(s).
+ slave.detector = detector;
+
+ slave.slave = new slave::Slave(
+ flags, slave.detector.get(), slave.containerizer, &cluster->files);
+ process::PID<slave::Slave> pid = process::spawn(slave.slave);
+
+ slaves[pid] = slave;
+
+ return pid;
}
inline Try<process::PID<slave::Slave> > Cluster::Slaves::start(
- slave::Isolator* isolator,
+ slave::Containerizer* containerizer,
process::Owned<MasterDetector> detector,
const slave::Flags& flags)
{
@@ -469,8 +491,8 @@ inline Try<process::PID<slave::Slave> > Cluster::Slaves::start(
// Get a detector for the master(s).
slave.detector = detector;
- slave.slave = new slave::Slave(flags, true, slave.detector.get(),
- isolator, &cluster->files);
+ slave.slave = new slave::Slave(
+ flags, slave.detector.get(), containerizer, &cluster->files);
process::PID<slave::Slave> pid = process::spawn(slave.slave);
slaves[pid] = slave;
@@ -497,7 +519,7 @@ inline Try<Nothing> Cluster::Slaves::stop(
process::wait(slave.slave);
delete slave.slave;
- delete slave.isolator; // May be NULL.
+ delete slave.containerizer; // May be NULL.
slaves.erase(pid);
http://git-wip-us.apache.org/repos/asf/mesos/blob/0f3f8f35/src/tests/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer.cpp b/src/tests/containerizer.cpp
new file mode 100644
index 0000000..bfb9341
--- /dev/null
+++ b/src/tests/containerizer.cpp
@@ -0,0 +1,195 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tests/containerizer.hpp"
+#include "tests/mesos.hpp"
+
+using std::map;
+using std::string;
+
+using namespace process;
+
+namespace mesos {
+namespace internal {
+namespace tests {
+
+
+// Creates a containerizer that can launch every executor in '_executors'
+// (keyed by ExecutorID) and installs the default mock expectations.
+TestContainerizer::TestContainerizer(
+ const hashmap<ExecutorID, Executor*>& _executors)
+ : executors(_executors)
+{
+ setup();
+}
+
+
+// Creates a containerizer that knows a single executor, registered under
+// 'executorId', and installs the default mock expectations.
+TestContainerizer::TestContainerizer(
+ const ExecutorID& executorId,
+ Executor* executor)
+{
+ executors[executorId] = executor;
+ setup();
+}
+
+
+// Creates a containerizer that knows a single mock executor, registered
+// under the executor's own 'id'. 'executor' is dereferenced here, so it
+// must not be NULL.
+TestContainerizer::TestContainerizer(MockExecutor* executor)
+{
+ executors[executor->id] = executor;
+ setup();
+}
+
+
+// Creates a containerizer with no executors registered; only the default
+// mock expectations are installed. launch() CHECK-fails for any executor
+// on such an instance because the executor is unknown.
+TestContainerizer::TestContainerizer()
+{
+ setup();
+}
+
+
+// Stops and joins every executor driver that is still tracked, i.e. every
+// container that was launched but not destroyed, then drops the drivers.
+TestContainerizer::~TestContainerizer()
+{
+ foreachvalue (const Owned<MesosExecutorDriver>& driver, drivers) {
+ driver->stop();
+ driver->join();
+ }
+ drivers.clear();
+}
+
+
+// Launches the executor registered under 'executorInfo.executor_id()' by
+// starting a MesosExecutorDriver for it and tracking that driver under
+// 'containerId'.
+//
+// CHECK-fails if 'containerId' already has a driver or if the executor id
+// is unknown to this containerizer. 'user' is not used.
+//
+// Returns Nothing() once the driver has been started and a termination
+// promise has been registered for wait().
+Future<Nothing> TestContainerizer::launch(
+    const ContainerID& containerId,
+    const ExecutorInfo& executorInfo,
+    const string& directory,
+    const Option<string>& user,
+    const SlaveID& slaveId,
+    const PID<slave::Slave>& slavePid,
+    bool checkpoint)
+{
+  CHECK(!drivers.contains(containerId))
+    << "Failed to launch executor " << executorInfo.executor_id()
+    << " of framework " << executorInfo.framework_id()
+    << " because it is already launched";
+
+  CHECK(executors.contains(executorInfo.executor_id()))
+    << "Failed to launch executor " << executorInfo.executor_id()
+    << " of framework " << executorInfo.framework_id()
+    << " because it is unknown to the containerizer";
+
+  // Store mapping from (frameworkId, executorId) -> containerId to facilitate
+  // easy destroy from tests.
+  std::pair<FrameworkID, ExecutorID> key(executorInfo.framework_id(),
+                                         executorInfo.executor_id());
+  containers[key] = containerId;
+
+  Executor* executor = executors[executorInfo.executor_id()];
+  Owned<MesosExecutorDriver> driver(new MesosExecutorDriver(executor));
+  drivers[containerId] = driver;
+
+  // Prepare additional environment variables for the executor.
+  const map<string, string>& env = executorEnvironment(
+      executorInfo,
+      directory,
+      slaveId,
+      slavePid,
+      checkpoint,
+      Duration::zero());
+
+  // Export the variables only around driver->start(); they are removed
+  // again below (presumably the driver reads them while starting).
+  // Bind the value by reference to avoid copying every entry.
+  foreachpair (const string& name, const string& variable, env) {
+    os::setenv(name, variable);
+  }
+
+  foreach (const Environment_Variable& variable,
+           executorInfo.command().environment().variables()) {
+    os::setenv(variable.name(), variable.value());
+  }
+  os::setenv("MESOS_LOCAL", "1");
+
+  driver->start();
+
+  foreachkey (const string& name, env) {
+    os::unsetenv(name);
+  }
+
+  foreach (const Environment_Variable& variable,
+           executorInfo.command().environment().variables()) {
+    os::unsetenv(variable.name());
+  }
+  os::unsetenv("MESOS_LOCAL");
+
+  // Completed by destroy(containerId) and handed out through wait().
+  Owned<Promise<slave::Containerizer::Termination> > promise(
+      new Promise<slave::Containerizer::Termination>());
+  promises[containerId] = promise;
+
+  return Nothing();
+}
+
+
+// Returns the termination future of a container started with launch().
+// CHECK-fails if no promise is registered for 'containerId', i.e. the
+// container was never launched or has already been destroyed.
+Future<slave::Containerizer::Termination> TestContainerizer::wait(
+    const ContainerID& containerId)
+{
+  CHECK(promises.contains(containerId))
+    << "Container " << containerId << " not started";
+
+  return promises[containerId]->future();
+}
+
+
+// Test-only convenience overload: destroys the container that launch()
+// recorded for the (frameworkId, executorId) pair. A pair that was never
+// recorded is logged and otherwise ignored.
+void TestContainerizer::destroy(
+    const FrameworkID& frameworkId,
+    const ExecutorID& executorId)
+{
+  const std::pair<FrameworkID, ExecutorID> lookup(frameworkId, executorId);
+
+  if (containers.contains(lookup)) {
+    destroy(containers[lookup]);
+    return;
+  }
+
+  LOG(WARNING) << "Ignoring destroy of unknown container for executor '"
+               << executorId << "' of framework '" << frameworkId << "'";
+}
+
+
+// Tears down a launched container: stops and joins its executor driver,
+// completes its termination promise with Termination(0, false,
+// "Killed executor") and forgets both. CHECK-fails if 'containerId' has
+// no driver.
+//
+// NOTE: the (frameworkId, executorId) -> containerId entry recorded by
+// launch() is not removed here.
+void TestContainerizer::destroy(const ContainerID& containerId)
+{
+  CHECK(drivers.contains(containerId))
+    << "Failed to terminate container " << containerId
+    << " because it has not been started";
+
+  Owned<MesosExecutorDriver> driver = drivers[containerId];
+  driver->stop();
+  driver->join();
+  drivers.erase(containerId);
+
+  promises[containerId]->set(
+      slave::Containerizer::Termination(0, false, "Killed executor"));
+  promises.erase(containerId);
+}
+
+
+// Installs permissive defaults for the mocked methods so that tests which
+// do not care about them need no expectations of their own: recover() and
+// update() yield Nothing(), usage() yields empty ResourceStatistics.
+void TestContainerizer::setup()
+{
+  using testing::_;
+  using testing::Return;
+
+  EXPECT_CALL(*this, recover(_)).WillRepeatedly(Return(Nothing()));
+
+  EXPECT_CALL(*this, usage(_)).WillRepeatedly(Return(ResourceStatistics()));
+
+  EXPECT_CALL(*this, update(_, _)).WillRepeatedly(Return(Nothing()));
+}
+
+} // namespace tests {
+} // namespace internal {
+} // namespace mesos {
http://git-wip-us.apache.org/repos/asf/mesos/blob/0f3f8f35/src/tests/containerizer.hpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer.hpp b/src/tests/containerizer.hpp
new file mode 100644
index 0000000..5686398
--- /dev/null
+++ b/src/tests/containerizer.hpp
@@ -0,0 +1,112 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TEST_CONTAINERIZER_HPP__
+#define __TEST_CONTAINERIZER_HPP__
+
+#include <unistd.h>
+
+#include <gmock/gmock.h>
+
+#include <map>
+#include <string>
+
+#include <process/dispatch.hpp>
+#include <process/future.hpp>
+#include <process/pid.hpp>
+
+#include <stout/hashmap.hpp>
+#include <stout/os.hpp>
+#include <stout/try.hpp>
+#include <stout/uuid.hpp>
+
+#include "mesos/executor.hpp"
+#include "mesos/mesos.hpp"
+#include "mesos/resources.hpp"
+
+#include "slave/containerizer/containerizer.hpp"
+
+#include "slave/slave.hpp"
+#include "slave/state.hpp"
+
+namespace mesos {
+namespace internal {
+namespace tests {
+
+// Forward declaration.
+class MockExecutor;
+
+// A Containerizer for tests that "launches" executors in-process: each
+// container is backed by a MesosExecutorDriver wrapping an Executor that
+// the test registered up front. recover(), update() and usage() are gmock
+// methods with permissive defaults installed by setup().
+class TestContainerizer : public slave::Containerizer
+{
+public:
+ // Registers every executor in 'executors', keyed by ExecutorID.
+ TestContainerizer(const hashmap<ExecutorID, Executor*>& executors);
+
+ // Registers a single executor under 'executorId'.
+ TestContainerizer(const ExecutorID& executorId, Executor* executor);
+
+ // Registers a single mock executor under its own id.
+ TestContainerizer(MockExecutor* executor);
+
+ // Registers no executors.
+ TestContainerizer();
+
+ // Stops and joins any drivers that are still running.
+ virtual ~TestContainerizer();
+
+ // Starts a driver for the registered executor named by 'executorInfo'
+ // and tracks it under 'containerId'. CHECK-fails if the container is
+ // already launched or the executor is unknown.
+ virtual process::Future<Nothing> launch(
+ const ContainerID& containerId,
+ const ExecutorInfo& executorInfo,
+ const std::string& directory,
+ const Option<std::string>& user,
+ const SlaveID& slaveId,
+ const process::PID<slave::Slave>& slavePid,
+ bool checkpoint);
+
+ // Returns the termination future of a launched container; CHECK-fails
+ // if the container is not known.
+ virtual process::Future<slave::Containerizer::Termination> wait(
+ const ContainerID& containerId);
+
+ // Additional destroy method for testing because we won't know the
+ // ContainerID created for each container.
+ void destroy(const FrameworkID& frameworkId, const ExecutorID& executorId);
+
+ // Stops the container's driver and completes its termination promise;
+ // CHECK-fails if the container has no driver.
+ virtual void destroy(const ContainerID& containerId);
+
+ MOCK_METHOD1(
+ recover,
+ process::Future<Nothing>(const Option<slave::state::SlaveState>&));
+
+ MOCK_METHOD2(
+ update,
+ process::Future<Nothing>(const ContainerID&, const Resources&));
+
+ MOCK_METHOD1(
+ usage,
+ process::Future<ResourceStatistics>(const ContainerID&));
+
+private:
+ // Helper to set up the default expectations for the mocked methods.
+ void setup();
+
+ // Executors this containerizer is able to launch.
+ hashmap<ExecutorID, Executor*> executors;
+
+ // (frameworkId, executorId) -> containerId, recorded by launch().
+ hashmap<std::pair<FrameworkID, ExecutorID>, ContainerID> containers;
+ // Running driver per launched container.
+ hashmap<ContainerID, process::Owned<MesosExecutorDriver> > drivers;
+ // Termination promise per launched container, completed by destroy().
+ hashmap<ContainerID,
+ process::Owned<process::Promise<slave::Containerizer::Termination> > > promises;
+};
+
+} // namespace tests {
+} // namespace internal {
+} // namespace mesos {
+
+#endif // __TEST_CONTAINERIZER_HPP__
http://git-wip-us.apache.org/repos/asf/mesos/blob/0f3f8f35/src/tests/environment.cpp
----------------------------------------------------------------------
diff --git a/src/tests/environment.cpp b/src/tests/environment.cpp
index 41b8a71..585d73e 100644
--- a/src/tests/environment.cpp
+++ b/src/tests/environment.cpp
@@ -108,21 +108,10 @@ static bool enable(const ::testing::TestInfo& test)
// Now check the type parameter.
if (test.type_param() != NULL) {
const string& type = test.type_param();
- if (strings::contains(type, "CgroupsIsolator") &&
+ if (strings::contains(type, "Cgroups") &&
(os::user() != "root" || !os::exists("/proc/cgroups"))) {
return false;
}
-#ifdef __APPLE__
- if (strings::contains(test.test_case_name(), "IsolatorTest") &&
- strings::contains(test.name(), "Usage") &&
- strings::contains(type, "ProcessIsolator") &&
- os::user() != "root") {
- // We can't run the Isolator resource usage test when we're not
- // the root user on OSX because proc_pidinfo() only returns
- // memory and CPU usage reliably when running as root.
- return false;
- }
-#endif
}
return true;
http://git-wip-us.apache.org/repos/asf/mesos/blob/0f3f8f35/src/tests/fault_tolerance_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/fault_tolerance_tests.cpp b/src/tests/fault_tolerance_tests.cpp
index 130272c..b0711cf 100644
--- a/src/tests/fault_tolerance_tests.cpp
+++ b/src/tests/fault_tolerance_tests.cpp
@@ -41,10 +41,9 @@
#include "master/master.hpp"
-#include "slave/isolator.hpp"
#include "slave/slave.hpp"
-#include "tests/isolator.hpp"
+#include "tests/containerizer.hpp"
#include "tests/mesos.hpp"
using namespace mesos;
@@ -54,7 +53,6 @@ using namespace mesos::internal::tests;
using mesos::internal::master::Master;
-using mesos::internal::slave::Isolator;
using mesos::internal::slave::Slave;
using mesos::internal::slave::STATUS_UPDATE_RETRY_INTERVAL_MIN;
@@ -483,9 +481,9 @@ TEST_F(FaultToleranceTest, PartitionedSlaveExitedExecutor)
DROP_MESSAGES(Eq("PONG"), _, _);
MockExecutor exec(DEFAULT_EXECUTOR_ID);
- TestingIsolator isolator(&exec);
+ TestContainerizer containerizer(&exec);
- Try<PID<Slave> > slave = StartSlave(&isolator);
+ Try<PID<Slave> > slave = StartSlave(&containerizer);
ASSERT_SOME(slave);
MockScheduler sched;
@@ -582,10 +580,8 @@ TEST_F(FaultToleranceTest, PartitionedSlaveExitedExecutor)
shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get());
// Induce an ExitedExecutorMessage from the slave.
- dispatch(isolator,
- &Isolator::killExecutor,
- frameworkId.get(),
- DEFAULT_EXECUTOR_INFO.executor_id());
+ containerizer.destroy(
+ frameworkId.get(), DEFAULT_EXECUTOR_INFO.executor_id());
// Upon receiving the message, the master will shutdown the slave.
AWAIT_READY(shutdownMessage);
@@ -1113,10 +1109,11 @@ TEST_F(FaultToleranceTest, ReregisterFrameworkExitedExecutor)
ASSERT_SOME(master);
MockExecutor exec(DEFAULT_EXECUTOR_ID);
- TestingIsolator isolator(&exec);
+ TestContainerizer containerizer(&exec);
+
Owned<MasterDetector> slaveDetector(
new StandaloneMasterDetector(master.get()));
- Try<PID<Slave> > slave = StartSlave(&isolator, slaveDetector);
+ Try<PID<Slave> > slave = StartSlave(&containerizer, slaveDetector);
ASSERT_SOME(slave);
MockScheduler sched;
@@ -1189,7 +1186,7 @@ TEST_F(FaultToleranceTest, ReregisterFrameworkExitedExecutor)
DROP_PROTOBUFS(StatusUpdateMessage(), _, _);
// Now kill the executor.
- dispatch(isolator, &Isolator::killExecutor, frameworkId, DEFAULT_EXECUTOR_ID);
+ containerizer.destroy(frameworkId, DEFAULT_EXECUTOR_ID);
AWAIT_READY(executorExitedMessage);
@@ -1683,12 +1680,12 @@ TEST_F(FaultToleranceTest, SlaveReregisterTerminatedExecutor)
ASSERT_SOME(master);
MockExecutor exec(DEFAULT_EXECUTOR_ID);
- TestingIsolator isolator(&exec);
+ TestContainerizer containerizer(&exec);
StandaloneMasterDetector* detector =
new StandaloneMasterDetector(master.get());
Try<PID<Slave> > slave =
- StartSlave(&isolator, Owned<MasterDetector>(detector));
+ StartSlave(&containerizer, Owned<MasterDetector>(detector));
ASSERT_SOME(slave);
MockScheduler sched;
@@ -1726,10 +1723,7 @@ TEST_F(FaultToleranceTest, SlaveReregisterTerminatedExecutor)
FUTURE_PROTOBUF(ExitedExecutorMessage(), _, _);
// Now kill the executor.
- dispatch(isolator,
- &Isolator::killExecutor,
- frameworkId.get(),
- DEFAULT_EXECUTOR_ID);
+ containerizer.destroy(frameworkId.get(), DEFAULT_EXECUTOR_ID);
AWAIT_READY(executorExitedMessage);
http://git-wip-us.apache.org/repos/asf/mesos/blob/0f3f8f35/src/tests/gc_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/gc_tests.cpp b/src/tests/gc_tests.cpp
index 6638a4a..ac547cf 100644
--- a/src/tests/gc_tests.cpp
+++ b/src/tests/gc_tests.cpp
@@ -49,11 +49,10 @@
#include "slave/constants.hpp"
#include "slave/flags.hpp"
#include "slave/gc.hpp"
-#include "slave/isolator.hpp"
#include "slave/paths.hpp"
#include "slave/slave.hpp"
-#include "tests/isolator.hpp"
+#include "tests/containerizer.hpp"
#include "tests/mesos.hpp"
#include "tests/utils.hpp"
@@ -65,7 +64,6 @@ using mesos::internal::master::Master;
using mesos::internal::slave::GarbageCollector;
using mesos::internal::slave::GarbageCollectorProcess;
-using mesos::internal::slave::Isolator;
using mesos::internal::slave::Slave;
using process::Clock;
@@ -472,14 +470,14 @@ TEST_F(GarbageCollectorIntegrationTest, ExitedExecutor)
MockExecutor exec(DEFAULT_EXECUTOR_ID);
- TestingIsolator isolator(&exec);
+ TestContainerizer containerizer(&exec);
Future<SlaveRegisteredMessage> slaveRegisteredMessage =
FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);
slave::Flags flags = CreateSlaveFlags();
- Try<PID<Slave> > slave = StartSlave(&isolator, flags);
+ Try<PID<Slave> > slave = StartSlave(&containerizer, flags);
ASSERT_SOME(slave);
AWAIT_READY(slaveRegisteredMessage);
@@ -537,10 +535,7 @@ TEST_F(GarbageCollectorIntegrationTest, ExitedExecutor)
.Times(AtMost(1)); // Ignore TASK_LOST from killed executor.
// Kill the executor and inform the slave.
- dispatch(isolator,
- &Isolator::killExecutor,
- frameworkId.get(),
- DEFAULT_EXECUTOR_ID);
+ containerizer.destroy(frameworkId.get(), DEFAULT_EXECUTOR_ID);
AWAIT_READY(schedule);
@@ -574,14 +569,14 @@ TEST_F(GarbageCollectorIntegrationTest, DiskUsage)
MockExecutor exec(DEFAULT_EXECUTOR_ID);
- TestingIsolator isolator(&exec);
+ TestContainerizer containerizer(&exec);
Future<SlaveRegisteredMessage> slaveRegisteredMessage =
FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);
slave::Flags flags = CreateSlaveFlags();
- Try<PID<Slave> > slave = StartSlave(&isolator, flags);
+ Try<PID<Slave> > slave = StartSlave(&containerizer, flags);
ASSERT_SOME(slave);
AWAIT_READY(slaveRegisteredMessage);
@@ -639,10 +634,7 @@ TEST_F(GarbageCollectorIntegrationTest, DiskUsage)
.Times(AtMost(1)); // Ignore TASK_LOST from killed executor.
// Kill the executor and inform the slave.
- dispatch(isolator,
- &Isolator::killExecutor,
- frameworkId.get(),
- DEFAULT_EXECUTOR_ID);
+ containerizer.destroy(frameworkId.get(), DEFAULT_EXECUTOR_ID);
AWAIT_READY(schedule);
@@ -698,15 +690,15 @@ TEST_F(GarbageCollectorIntegrationTest, Unschedule)
MockExecutor exec1(executor1.executor_id());
MockExecutor exec2(executor2.executor_id());
- map<ExecutorID, Executor*> execs;
+ hashmap<ExecutorID, Executor*> execs;
execs[executor1.executor_id()] = &exec1;
execs[executor2.executor_id()] = &exec2;
- TestingIsolator isolator(execs);
+ TestContainerizer containerizer(execs);
slave::Flags flags = CreateSlaveFlags();
- Try<PID<Slave> > slave = StartSlave(&isolator, flags);
+ Try<PID<Slave> > slave = StartSlave(&containerizer, flags);
ASSERT_SOME(slave);
AWAIT_READY(slaveRegistered);
@@ -784,11 +776,7 @@ TEST_F(GarbageCollectorIntegrationTest, Unschedule)
Clock::pause();
// Kill the first executor.
- process::dispatch(
- isolator,
- &Isolator::killExecutor,
- frameworkId.get(),
- exec1.id);
+ containerizer.destroy(frameworkId.get(), exec1.id);
AWAIT_READY(scheduleExecutorRunWork);
AWAIT_READY(scheduleExecutorWork);
http://git-wip-us.apache.org/repos/asf/mesos/blob/0f3f8f35/src/tests/isolator.hpp
----------------------------------------------------------------------
diff --git a/src/tests/isolator.hpp b/src/tests/isolator.hpp
deleted file mode 100644
index 6431dd2..0000000
--- a/src/tests/isolator.hpp
+++ /dev/null
@@ -1,211 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __TESTS_ISOLATOR_HPP__
-#define __TESTS_ISOLATOR_HPP__
-
-#include "unistd.h"
-
-#include <map>
-#include <string>
-
-#include <process/dispatch.hpp>
-#include <process/future.hpp>
-#include <process/pid.hpp>
-
-#include <stout/os.hpp>
-#include <stout/try.hpp>
-#include <stout/uuid.hpp>
-
-#include "mesos/executor.hpp"
-#include "mesos/mesos.hpp"
-
-#include "slave/isolator.hpp"
-
-#include "tests/mesos.hpp" // For MockExecutor.
-
-namespace mesos {
-namespace internal {
-namespace tests {
-
-class TestingIsolator : public slave::Isolator
-{
-public:
- TestingIsolator()
- {
- setup();
- }
-
- TestingIsolator(const std::map<ExecutorID, Executor*>& _executors)
- : executors(_executors)
- {
- setup();
- }
-
- TestingIsolator(const ExecutorID& executorId, Executor* executor)
- {
- executors[executorId] = executor;
- setup();
- }
-
- TestingIsolator(MockExecutor* executor)
- {
- executors[executor->id] = executor;
- setup();
- }
-
- virtual ~TestingIsolator()
- {
- foreachvalue (MesosExecutorDriver* driver, drivers) {
- driver->stop();
- driver->join();
- delete driver;
- }
- drivers.clear();
- }
-
- virtual void initialize(
- const slave::Flags& flags,
- const Resources& resources,
- bool local,
- const process::PID<slave::Slave>& _slave)
- {
- slave = _slave;
- }
-
- virtual void launchExecutor(
- const SlaveID& slaveId,
- const FrameworkID& frameworkId,
- const FrameworkInfo& frameworkInfo,
- const ExecutorInfo& executorInfo,
- const UUID& uuid,
- const std::string& directory,
- const Resources& resources)
- {
- // TODO(vinod): Currently TestingIsolator doesn't support 2
- // different frameworks launching an executor with the same
- // executorID! This is tricky to support because most of the
- // tests do not known the framework id when they setup the
- // TestingIsolator.
- if (drivers.count(executorInfo.executor_id()) > 0) {
- FAIL() << "Failed to launch executor " << executorInfo.executor_id()
- << " of framework " << frameworkId
- << " because it is already launched";
- }
-
- if (executors.count(executorInfo.executor_id()) == 0) {
- FAIL() << "Failed to launch executor " << executorInfo.executor_id()
- << " of framework " << frameworkId
- << " because it is unknown to the isolator";
- }
-
- Executor* executor = executors[executorInfo.executor_id()];
- MesosExecutorDriver* driver = new MesosExecutorDriver(executor);
- drivers[executorInfo.executor_id()] = driver;
-
- os::setenv("MESOS_LOCAL", "1");
- os::setenv("MESOS_DIRECTORY", directory);
- os::setenv("MESOS_SLAVE_PID", slave);
- os::setenv("MESOS_SLAVE_ID", slaveId.value());
- os::setenv("MESOS_FRAMEWORK_ID", frameworkId.value());
- os::setenv("MESOS_EXECUTOR_ID", executorInfo.executor_id().value());
- os::setenv("MESOS_CHECKPOINT", frameworkInfo.checkpoint() ? "1" : "0");
-
- driver->start();
-
- os::unsetenv("MESOS_LOCAL");
- os::unsetenv("MESOS_DIRECTORY");
- os::unsetenv("MESOS_SLAVE_PID");
- os::unsetenv("MESOS_SLAVE_ID");
- os::unsetenv("MESOS_FRAMEWORK_ID");
- os::unsetenv("MESOS_EXECUTOR_ID");
- os::unsetenv("MESOS_CHECKPOINT");
-
- process::dispatch(
- slave,
- &slave::Slave::executorStarted,
- frameworkId,
- executorInfo.executor_id(),
- getpid());
- }
-
- virtual void killExecutor(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId)
- {
- if (drivers.count(executorId) > 0) {
- MesosExecutorDriver* driver = drivers[executorId];
- driver->stop();
- driver->join();
- delete driver;
- drivers.erase(executorId);
-
- process::dispatch(
- slave,
- &slave::Slave::executorTerminated,
- frameworkId,
- executorId,
- 0,
- false,
- "Killed executor");
- } else {
- FAIL() << "Failed to kill executor " << executorId
- << " of framework " << frameworkId
- << " because it is not launched";
- }
- }
-
- // Mocked so tests can check that the resources reflect all started tasks.
- MOCK_METHOD3(resourcesChanged, void(const FrameworkID&,
- const ExecutorID&,
- const Resources&));
-
- MOCK_METHOD2(
- usage,
- process::Future<ResourceStatistics>(
- const FrameworkID&,
- const ExecutorID&));
-
- MOCK_METHOD1(
- recover,
- process::Future<Nothing>(const Option<slave::state::SlaveState>&));
-
-private:
- // Helper to setup default expectations.
- void setup()
- {
- EXPECT_CALL(*this, resourcesChanged(testing::_, testing::_, testing::_))
- .Times(testing::AnyNumber());
-
- EXPECT_CALL(*this, usage(testing::_, testing::_))
- .WillRepeatedly(testing::Return(ResourceStatistics()));
-
- EXPECT_CALL(*this, recover(testing::_))
- .WillRepeatedly(testing::Return(Nothing()));
- }
-
- std::map<ExecutorID, Executor*> executors;
- std::map<ExecutorID, MesosExecutorDriver*> drivers;
- process::PID<slave::Slave> slave;
-};
-
-} // namespace tests {
-} // namespace internal {
-} // namespace mesos {
-
-#endif // __TESTS_ISOLATOR_HPP__
http://git-wip-us.apache.org/repos/asf/mesos/blob/0f3f8f35/src/tests/isolator_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/isolator_tests.cpp b/src/tests/isolator_tests.cpp
index 45a41ca..5c36659 100644
--- a/src/tests/isolator_tests.cpp
+++ b/src/tests/isolator_tests.cpp
@@ -26,6 +26,8 @@
#include <mesos/resources.hpp>
#include <process/future.hpp>
+#include <process/owned.hpp>
+#include <process/reap.hpp>
#include <stout/os.hpp>
#include <stout/path.hpp>
@@ -34,13 +36,19 @@
#include "master/detector.hpp"
#include "slave/flags.hpp"
-#ifdef __linux__
-#include "slave/cgroups_isolator.hpp"
-#endif
-#include "slave/process_isolator.hpp"
#include "slave/slave.hpp"
+#include "slave/containerizer/isolator.hpp"
+#include "slave/containerizer/launcher.hpp"
+
+#include "slave/containerizer/isolators/posix.hpp"
+#ifdef __linux__
+#include "slave/containerizer/isolators/cgroups/cpushare.hpp"
+#include "slave/containerizer/isolators/cgroups/mem.hpp"
+#endif // __linux__
+
#include "tests/mesos.hpp"
+#include "tests/utils.hpp"
using namespace mesos;
using namespace mesos::internal;
@@ -49,13 +57,17 @@ using namespace mesos::internal::tests;
using namespace process;
using mesos::internal::master::Master;
-
#ifdef __linux__
-using mesos::internal::slave::CgroupsIsolator;
-#endif
+using mesos::internal::slave::CgroupsCpushareIsolatorProcess;
+using mesos::internal::slave::CgroupsMemIsolatorProcess;
+#endif // __linux__
using mesos::internal::slave::Isolator;
-using mesos::internal::slave::ProcessIsolator;
-using mesos::internal::slave::Slave;
+using mesos::internal::slave::IsolatorProcess;
+using mesos::internal::slave::Launcher;
+using mesos::internal::slave::PosixLauncher;
+using mesos::internal::slave::PosixCpuIsolatorProcess;
+using mesos::internal::slave::PosixMemIsolatorProcess;
+using mesos::internal::slave::Flags;
using std::string;
using std::vector;
@@ -66,139 +78,348 @@ using testing::Return;
using testing::SaveArg;
+// Entry point for the forked child: blocks until the parent writes to the
+// pipe, then replaces the process image with `sh -c <command>`. Does not
+// return; aborts if the exec fails.
+int execute(const std::string& command, int pipes[2])
+{
+  // The child only reads, so drop its copy of the write end.
+  ::close(pipes[1]);
+
+  // Block until the parent gives the go-ahead; the value itself is unused.
+  int token;
+  ::read(pipes[0], &token, sizeof(token));
+  ::close(pipes[0]);
+
+  execl("/bin/sh", "sh", "-c", command.c_str(), (char*) NULL);
+
+  // execl() only comes back when it failed.
+  std::cerr << "Should not reach here!" << std::endl;
+  abort();
+}
+
+
+// Fixture for the typed CPU isolator tests; the tests instantiate the
+// isolator under test through TypeParam::create().
+template <typename T>
+class CpuIsolatorTest : public MesosTest {};
+
#ifdef __linux__
-typedef ::testing::Types<ProcessIsolator, CgroupsIsolator> IsolatorTypes;
+typedef ::testing::Types<PosixCpuIsolatorProcess,
+ CgroupsCpushareIsolatorProcess> CpuIsolatorTypes;
#else
-typedef ::testing::Types<ProcessIsolator> IsolatorTypes;
-#endif
+typedef ::testing::Types<PosixCpuIsolatorProcess> CpuIsolatorTypes;
+#endif // __linux__
-TYPED_TEST_CASE(IsolatorTest, IsolatorTypes);
+TYPED_TEST_CASE(CpuIsolatorTest, CpuIsolatorTypes);
-TYPED_TEST(IsolatorTest, Usage)
+// Checks that the isolator under test (TypeParam) reports user CPU time
+// for a container running a userspace busy loop: at least 0.125s of user
+// time and less than 0.025s of system time, sampled for about one second.
+TYPED_TEST(CpuIsolatorTest, UserCpuUsage)
{
- Try<PID<Master> > master = this->StartMaster();
- ASSERT_SOME(master);
+ Flags flags;
- TypeParam isolator;
+ Try<Isolator*> isolator = TypeParam::create(flags);
+ CHECK_SOME(isolator);
- slave::Flags flags = this->CreateSlaveFlags();
+ // A PosixLauncher is sufficient even when testing a cgroups isolator.
+ Try<Launcher*> launcher = PosixLauncher::create(flags);
+ CHECK_SOME(launcher);
- Try<PID<Slave> > slave = this->StartSlave(&isolator, flags);
- ASSERT_SOME(slave);
+ ExecutorInfo executorInfo;
+ executorInfo.mutable_resources()->CopyFrom(
+ Resources::parse("cpus:1.0").get());
- MockScheduler sched;
- MesosSchedulerDriver driver(
- &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);
+ ContainerID containerId;
+ containerId.set_value("user_cpu_usage");
- Future<FrameworkID> frameworkId;
- EXPECT_CALL(sched, registered(&driver, _, _))
- .WillOnce(FutureArg<1>(&frameworkId));
+ AWAIT_READY(isolator.get()->prepare(containerId, executorInfo));
- Future<vector<Offer> > offers;
- EXPECT_CALL(sched, resourceOffers(&driver, _))
- .WillOnce(FutureArg<1>(&offers))
- .WillRepeatedly(Return()); // Ignore subsequent offers.
+ Try<string> dir = os::mkdtemp();
+ ASSERT_SOME(dir);
+ const string& file = path::join(dir.get(), "mesos_isolator_test_ready");
- driver.start();
+ // Max out a single core in userspace. The command keeps running until the
+ // launcher destroys the container below; only the sampling loop is bounded.
+ string command = "while true ; do true ; done &"
+ "touch " + file + "; " // Signals the command is running.
+ "sleep 60";
- AWAIT_READY(frameworkId);
- AWAIT_READY(offers);
+ int pipes[2];
+ ASSERT_NE(-1, ::pipe(pipes));
- EXPECT_NE(0u, offers.get().size());
+ lambda::function<int()> inChild = lambda::bind(&execute, command, pipes);
- TaskInfo task;
- task.set_name("isolator_test");
- task.mutable_task_id()->set_value("1");
- task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
- task.mutable_resources()->MergeFrom(offers.get()[0].resources());
+ Try<pid_t> pid = launcher.get()->fork(containerId, inChild);
+ ASSERT_SOME(pid);
- Resources resources(offers.get()[0].resources());
- Option<Bytes> mem = resources.mem();
- ASSERT_SOME(mem);
- Option<double> cpus = resources.cpus();
- ASSERT_SOME(cpus);
+ // Reap the forked child.
+ Future<Option<int> > status = process::reap(pid.get());
- const std::string& file = path::join(flags.work_dir, "ready");
+ // Continue in the parent.
+ ::close(pipes[0]);
- // This task induces user/system load in a child process by
- // running top in a child process for ten seconds.
- task.mutable_command()->set_value(
-#ifdef __APPLE__
- // Use logging mode with 30,000 samples with no interval.
- "top -l 30000 -s 0 2>&1 > /dev/null & "
-#else
- // Batch mode, with 30,000 samples with no interval.
- "top -b -d 0 -n 30000 2>&1 > /dev/null & "
-#endif
- "touch " + file + "; " // Signals that the top command is running.
- "sleep 60");
+ // Isolate the forked child.
+ AWAIT_READY(isolator.get()->isolate(containerId, pid.get()));
+
+ // Now signal the child to continue. The value written is irrelevant but
+ // is initialized so that no indeterminate bytes are sent.
+ int buf = 0;
+ ASSERT_LT(0, ::write(pipes[1], &buf, sizeof(buf)));
+ ::close(pipes[1]);
+
+ // Wait for the command to start.
+ while (!os::exists(file));
+
+ // Wait up to 1 second for the child process to induce 1/8 of a second of
+ // user cpu time.
+ ResourceStatistics statistics;
+ Duration waited = Duration::zero();
+ do {
+ Future<ResourceStatistics> usage = isolator.get()->usage(containerId);
+ AWAIT_READY(usage);
+
+ statistics = usage.get();
+
+ // If we meet our usage expectations, we're done!
+ if (statistics.cpus_user_time_secs() >= 0.125) {
+ break;
+ }
- vector<TaskInfo> tasks;
- tasks.push_back(task);
+ os::sleep(Milliseconds(200));
+ waited += Milliseconds(200);
+ } while (waited < Seconds(1));
- Future<TaskStatus> status;
- EXPECT_CALL(sched, statusUpdate(&driver, _))
- .WillOnce(FutureArg<1>(&status));
+ EXPECT_LE(0.125, statistics.cpus_user_time_secs());
- driver.launchTasks(offers.get()[0].id(), tasks);
+ // Shouldn't be any appreciable system time.
+ EXPECT_GT(0.025, statistics.cpus_system_time_secs());
+ // Ensure all processes are killed.
+ AWAIT_READY(launcher.get()->destroy(containerId));
+
+ // Make sure the child was reaped.
AWAIT_READY(status);
- EXPECT_EQ(TASK_RUNNING, status.get().state());
+ // Let the isolator clean up.
+ AWAIT_READY(isolator.get()->cleanup(containerId));
- // Wait for the task to begin inducing cpu time.
- while (!os::exists(file));
+ delete isolator.get();
+ delete launcher.get();
+
+ CHECK_SOME(os::rmdir(dir.get()));
+}
+
+
+// Checks that the isolator under test (TypeParam) reports system CPU time
+// for a container that spends its time in the kernel: at least 0.125s of
+// system time and less than 0.025s of user time, sampled for about one
+// second.
+TYPED_TEST(CpuIsolatorTest, SystemCpuUsage)
+{
+ Flags flags;
+
+ Try<Isolator*> isolator = TypeParam::create(flags);
+ CHECK_SOME(isolator);
+
+ // A PosixLauncher is sufficient even when testing a cgroups isolator.
+ Try<Launcher*> launcher = PosixLauncher::create(flags);
+ CHECK_SOME(launcher);
+
+ ExecutorInfo executorInfo;
+ executorInfo.mutable_resources()->CopyFrom(
+ Resources::parse("cpus:1.0").get());
+
+ ContainerID containerId;
+ containerId.set_value("system_cpu_usage");
- ExecutorID executorId;
- executorId.set_value(task.task_id().value());
+ AWAIT_READY(isolator.get()->prepare(containerId, executorInfo));
- // We'll wait up to 10 seconds for the child process to induce
- // 1/8 of a second of user and system cpu time in total.
- // TODO(bmahler): Also induce rss memory consumption, by re-using
- // the balloon framework.
+ Try<string> dir = os::mkdtemp();
+ ASSERT_SOME(dir);
+ const string& file = path::join(dir.get(), "mesos_isolator_test_ready");
+
+ // Generating random numbers is done by the kernel and will max out a single
+ // core and run almost exclusively in the kernel, i.e., system time.
+ string command = "cat /dev/urandom > /dev/null & "
+ "touch " + file + "; " // Signals the command is running.
+ "sleep 60";
+
+ int pipes[2];
+ ASSERT_NE(-1, ::pipe(pipes));
+
+ lambda::function<int()> inChild = lambda::bind(&execute, command, pipes);
+
+ Try<pid_t> pid = launcher.get()->fork(containerId, inChild);
+ ASSERT_SOME(pid);
+
+ // Reap the forked child.
+ Future<Option<int> > status = process::reap(pid.get());
+
+ // Continue in the parent.
+ ::close(pipes[0]);
+
+ // Isolate the forked child.
+ AWAIT_READY(isolator.get()->isolate(containerId, pid.get()));
+
+ // Now signal the child to continue. The value written is irrelevant but
+ // is initialized so that no indeterminate bytes are sent.
+ int buf = 0;
+ ASSERT_LT(0, ::write(pipes[1], &buf, sizeof(buf)));
+ ::close(pipes[1]);
+
+ // Wait for the command to start.
+ while (!os::exists(file));
+
+ // Wait up to 1 second for the child process to induce 1/8 of a second of
+ // system cpu time.
ResourceStatistics statistics;
Duration waited = Duration::zero();
do {
- Future<ResourceStatistics> usage =
- process::dispatch(
- (Isolator*) &isolator, // TODO(benh): Fix after reaper changes.
- &Isolator::usage,
- frameworkId.get(),
- executorId);
-
+ Future<ResourceStatistics> usage = isolator.get()->usage(containerId);
AWAIT_READY(usage);
statistics = usage.get();
// If we meet our usage expectations, we're done!
- if (statistics.cpus_user_time_secs() >= 0.125 &&
- statistics.cpus_system_time_secs() >= 0.125 &&
- statistics.mem_rss_bytes() >= 1024u) {
+ if (statistics.cpus_system_time_secs() >= 0.125) {
break;
}
- os::sleep(Milliseconds(100));
- waited += Milliseconds(100);
- } while (waited < Seconds(10));
-
+ os::sleep(Milliseconds(200));
+ waited += Milliseconds(200);
+ } while (waited < Seconds(1));
- EXPECT_GE(statistics.cpus_user_time_secs(), 0.125);
- EXPECT_GE(statistics.cpus_system_time_secs(), 0.125);
- EXPECT_EQ(statistics.cpus_limit(), cpus.get());
- EXPECT_GE(statistics.mem_rss_bytes(), 1024u);
- EXPECT_EQ(statistics.mem_limit_bytes(), mem.get().bytes());
+ EXPECT_LE(0.125, statistics.cpus_system_time_secs());
- EXPECT_CALL(sched, statusUpdate(&driver, _))
- .WillOnce(FutureArg<1>(&status));
+ // Shouldn't be any appreciable user time.
+ EXPECT_GT(0.025, statistics.cpus_user_time_secs());
- driver.killTask(task.task_id());
+ // Ensure all processes are killed.
+ AWAIT_READY(launcher.get()->destroy(containerId));
+ // Make sure the child was reaped.
AWAIT_READY(status);
- EXPECT_EQ(TASK_KILLED, status.get().state());
+ // Let the isolator clean up.
+ AWAIT_READY(isolator.get()->cleanup(containerId));
+
+ delete isolator.get();
+ delete launcher.get();
+
+ CHECK_SOME(os::rmdir(dir.get()));
+}
+
+
+template <typename T>
+class MemIsolatorTest : public MesosTest {};
+
+#ifdef __linux__
+typedef ::testing::Types<PosixMemIsolatorProcess,
+ CgroupsMemIsolatorProcess> MemIsolatorTypes;
+#else
+typedef ::testing::Types<PosixMemIsolatorProcess> MemIsolatorTypes;
+#endif // __linux__
+
+TYPED_TEST_CASE(MemIsolatorTest, MemIsolatorTypes);
+
+
+// This function should be async-signal-safe but it isn't: at least
+// posix_memalign, mlock, memset and perror are not safe.
+int consumeMemory(const Bytes& _size, const Duration& duration, int pipes[2])
+{
+ // In child process
+ ::close(pipes[1]);
+
+ int buf;
+ // Wait until the parent signals us to continue.
+ ::read(pipes[0], &buf, sizeof(buf));
+ ::close(pipes[0]);
+
+ size_t size = static_cast<size_t>(_size.bytes());
+ void* buffer = NULL;
+
+ if (posix_memalign(&buffer, getpagesize(), size) != 0) {
+ perror("Failed to allocate page-aligned memory, posix_memalign");
+ abort();
+ }
+
+ // We use mlock and memset here to make sure that the memory
+ // actually gets paged in and thus accounted for.
+ if (mlock(buffer, size) != 0) {
+ perror("Failed to lock memory, mlock");
+ abort();
+ }
+
+ if (memset(buffer, 1, size) != buffer) {
+ perror("Failed to fill memory, memset");
+ abort();
+ }
+
+ os::sleep(duration);
+
+ return 0;
+}
+
+
+TYPED_TEST(MemIsolatorTest, MemUsage)
+{
+ Flags flags;
+
+ Try<Isolator*> isolator = TypeParam::create(flags);
+ CHECK_SOME(isolator);
+
+ // A PosixLauncher is sufficient even when testing a cgroups isolator.
+ Try<Launcher*> launcher = PosixLauncher::create(flags);
+
+ ExecutorInfo executorInfo;
+ executorInfo.mutable_resources()->CopyFrom(
+ Resources::parse("mem:1024").get());
+
+ ContainerID containerId;
+ containerId.set_value("memory_usage");
+
+ AWAIT_READY(isolator.get()->prepare(containerId, executorInfo));
+
+ int pipes[2];
+ ASSERT_NE(-1, ::pipe(pipes));
+
+ lambda::function<int()> inChild = lambda::bind(
+ &consumeMemory,
+ Megabytes(256),
+ Seconds(10),
+ pipes);
+
+ Try<pid_t> pid = launcher.get()->fork(containerId, inChild);
+ ASSERT_SOME(pid);
+
+ // Set up the reaper to wait on the forked child.
+ Future<Option<int> > status = process::reap(pid.get());
+
+ // Continue in the parent.
+ ::close(pipes[0]);
+
+ // Isolate the forked child.
+ AWAIT_READY(isolator.get()->isolate(containerId, pid.get()));
+
+ // Now signal the child to continue.
+ int buf;
+ ASSERT_LT(0, ::write(pipes[1], &buf, sizeof(buf)));
+ ::close(pipes[1]);
+
+ // Wait up to 5 seconds for the child process to consume 256 MB of memory.
+ ResourceStatistics statistics;
+ Bytes threshold = Megabytes(256);
+ Duration waited = Duration::zero();
+ do {
+ Future<ResourceStatistics> usage = isolator.get()->usage(containerId);
+ AWAIT_READY(usage);
+
+ statistics = usage.get();
+
+ // If we meet our usage expectations, we're done!
+ if (statistics.mem_rss_bytes() >= threshold.bytes()) {
+ break;
+ }
+
+ os::sleep(Seconds(1));
+ waited += Seconds(1);
+ } while (waited < Seconds(5));
+
+ EXPECT_LE(threshold.bytes(), statistics.mem_rss_bytes());
+
+ // Ensure all processes are killed.
+ AWAIT_READY(launcher.get()->destroy(containerId));
+
+ // Make sure the child was reaped.
+ AWAIT_READY(status);
- driver.stop();
- driver.join();
+ // Let the isolator clean up.
+ AWAIT_READY(isolator.get()->cleanup(containerId));
- this->Shutdown(); // Must shutdown before 'isolator' gets deallocated.
+ delete isolator.get();
+ delete launcher.get();
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/0f3f8f35/src/tests/master_contender_detector_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_contender_detector_tests.cpp b/src/tests/master_contender_detector_tests.cpp
index b42574b..8da7420 100644
--- a/src/tests/master_contender_detector_tests.cpp
+++ b/src/tests/master_contender_detector_tests.cpp
@@ -51,7 +51,6 @@
#include "slave/slave.hpp"
-#include "tests/isolator.hpp"
#include "tests/mesos.hpp"
#ifdef MESOS_HAS_JAVA
#include "tests/zookeeper.hpp"
http://git-wip-us.apache.org/repos/asf/mesos/blob/0f3f8f35/src/tests/master_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_tests.cpp b/src/tests/master_tests.cpp
index 9c14114..42c5a77 100644
--- a/src/tests/master_tests.cpp
+++ b/src/tests/master_tests.cpp
@@ -41,12 +41,12 @@
#include "master/master.hpp"
#include "slave/constants.hpp"
+#include "slave/containerizer/mesos_containerizer.hpp"
#include "slave/gc.hpp"
#include "slave/flags.hpp"
-#include "slave/process_isolator.hpp"
#include "slave/slave.hpp"
-#include "tests/isolator.hpp"
+#include "tests/containerizer.hpp"
#include "tests/mesos.hpp"
#ifdef MESOS_HAS_JAVA
@@ -60,9 +60,9 @@ using namespace mesos::internal::tests;
using mesos::internal::master::Master;
using mesos::internal::slave::GarbageCollectorProcess;
-using mesos::internal::slave::Isolator;
-using mesos::internal::slave::ProcessIsolator;
using mesos::internal::slave::Slave;
+using mesos::internal::slave::Containerizer;
+using mesos::internal::slave::MesosContainerizerProcess;
using process::Clock;
using process::Future;
@@ -89,9 +89,10 @@ TEST_F(MasterTest, TaskRunning)
ASSERT_SOME(master);
MockExecutor exec(DEFAULT_EXECUTOR_ID);
- TestingIsolator isolator(&exec);
- Try<PID<Slave> > slave = StartSlave(&isolator);
+ TestContainerizer containerizer(&exec);
+
+ Try<PID<Slave> > slave = StartSlave(&containerizer);
ASSERT_SOME(slave);
MockScheduler sched;
@@ -127,10 +128,12 @@ TEST_F(MasterTest, TaskRunning)
EXPECT_CALL(exec, launchTask(_, _))
.WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));
- Future<Nothing> resourcesChanged;
- EXPECT_CALL(isolator,
- resourcesChanged(_, _, Resources(offers.get()[0].resources())))
- .WillOnce(FutureSatisfy(&resourcesChanged));
+ Future<Nothing> resourcesUpdated;
+ Future<Nothing> update;
+ EXPECT_CALL(containerizer,
+ update(_, Resources(offers.get()[0].resources())))
+ .WillOnce(DoAll(FutureSatisfy(&resourcesUpdated),
+ Return(update)));
Future<TaskStatus> status;
EXPECT_CALL(sched, statusUpdate(&driver, _))
@@ -141,7 +144,7 @@ TEST_F(MasterTest, TaskRunning)
AWAIT_READY(status);
EXPECT_EQ(TASK_RUNNING, status.get().state());
- AWAIT_READY(resourcesChanged);
+ AWAIT_READY(resourcesUpdated);
EXPECT_CALL(exec, shutdown(_))
.Times(AtMost(1));
@@ -149,7 +152,7 @@ TEST_F(MasterTest, TaskRunning)
driver.stop();
driver.join();
- Shutdown(); // Must shutdown before 'isolator' gets deallocated.
+ Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
@@ -159,12 +162,13 @@ TEST_F(MasterTest, ShutdownFrameworkWhileTaskRunning)
ASSERT_SOME(master);
MockExecutor exec(DEFAULT_EXECUTOR_ID);
- TestingIsolator isolator(&exec);
+
+ TestContainerizer containerizer(&exec);
slave::Flags flags = CreateSlaveFlags();
flags.executor_shutdown_grace_period = Seconds(0);
- Try<PID<Slave> > slave = StartSlave(&isolator, flags);
+ Try<PID<Slave> > slave = StartSlave(&containerizer, flags);
ASSERT_SOME(slave);
MockScheduler sched;
@@ -200,10 +204,12 @@ TEST_F(MasterTest, ShutdownFrameworkWhileTaskRunning)
EXPECT_CALL(exec, launchTask(_, _))
.WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));
- Future<Nothing> resourcesChanged;
- EXPECT_CALL(isolator,
- resourcesChanged(_, _, Resources(offers.get()[0].resources())))
- .WillOnce(FutureSatisfy(&resourcesChanged));
+ Future<Nothing> resourcesUpdated;
+ Future<Nothing> update;
+ EXPECT_CALL(containerizer,
+ update(_, Resources(offers.get()[0].resources())))
+ .WillOnce(DoAll(FutureSatisfy(&resourcesUpdated),
+ Return(update)));
Future<TaskStatus> status;
EXPECT_CALL(sched, statusUpdate(&driver, _))
@@ -214,7 +220,7 @@ TEST_F(MasterTest, ShutdownFrameworkWhileTaskRunning)
AWAIT_READY(status);
EXPECT_EQ(TASK_RUNNING, status.get().state());
- AWAIT_READY(resourcesChanged);
+ AWAIT_READY(resourcesUpdated);
EXPECT_CALL(exec, shutdown(_))
.Times(AtMost(1));
@@ -222,7 +228,7 @@ TEST_F(MasterTest, ShutdownFrameworkWhileTaskRunning)
driver.stop();
driver.join();
- Shutdown(); // Must shutdown before 'isolator' gets deallocated.
+ Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
@@ -376,13 +382,14 @@ TEST_F(MasterTest, RecoverResources)
ASSERT_SOME(master);
MockExecutor exec(DEFAULT_EXECUTOR_ID);
- TestingIsolator isolator(&exec);
+
+ TestContainerizer containerizer(&exec);
slave::Flags flags = CreateSlaveFlags();
flags.resources = Option<string>(
"cpus:2;mem:1024;disk:1024;ports:[1-10, 20-30]");
- Try<PID<Slave> > slave = StartSlave(&isolator, flags);
+ Try<PID<Slave> > slave = StartSlave(&containerizer, flags);
ASSERT_SOME(slave);
MockScheduler sched;
@@ -470,10 +477,7 @@ TEST_F(MasterTest, RecoverResources)
.Times(AtMost(1));
// Now kill the executor, scheduler should get an offer for its resources.
- dispatch(isolator,
- &Isolator::killExecutor,
- offer.framework_id(),
- executorInfo.executor_id());
+ containerizer.destroy(offer.framework_id(), executorInfo.executor_id());
// TODO(benh): We can't do driver.reviveOffers() because we need to
// wait for the killed executors resources to get aggregated! We
@@ -488,7 +492,7 @@ TEST_F(MasterTest, RecoverResources)
driver.stop();
driver.join();
- Shutdown(); // Must shutdown before 'isolator' gets deallocated.
+ Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
@@ -588,13 +592,13 @@ TEST_F(MasterTest, MultipleExecutors)
MockExecutor exec1(executor1.executor_id());
MockExecutor exec2(executor2.executor_id());
- map<ExecutorID, Executor*> execs;
+ hashmap<ExecutorID, Executor*> execs;
execs[executor1.executor_id()] = &exec1;
execs[executor2.executor_id()] = &exec2;
- TestingIsolator isolator(execs);
+ TestContainerizer containerizer(execs);
- Try<PID<Slave> > slave = StartSlave(&isolator);
+ Try<PID<Slave> > slave = StartSlave(&containerizer);
ASSERT_SOME(slave);
MockScheduler sched;
@@ -676,7 +680,7 @@ TEST_F(MasterTest, MultipleExecutors)
driver.stop();
driver.join();
- Shutdown(); // Must shutdown before 'isolator' gets deallocated.
+ Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
@@ -685,12 +689,15 @@ TEST_F(MasterTest, ShutdownUnregisteredExecutor)
Try<PID<Master> > master = StartMaster();
ASSERT_SOME(master);
- ProcessIsolator isolator;
-
// Need flags for 'executor_registration_timeout'.
slave::Flags flags = CreateSlaveFlags();
+ // Set the isolation flag so we know a MesosContainerizer will be created.
+ flags.isolation = "posix/cpu,posix/mem";
- Try<PID<Slave> > slave = StartSlave(&isolator);
+ Try<Containerizer*> containerizer = Containerizer::create(flags, false);
+ CHECK_SOME(containerizer);
+
+ Try<PID<Slave> > slave = StartSlave(containerizer.get());
ASSERT_SOME(slave);
MockScheduler sched;
@@ -740,14 +747,14 @@ TEST_F(MasterTest, ShutdownUnregisteredExecutor)
.WillOnce(FutureArg<1>(&status));
// Ensure that the slave times out and kills the executor.
- Future<Nothing> killExecutor =
- FUTURE_DISPATCH(_, &Isolator::killExecutor);
+ Future<Nothing> destroyExecutor =
+ FUTURE_DISPATCH(_, &MesosContainerizerProcess::destroy);
Clock::advance(flags.executor_registration_timeout);
- AWAIT_READY(killExecutor);
+ AWAIT_READY(destroyExecutor);
- Clock::settle(); // Wait for ProcessIsolator::killExecutor to complete.
+ Clock::settle(); // Wait for Containerizer::destroy to complete.
// Now advance time until the reaper reaps the executor.
while (status.isPending()) {
@@ -763,7 +770,7 @@ TEST_F(MasterTest, ShutdownUnregisteredExecutor)
driver.stop();
driver.join();
- Shutdown(); // Must shutdown before 'isolator' gets deallocated.
+ Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
@@ -775,9 +782,10 @@ TEST_F(MasterTest, RemoveUnregisteredTerminatedExecutor)
ASSERT_SOME(master);
MockExecutor exec(DEFAULT_EXECUTOR_ID);
- TestingIsolator isolator(&exec);
- Try<PID<Slave> > slave = StartSlave(&isolator);
+ TestContainerizer containerizer(&exec);
+
+ Try<PID<Slave> > slave = StartSlave(&containerizer);
ASSERT_SOME(slave);
MockScheduler sched;
@@ -823,10 +831,7 @@ TEST_F(MasterTest, RemoveUnregisteredTerminatedExecutor)
FUTURE_DISPATCH(_, &GarbageCollectorProcess::schedule);
// Now kill the executor.
- dispatch(isolator,
- &Isolator::killExecutor,
- offers.get()[0].framework_id(),
- DEFAULT_EXECUTOR_ID);
+ containerizer.destroy(offers.get()[0].framework_id(), DEFAULT_EXECUTOR_ID);
AWAIT_READY(status);
EXPECT_EQ(TASK_LOST, status.get().state());
@@ -841,7 +846,7 @@ TEST_F(MasterTest, RemoveUnregisteredTerminatedExecutor)
driver.stop();
driver.join();
- Shutdown(); // Must shutdown before 'isolator' gets deallocated.
+ Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
@@ -1043,9 +1048,10 @@ TEST_F(MasterTest, ReconcileTaskTest)
ASSERT_SOME(master);
MockExecutor exec(DEFAULT_EXECUTOR_ID);
- TestingIsolator isolator(&exec);
- Try<PID<Slave> > slave = StartSlave(&isolator);
+ TestContainerizer containerizer(&exec);
+
+ Try<PID<Slave> > slave = StartSlave(&containerizer);
ASSERT_SOME(slave);
MockScheduler sched;
@@ -1104,7 +1110,7 @@ TEST_F(MasterTest, ReconcileTaskTest)
driver.stop();
driver.join();
- Shutdown(); // Must shutdown before 'isolator' gets deallocated.
+ Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
@@ -1120,7 +1126,7 @@ TEST_F(MasterTest, LaunchCombinedOfferTest)
ASSERT_SOME(master);
MockExecutor exec(DEFAULT_EXECUTOR_ID);
- TestingIsolator isolator(&exec);
+ TestContainerizer containerizer(&exec);
// The CPU granularity is 1.0 which means that we need slaves with at least
// 2 cpus for a combined offer.
@@ -1130,7 +1136,7 @@ TEST_F(MasterTest, LaunchCombinedOfferTest)
slave::Flags flags = CreateSlaveFlags();
flags.resources = Option<string>(stringify(fullSlave));
- Try<PID<Slave> > slave = StartSlave(&isolator, flags);
+ Try<PID<Slave> > slave = StartSlave(&containerizer, flags);
ASSERT_SOME(slave);
MockScheduler sched;
@@ -1248,7 +1254,7 @@ TEST_F(MasterTest, LaunchCombinedOfferTest)
driver.stop();
driver.join();
- Shutdown(); // Must shutdown before 'isolator' gets deallocated.
+ Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
@@ -1259,7 +1265,7 @@ TEST_F(MasterTest, LaunchAcrossSlavesTest)
ASSERT_SOME(master);
MockExecutor exec(DEFAULT_EXECUTOR_ID);
- TestingIsolator isolator(&exec);
+ TestContainerizer containerizer(&exec);
// See LaunchCombinedOfferTest() for resource size motivation.
Resources fullSlave = Resources::parse("cpus:2;mem:1024").get();
@@ -1268,7 +1274,7 @@ TEST_F(MasterTest, LaunchAcrossSlavesTest)
slave::Flags flags = CreateSlaveFlags();
flags.resources = Option<string>(stringify(fullSlave));
- Try<PID<Slave> > slave1 = StartSlave(&isolator, flags);
+ Try<PID<Slave> > slave1 = StartSlave(&containerizer, flags);
ASSERT_SOME(slave1);
MockScheduler sched;
@@ -1294,7 +1300,7 @@ TEST_F(MasterTest, LaunchAcrossSlavesTest)
EXPECT_CALL(sched, resourceOffers(&driver, _))
.WillOnce(FutureArg<1>(&offers2));
- Try<PID<Slave> > slave2 = StartSlave(&isolator, flags);
+ Try<PID<Slave> > slave2 = StartSlave(&containerizer, flags);
ASSERT_SOME(slave2);
AWAIT_READY(offers2);
@@ -1331,7 +1337,7 @@ TEST_F(MasterTest, LaunchAcrossSlavesTest)
driver.stop();
driver.join();
- Shutdown(); // Must shutdown before 'isolator' gets deallocated.
+ Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
@@ -1343,7 +1349,7 @@ TEST_F(MasterTest, LaunchDuplicateOfferTest)
ASSERT_SOME(master);
MockExecutor exec(DEFAULT_EXECUTOR_ID);
- TestingIsolator isolator(&exec);
+ TestContainerizer containerizer(&exec);
// See LaunchCombinedOfferTest() for resource size motivation.
Resources fullSlave = Resources::parse("cpus:2;mem:1024").get();
@@ -1351,7 +1357,7 @@ TEST_F(MasterTest, LaunchDuplicateOfferTest)
slave::Flags flags = CreateSlaveFlags();
flags.resources = Option<string>(stringify(fullSlave));
- Try<PID<Slave> > slave = StartSlave(&isolator, flags);
+ Try<PID<Slave> > slave = StartSlave(&containerizer, flags);
ASSERT_SOME(slave);
MockScheduler sched;
@@ -1403,7 +1409,7 @@ TEST_F(MasterTest, LaunchDuplicateOfferTest)
driver.stop();
driver.join();
- Shutdown(); // Must shutdown before 'isolator' gets deallocated.
+ Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
[08/11] git commit: Containerizer (part 1)
Posted by vi...@apache.org.
Containerizer (part 1)
The proposed Containerizer interface is to replace the existing
Isolator.
One ContainerizerProcess has been written:
MesosContainerizerProcess - implements containerization internally
using a Launcher and one or more Isolators (following review)
The intent is to also support a generic ExternalContainerizerProcess
that can delegate containerization by making external calls. Other
Containerizers could interface with specific external containerization
techniques such as Docker or LXC.
Review: https://reviews.apache.org/r/16147
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/f90fe764
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/f90fe764
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/f90fe764
Branch: refs/heads/master
Commit: f90fe7641ea8f7066a6a1171a24ddaa8dc30e789
Parents: c578fe5
Author: Ian Downes <ia...@gmail.com>
Authored: Tue Feb 11 16:24:59 2014 -0800
Committer: Vinod Kone <vi...@twitter.com>
Committed: Tue Feb 11 16:59:50 2014 -0800
----------------------------------------------------------------------
include/mesos/mesos.proto | 25 +-
src/Makefile.am | 26 +-
src/common/type_utils.hpp | 40 +
src/launcher/fetcher.cpp | 242 +++
src/launcher/launcher.cpp | 489 ------
src/launcher/launcher.hpp | 125 --
src/launcher/main.cpp | 100 --
src/local/local.cpp | 23 +-
src/slave/cgroups_isolator.cpp | 1412 ------------------
src/slave/cgroups_isolator.hpp | 320 ----
src/slave/containerizer/containerizer.cpp | 269 ++++
src/slave/containerizer/containerizer.hpp | 137 ++
src/slave/containerizer/mesos_containerizer.cpp | 907 +++++++++++
src/slave/containerizer/mesos_containerizer.hpp | 208 +++
src/slave/flags.hpp | 15 +-
src/slave/http.cpp | 2 +-
src/slave/isolator.cpp | 53 -
src/slave/isolator.hpp | 110 --
src/slave/main.cpp | 20 +-
src/slave/monitor.cpp | 132 +-
src/slave/monitor.hpp | 54 +-
src/slave/paths.hpp | 32 +-
src/slave/process_isolator.cpp | 516 -------
src/slave/process_isolator.hpp | 117 --
src/slave/slave.cpp | 487 +++---
src/slave/slave.hpp | 32 +-
src/slave/state.cpp | 45 +-
src/slave/state.hpp | 10 +-
src/slave/status_update_manager.cpp | 33 +-
src/slave/status_update_manager.hpp | 8 +-
30 files changed, 2243 insertions(+), 3746 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/include/mesos/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto
index 7079e03..69a4a60 100644
--- a/include/mesos/mesos.proto
+++ b/include/mesos/mesos.proto
@@ -83,6 +83,17 @@ message ExecutorID {
/**
+ * A slave generated ID to distinguish a container. The ID must be unique
+ * between any active or completed containers on the slave. In particular,
+ * containers for different runs of the same (framework, executor) pair must be
+ * unique.
+ */
+message ContainerID {
+ required string value = 1;
+}
+
+
+/**
* Describes a framework. If the user field is set to an empty string
* Mesos will automagically set it to the current user. Note that the
* ID is only available after a framework has registered, however, it
@@ -109,13 +120,13 @@ message FrameworkInfo {
/**
- * Describes a command, executed via: '/bin/sh -c value'. Any uri's
- * specified are fetched before executing the command.
- * If executable field for an uri is set, executable file permission
- * is set on the downloaded file. Also, if the downloaded file has ".tgz"
- * extension it is extracted into the executor's working directory.
- * In addition, any environment variables are set before executing
- * the command (so they can be used to "parameterize" your command).
+ * Describes a command, executed via: '/bin/sh -c value'. Any URIs specified
+ * are fetched before executing the command. If the executable field for an
+ * uri is set, executable file permission is set on the downloaded file.
+ * Otherwise, if the downloaded file has a recognized archive extension
+ * (currently [compressed] tar and zip) it is extracted into the executor's
+ * working directory. In addition, any environment variables are set before
+ * executing the command (so they can be used to "parameterize" your command).
*/
message CommandInfo {
message URI {
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/Makefile.am
----------------------------------------------------------------------
diff --git a/src/Makefile.am b/src/Makefile.am
index c307068..9d39666 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -176,10 +176,9 @@ libmesos_no_3rdparty_la_SOURCES = \
slave/state.cpp \
slave/slave.cpp \
slave/http.cpp \
- slave/isolator.cpp \
- slave/process_isolator.cpp \
+ slave/containerizer/containerizer.cpp \
+ slave/containerizer/mesos_containerizer.cpp \
slave/status_update_manager.cpp \
- launcher/launcher.cpp \
exec/exec.cpp \
common/lock.cpp \
common/date_utils.cpp \
@@ -204,11 +203,9 @@ pkginclude_HEADERS = $(top_srcdir)/include/mesos/executor.hpp \
nodist_pkginclude_HEADERS = ../include/mesos/mesos.hpp mesos.pb.h
if OS_LINUX
- libmesos_no_3rdparty_la_SOURCES += slave/cgroups_isolator.cpp
libmesos_no_3rdparty_la_SOURCES += linux/cgroups.cpp
libmesos_no_3rdparty_la_SOURCES += linux/fs.cpp
else
- EXTRA_DIST += slave/cgroups_isolator.cpp
EXTRA_DIST += linux/cgroups.cpp
EXTRA_DIST += linux/fs.cpp
endif
@@ -220,7 +217,7 @@ libmesos_no_3rdparty_la_SOURCES += common/attributes.hpp \
common/type_utils.hpp common/thread.hpp \
examples/utils.hpp files/files.hpp \
hdfs/hdfs.hpp \
- launcher/launcher.hpp linux/cgroups.hpp \
+ linux/cgroups.hpp \
linux/fs.hpp local/flags.hpp local/local.hpp \
logging/flags.hpp logging/logging.hpp \
master/allocator.hpp \
@@ -232,17 +229,16 @@ libmesos_no_3rdparty_la_SOURCES += common/attributes.hpp \
master/registrar.hpp \
master/master.hpp master/sorter.hpp \
messages/messages.hpp slave/constants.hpp \
+ slave/containerizer/containerizer.hpp \
+ slave/containerizer/mesos_containerizer.hpp \
slave/flags.hpp slave/gc.hpp slave/monitor.hpp \
- slave/isolator.hpp \
- slave/cgroups_isolator.hpp \
slave/paths.hpp slave/state.hpp \
slave/status_update_manager.hpp \
- slave/process_isolator.hpp \
slave/slave.hpp \
tests/environment.hpp tests/script.hpp \
tests/zookeeper.hpp tests/flags.hpp tests/utils.hpp \
tests/cluster.hpp \
- tests/isolator.hpp \
+ tests/containerizer.hpp \
tests/mesos.hpp \
tests/zookeeper_test_server.hpp zookeeper/authentication.hpp \
zookeeper/contender.hpp \
@@ -391,10 +387,10 @@ mesos_local_SOURCES = local/main.cpp
mesos_local_CPPFLAGS = $(MESOS_CPPFLAGS)
mesos_local_LDADD = libmesos.la
-pkglibexec_PROGRAMS += mesos-launcher
-mesos_launcher_SOURCES = launcher/main.cpp
-mesos_launcher_CPPFLAGS = $(MESOS_CPPFLAGS)
-mesos_launcher_LDADD = libmesos.la
+pkglibexec_PROGRAMS += mesos-fetcher
+mesos_fetcher_SOURCES = launcher/fetcher.cpp
+mesos_fetcher_CPPFLAGS = $(MESOS_CPPFLAGS)
+mesos_fetcher_LDADD = libmesos.la
pkglibexec_PROGRAMS += mesos-executor
mesos_executor_SOURCES = launcher/executor.cpp
@@ -838,6 +834,7 @@ mesos_tests_SOURCES = \
tests/allocator_tests.cpp \
tests/attributes_tests.cpp \
tests/authentication_tests.cpp \
+ tests/containerizer.cpp \
tests/environment.cpp \
tests/examples_tests.cpp \
tests/exception_tests.cpp \
@@ -878,7 +875,6 @@ mesos_tests_LDADD = ../$(LIBPROCESS)/3rdparty/libgmock.la libmesos.la
mesos_tests_DEPENDENCIES = # Initialized to allow += below.
if OS_LINUX
- mesos_tests_SOURCES += tests/cgroups_isolator_tests.cpp
mesos_tests_SOURCES += tests/cgroups_tests.cpp
mesos_tests_SOURCES += tests/fs_tests.cpp
endif
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/common/type_utils.hpp
----------------------------------------------------------------------
diff --git a/src/common/type_utils.hpp b/src/common/type_utils.hpp
index b8fc573..784a808 100644
--- a/src/common/type_utils.hpp
+++ b/src/common/type_utils.hpp
@@ -72,6 +72,14 @@ inline std::ostream& operator << (
}
+inline std::ostream& operator << (
+ std::ostream& stream,
+ const ContainerID& containerId)
+{
+ return stream << containerId.value();
+}
+
+
inline std::ostream& operator << (std::ostream& stream, const TaskState& state)
{
return stream << TaskState_descriptor()->FindValueByNumber(state)->name();
@@ -150,6 +158,18 @@ inline bool operator == (const ExecutorID& left, const ExecutorID& right)
}
+inline bool operator == (const ContainerID& left, const ContainerID& right)
+{
+ return left.value() == right.value();
+}
+
+
+inline bool operator != (const ContainerID& left, const ContainerID& right)
+{
+ return left.value() != right.value();
+}
+
+
inline bool operator == (const FrameworkID& left, const std::string& right)
{
return left.value() == right;
@@ -180,6 +200,12 @@ inline bool operator == (const ExecutorID& left, const std::string& right)
}
+inline bool operator == (const ContainerID& left, const std::string& right)
+{
+ return left.value() == right;
+}
+
+
inline bool operator < (const FrameworkID& left, const FrameworkID& right)
{
return left.value() < right.value();
@@ -210,6 +236,12 @@ inline bool operator < (const ExecutorID& left, const ExecutorID& right)
}
+inline bool operator < (const ContainerID& left, const ContainerID& right)
+{
+ return left.value() < right.value();
+}
+
+
inline bool operator == (const Environment& left, const Environment& right)
{
if (left.variables().size() != right.variables().size()) {
@@ -355,6 +387,14 @@ inline std::size_t hash_value(const ExecutorID& executorId)
}
+inline std::size_t hash_value(const ContainerID& containerId)
+{
+ size_t seed = 0;
+ boost::hash_combine(seed, containerId.value());
+ return seed;
+}
+
+
namespace internal {
inline bool operator == (const Task& left, const Task& right)
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/launcher/fetcher.cpp
----------------------------------------------------------------------
diff --git a/src/launcher/fetcher.cpp b/src/launcher/fetcher.cpp
new file mode 100644
index 0000000..9c9f07d
--- /dev/null
+++ b/src/launcher/fetcher.cpp
@@ -0,0 +1,242 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string>
+
+#include <mesos/mesos.hpp>
+
+#include <stout/net.hpp>
+#include <stout/option.hpp>
+#include <stout/os.hpp>
+#include <stout/strings.hpp>
+
+#include "hdfs/hdfs.hpp"
+
+using namespace mesos;
+
+using std::string;
+
+// Try to extract filename into directory. A file is recognized as an archive
+// by its extension (.tgz, .tar.gz, .tbz2, .tar.bz2, .txz, .tar.xz or .zip).
+// Nothing is returned once the archive has been extracted. An Error is
+// returned if the extension is not recognized or if the extraction command
+// exits with a non-zero status.
+Try<Nothing> extract(const string& filename, const string& directory)
+{
+  string command;
+  // Extract any .tgz, .tar.gz, .tbz2, .tar.bz2, .txz, .tar.xz or .zip files.
+  if (strings::endsWith(filename, ".tgz") ||
+      strings::endsWith(filename, ".tar.gz") ||
+      strings::endsWith(filename, ".tbz2") ||
+      strings::endsWith(filename, ".tar.bz2") ||
+      strings::endsWith(filename, ".txz") ||
+      strings::endsWith(filename, ".tar.xz")) {
+    // NOTE: The operation has to be spelled '-xf' here. An option cluster
+    // without a leading dash is only understood by tar when it is the very
+    // first argument, and we already pass '-C' first. We also do not name a
+    // compression flag (z/j/J) so that tar detects the compression of the
+    // archive itself rather than assuming a single format for all of the
+    // extensions above.
+    command = "tar -C '" + directory + "' -xf";
+  } else if (strings::endsWith(filename, ".zip")) {
+    command = "unzip -d '" + directory + "'";
+  } else {
+    return Error("Could not extract file with unrecognized extension");
+  }
+
+  // The archive to extract is always the last argument of the command.
+  command += " '" + filename + "'";
+  int status = os::system(command);
+  if (status != 0) {
+    return Error("Failed to extract: command " + command +
+                 " exited with status: " + stringify(status));
+  }
+
+  LOG(INFO) << "Extracted resource '" << filename
+            << "' into '" << directory << "'";
+
+  return Nothing();
+}
+
+
+// Fetch URI into directory.
+//
+// The URI scheme selects how the resource is obtained:
+//   - hdfs:// and hftp:// are copied to the local disk through HDFS,
+//   - http://, https://, ftp:// and ftps:// are downloaded,
+//   - anything else is treated as a local path and copied with 'cp'; a
+//     relative path is resolved against the MESOS_FRAMEWORKS_HOME
+//     environment variable.
+// On success the path of the fetched file inside directory is returned,
+// otherwise an Error describing the failure.
+Try<string> fetch(
+    const string& uri,
+    const string& directory)
+{
+  LOG(INFO) << "Fetching URI '" << uri << "'";
+
+  // Some checks to make sure using the URI value in shell commands
+  // is safe. TODO(benh): These should be pushed into the scheduler
+  // driver and reported to the user.
+  const bool illegal =
+    uri.find('\\') != string::npos ||
+    uri.find('\'') != string::npos ||
+    uri.find('\0') != string::npos;
+
+  if (illegal) {
+    LOG(ERROR) << "URI contains illegal characters, refusing to fetch";
+    return Error("Illegal characters in URI");
+  }
+
+  // Grab the resource from HDFS if its path begins with hdfs:// or
+  // hftp://.
+  // TODO(matei): Enforce some size limits on files we get from HDFS
+  const bool onHdfs =
+    strings::startsWith(uri, "hdfs://") ||
+    strings::startsWith(uri, "hftp://");
+
+  if (onHdfs) {
+    Try<string> name = os::basename(uri);
+    if (name.isError()) {
+      LOG(ERROR) << "Invalid basename for URI: " << name.error();
+      return Error("Invalid basename for URI");
+    }
+
+    const string destination = path::join(directory, name.get());
+
+    HDFS hdfs;
+
+    LOG(INFO) << "Downloading resource from '" << uri
+              << "' to '" << destination << "'";
+
+    Try<Nothing> copied = hdfs.copyToLocal(uri, destination);
+    if (copied.isError()) {
+      LOG(ERROR) << "HDFS copyToLocal failed: " << copied.error();
+      return Error(copied.error());
+    }
+
+    return destination;
+  }
+
+  // Download the resource if it is addressed by an HTTP(S) or FTP(S) URL.
+  const bool remote =
+    strings::startsWith(uri, "http://") ||
+    strings::startsWith(uri, "https://") ||
+    strings::startsWith(uri, "ftp://") ||
+    strings::startsWith(uri, "ftps://");
+
+  if (remote) {
+    // What follows the scheme must contain a '/' that is not the last
+    // character, i.e., there has to be a non-empty path.
+    const string remainder = uri.substr(uri.find("://") + 3);
+    const size_t slash = remainder.find('/');
+    if (slash == string::npos || remainder.size() <= slash + 1) {
+      LOG(ERROR) << "Malformed URL (missing path)";
+      return Error("Malformed URI");
+    }
+
+    // The local file is named after everything behind the last '/'.
+    const string destination =
+      path::join(directory, remainder.substr(remainder.rfind('/') + 1));
+
+    LOG(INFO) << "Downloading '" << uri << "' to '" << destination << "'";
+
+    Try<int> code = net::download(uri, destination);
+    if (code.isError()) {
+      LOG(ERROR) << "Error downloading resource: " << code.error().c_str();
+      return Error("Fetch of URI failed (" + code.error() + ")");
+    }
+
+    if (code.get() != 200) {
+      LOG(ERROR) << "Error downloading resource, received HTTP/FTP return code "
+                 << code.get();
+      return Error("HTTP/FTP error (" + stringify(code.get()) + ")");
+    }
+
+    return destination;
+  }
+
+  // Otherwise copy the local resource.
+  string local = uri;
+  if (local.find('/') != 0) {
+    // We got a non-Hadoop and non-absolute path.
+    if (!os::hasenv("MESOS_FRAMEWORKS_HOME")) {
+      LOG(ERROR) << "A relative path was passed for the resource but the "
+                 << "environment variable MESOS_FRAMEWORKS_HOME is not set. "
+                 << "Please either specify this config option "
+                 << "or avoid using a relative path";
+      return Error("Could not resolve relative URI");
+    }
+
+    local = path::join(os::getenv("MESOS_FRAMEWORKS_HOME"), local);
+    LOG(INFO) << "Prepended environment variable "
+              << "MESOS_FRAMEWORKS_HOME to relative path, "
+              << "making it: '" << local << "'";
+  }
+
+  Try<string> name = os::basename(local);
+  if (name.isError()) {
+    LOG(ERROR) << name.error();
+    return Error("Fetch of URI failed");
+  }
+
+  // Copy the resource to the directory.
+  const string destination = path::join(directory, name.get());
+  const string command = "cp '" + local + "' '" + destination + "'";
+
+  LOG(INFO) << "Copying resource from '" << local
+            << "' to '" << directory << "'";
+
+  const int status = os::system(command);
+  if (status != 0) {
+    LOG(ERROR) << "Failed to copy '" << local
+               << "' : Exit status " << status;
+    return Error("Local copy failed");
+  }
+
+  return destination;
+}
+
+
+// Returns true if filename carries one of the archive extensions that
+// extract() handles (.tgz, .tar.gz, .tbz2, .tar.bz2, .txz, .tar.xz, .zip).
+// Used by main() so that fetched files which are not archives are simply
+// left in place instead of being treated as a failed extraction.
+static bool isArchive(const string& filename)
+{
+  return strings::endsWith(filename, ".tgz") ||
+         strings::endsWith(filename, ".tar.gz") ||
+         strings::endsWith(filename, ".tbz2") ||
+         strings::endsWith(filename, ".tar.bz2") ||
+         strings::endsWith(filename, ".txz") ||
+         strings::endsWith(filename, ".tar.xz") ||
+         strings::endsWith(filename, ".zip");
+}
+
+
+// Entry point of the fetcher. The work to do is described by environment
+// variables:
+//   MESOS_EXECUTOR_URIS   Space separated tokens of the form '<uri>+<flag>'
+//                         where a flag of '1' marks the URI as executable.
+//   MESOS_WORK_DIRECTORY  Directory the URIs are fetched into (required).
+//   MESOS_USER            If set, the directory is chowned to this user.
+// Each URI is fetched into the directory and then either made executable or,
+// if it is a recognized archive, extracted. Exits with status 1 on the first
+// failure and returns 0 once every URI has been processed.
+int main(int argc, char* argv[])
+{
+  GOOGLE_PROTOBUF_VERIFY_VERSION;
+
+  CommandInfo commandInfo;
+  // Construct URIs from the encoded environment string.
+  const std::string& uris = os::getenv("MESOS_EXECUTOR_URIS");
+  foreach (const std::string& token, strings::tokenize(uris, " ")) {
+    // Delimiter between URI and execute permission. We search from the
+    // back because the URI itself may contain a '+'.
+    size_t pos = token.rfind("+");
+    CHECK(pos != std::string::npos)
+      << "Invalid executor uri token in env " << token;
+
+    CommandInfo::URI uri;
+    uri.set_value(token.substr(0, pos));
+    uri.set_executable(token.substr(pos + 1) == "1");
+
+    commandInfo.add_uris()->MergeFrom(uri);
+  }
+
+  CHECK(os::hasenv("MESOS_WORK_DIRECTORY"))
+    << "Missing MESOS_WORK_DIRECTORY environment variable";
+  std::string directory = os::getenv("MESOS_WORK_DIRECTORY");
+
+  // We cannot use Some in the ternary expression because the compiler needs to
+  // be able to infer the type, thus the explicit Option<string>.
+  // TODO(idownes): Add an os::hasenv that returns an Option<string>.
+  Option<std::string> user = os::hasenv("MESOS_USER")
+    ? Option<std::string>(os::getenv("MESOS_USER")) // Explicit so it compiles.
+    : None();
+
+  // Fetch each URI to a local file, chmod, then chown if a user is provided.
+  foreach (const CommandInfo::URI& uri, commandInfo.uris()) {
+    // Fetch the URI to a local file.
+    Try<string> fetched = fetch(uri.value(), directory);
+    if (fetched.isError()) {
+      EXIT(1) << "Failed to fetch: " << uri.value();
+    }
+
+    // Chmod the fetched URI if it's executable, else extract it if it is
+    // recognized as an archive.
+    if (uri.executable()) {
+      bool chmodded = os::chmod(
+          fetched.get(), S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
+      if (!chmodded) {
+        EXIT(1) << "Failed to chmod: " << fetched.get();
+      }
+    } else if (isArchive(fetched.get())) {
+      // TODO(idownes): Consider removing the archive once extracted.
+      Try<Nothing> extracted = extract(fetched.get(), directory);
+      if (extracted.isError()) {
+        EXIT(1) << "Failed to extract "
+                << fetched.get() << ":" << extracted.error();
+      }
+    } else {
+      // A resource that is neither executable nor an archive (e.g., a
+      // configuration file) is not an error; it is left where it was fetched.
+      LOG(INFO) << "Not extracting '" << fetched.get()
+                << "' since it is not a recognized archive";
+    }
+
+    // Recursively chown the directory if a user is provided.
+    if (user.isSome()) {
+      Try<Nothing> chowned = os::chown(user.get(), directory);
+      if (chowned.isError()) {
+        EXIT(1) << "Failed to chown " << directory << ": " << chowned.error();
+      }
+    }
+  }
+
+  return 0;
+}
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/launcher/launcher.cpp
----------------------------------------------------------------------
diff --git a/src/launcher/launcher.cpp b/src/launcher/launcher.cpp
deleted file mode 100644
index d5ab667..0000000
--- a/src/launcher/launcher.cpp
+++ /dev/null
@@ -1,489 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <dirent.h>
-#include <errno.h>
-#include <libgen.h>
-#include <pwd.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include <iostream>
-#include <map>
-#include <sstream>
-
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-
-#include <stout/fatal.hpp>
-#include <stout/foreach.hpp>
-#include <stout/net.hpp>
-#include <stout/nothing.hpp>
-#include <stout/os.hpp>
-#include <stout/path.hpp>
-
-#include "hdfs/hdfs.hpp"
-
-#include "launcher/launcher.hpp"
-
-#include "slave/flags.hpp"
-#include "slave/paths.hpp"
-#include "slave/state.hpp"
-
-using std::cerr;
-using std::cout;
-using std::endl;
-using std::map;
-using std::ostringstream;
-using std::string;
-
-namespace mesos {
-namespace internal {
-namespace launcher {
-
-ExecutorLauncher::ExecutorLauncher(
- const SlaveID& _slaveId,
- const FrameworkID& _frameworkId,
- const ExecutorID& _executorId,
- const UUID& _uuid,
- const CommandInfo& _commandInfo,
- const string& _user,
- const string& _workDirectory,
- const string& _slaveDirectory,
- const string& _slavePid,
- const string& _frameworksHome,
- const string& _hadoopHome,
- bool _redirectIO,
- bool _shouldSwitchUser,
- bool _checkpoint,
- Duration _recoveryTimeout)
- : slaveId(_slaveId),
- frameworkId(_frameworkId),
- executorId(_executorId),
- uuid(_uuid),
- commandInfo(_commandInfo),
- user(_user),
- workDirectory(_workDirectory),
- slaveDirectory(_slaveDirectory),
- slavePid(_slavePid),
- frameworksHome(_frameworksHome),
- hadoopHome(_hadoopHome),
- redirectIO(_redirectIO),
- shouldSwitchUser(_shouldSwitchUser),
- checkpoint(_checkpoint),
- recoveryTimeout(_recoveryTimeout) {}
-
-
-ExecutorLauncher::~ExecutorLauncher() {}
-
-
-// NOTE: We avoid fatalerror()s in this function because, we don't
-// want to kill the slave (in the case of cgroups isolator).
-int ExecutorLauncher::setup()
-{
- // Checkpoint the forked pid, if necessary. The checkpointing must
- // be done in the forked process (cgroups isolator) or execed
- // launcher process (process isolator), because the slave process
- // can die immediately after the isolator forks but before it would
- // have a chance to write the pid to disk. That would result in an
- // orphaned executor process unknown to the recovering slave.
- if (checkpoint) {
- const string& path = slave::paths::getForkedPidPath(
- slave::paths::getMetaRootDir(slaveDirectory),
- slaveId,
- frameworkId,
- executorId,
- uuid);
- cout << "Checkpointing executor's forked pid " << getpid()
- << " to '" << path << "'" << endl;
-
- Try<Nothing> checkpoint =
- slave::state::checkpoint(path, stringify(getpid()));
-
- if (checkpoint.isError()) {
- cerr << "Failed to checkpoint executor's forked pid to '"
- << path << "': " << checkpoint.error();
- return -1;
- }
- }
-
- const string& cwd = os::getcwd();
-
- // TODO(benh): Do this in the slave?
- if (shouldSwitchUser) {
- Try<Nothing> chown = os::chown(user, workDirectory);
-
- if (chown.isError()) {
- cerr << "Failed to change ownership of the executor work directory "
- << workDirectory << " to user " << user << ": " << chown.error()
- << endl;
- return -1;
- }
- }
-
- // Enter working directory.
- if (!os::chdir(workDirectory)) {
- cerr << "Failed to chdir into executor work directory" << endl;
- return -1;
- }
-
- // Redirect output to files in working dir if required.
- // TODO(bmahler): It would be best if instead of closing stderr /
- // stdout and redirecting, we instead always output to stderr /
- // stdout. Also tee'ing their output into the work directory files
- // when redirection is desired.
- if (redirectIO) {
- if (freopen("stdout", "w", stdout) == NULL) {
- fatalerror("freopen failed");
- }
- if (freopen("stderr", "w", stderr) == NULL) {
- fatalerror("freopen failed");
- }
- }
-
- if (fetchExecutors() < 0) {
- cerr << "Failed to fetch executors" << endl;
- return -1;
- }
-
- // Go back to previous directory.
- if (!os::chdir(cwd)) {
- cerr << "Failed to chdir (back) into slave directory" << endl;
- return -1;
- }
-
- return 0;
-}
-
-
-int ExecutorLauncher::launch()
-{
- // Enter working directory.
- if (os::chdir(workDirectory) < 0) {
- fatalerror("Failed to chdir into the executor work directory");
- }
-
- if (shouldSwitchUser) {
- switchUser();
- }
-
- setupEnvironment();
-
- const string& command = commandInfo.value();
-
- // Execute the command (via '/bin/sh -c command').
- execl("/bin/sh", "sh", "-c", command.c_str(), (char*) NULL);
-
- // If we get here, the execv call failed.
- fatalerror("Could not execute '/bin/sh -c %s'", command.c_str());
-
- return -1; // Silence end of non-void function warning.
-}
-
-
-int ExecutorLauncher::run()
-{
- int ret = setup();
- if (ret < 0) {
- return ret;
- }
- return launch();
-}
-
-
-// Download the executor's files and optionally set executable permissions
-// if requested.
-int ExecutorLauncher::fetchExecutors()
-{
- cout << "Fetching resources into '" << workDirectory << "'" << endl;
-
- foreach(const CommandInfo::URI& uri, commandInfo.uris()) {
- string resource = uri.value();
- bool executable = uri.has_executable() && uri.executable();
-
- cout << "Fetching resource '" << resource << "'" << endl;
-
- // Some checks to make sure using the URI value in shell commands
- // is safe. TODO(benh): These should be pushed into the scheduler
- // driver and reported to the user.
- if (resource.find_first_of('\\') != string::npos ||
- resource.find_first_of('\'') != string::npos ||
- resource.find_first_of('\0') != string::npos) {
- cerr << "Illegal characters in URI" << endl;
- return -1;
- }
-
- // Grab the resource from HDFS if its path begins with hdfs:// or
- // htfp://. TODO(matei): Enforce some size limits on files we get
- // from HDFS
- if (resource.find("hdfs://") == 0 || resource.find("hftp://") == 0) {
- HDFS hdfs(path::join(hadoopHome, "bin/hadoop"));
-
- Try<std::string> basename = os::basename(resource);
- if (basename.isError()) {
- cerr << basename.error() << endl;
- return -1;
- }
-
- string localFile = path::join(".", basename.get());
-
- Try<Nothing> copy = hdfs.copyToLocal(resource, localFile);
-
- if (copy.isError()) {
- cerr << "Failed to copy from HDFS: " << copy.error() << endl;
- return -1;
- }
-
- resource = localFile;
- } else if (resource.find("http://") == 0
- || resource.find("https://") == 0
- || resource.find("ftp://") == 0
- || resource.find("ftps://") == 0) {
- string path = resource.substr(resource.find("://") + 3);
- if (path.find("/") == string::npos) {
- cerr << "Malformed URL (missing path)" << endl;
- return -1;
- }
-
- if (path.size() <= path.find("/") + 1) {
- cerr << "Malformed URL (missing path)" << endl;
- return -1;
- }
-
- path = path::join(".", path.substr(path.find_last_of("/") + 1));
- cout << "Downloading '" << resource << "' to '" << path << "'" << endl;
- Try<int> code = net::download(resource, path);
- if (code.isError()) {
- cerr << "Error downloading resource: " << code.error().c_str() << endl;
- return -1;
- } else if (code.get() != 200) {
- cerr << "Error downloading resource, received HTTP/FTP return code "
- << code.get() << endl;
- return -1;
- }
- resource = path;
- } else { // Copy the local resource.
- if (resource.find_first_of("/") != 0) {
- // We got a non-Hadoop and non-absolute path.
- if (frameworksHome != "") {
- resource = path::join(frameworksHome, resource);
- cout << "Prepended configuration option frameworks_home to resource "
- << "path, making it: '" << resource << "'" << endl;
- } else {
- cerr << "A relative path was passed for the resource, but "
- << "the configuration option frameworks_home is not set. "
- << "Please either specify this config option "
- << "or avoid using a relative path" << endl;
- return -1;
- }
- }
-
- // Copy the resource to the current working directory.
- ostringstream command;
- command << "cp '" << resource << "' .";
- cout << "Copying resource from '" << resource << "' to ." << endl;
-
- int status = os::system(command.str());
- if (status != 0) {
- cerr << "Failed to copy '" << resource
- << "' : Exit status " << status << endl;
- return -1;
- }
-
- Try<std::string> base = os::basename(resource);
- if (base.isError()) {
- cerr << base.error() << endl;
- return -1;
- }
-
- resource = path::join(".", base.get());
- }
-
- if (shouldSwitchUser) {
- Try<Nothing> chown = os::chown(user, resource);
-
- if (chown.isError()) {
- cerr << "Failed to chown '" << resource << "' to user " << user << ": "
- << chown.error() << endl;
- return -1;
- }
- }
-
- if (executable &&
- !os::chmod(resource, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) {
- cerr << "Failed to chmod '" << resource << "'" << endl;
- return -1;
- }
-
- // Extract any .tgz, tar.gz, tar.bz2 or zip files.
- if (strings::endsWith(resource, ".tgz") ||
- strings::endsWith(resource, ".tar.gz")) {
- string command = "tar xzf '" + resource + "'";
- cout << "Extracting resource: " << command << endl;
- int code = os::system(command);
- if (code != 0) {
- cerr << "Failed to extract resource: tar exit code " << code << endl;
- return -1;
- }
- } else if (strings::endsWith(resource, ".tbz2") ||
- strings::endsWith(resource, ".tar.bz2")) {
- string command = "tar xjf '" + resource + "'";
- cout << "Extracting resource: " << command << endl;
- int code = os::system(command);
- if (code != 0) {
- cerr << "Failed to extract resource: tar exit code " << code << endl;
- return -1;
- }
- } else if (strings::endsWith(resource, ".txz") ||
- strings::endsWith(resource, ".tar.xz")) {
- // If you want to use XZ on Mac OS, you can try the packages here:
- // http://macpkg.sourceforge.net/
- string command = "tar xJf '" + resource + "'";
- cout << "Extracting resource: " << command << endl;
- int code = os::system(command);
- if (code != 0) {
- cerr << "Failed to extract resource: tar exit code " << code << endl;
- return -1;
- }
- } else if (strings::endsWith(resource, ".zip")) {
- string command = "unzip '" + resource + "'";
- cout << "Extracting resource: " << command << endl;
- int code = os::system(command);
- if (code != 0) {
- cerr << "Failed to extract resource: unzip exit code " << code << endl;
- return -1;
- }
- }
- }
-
- // Recursively chown the work directory, since extraction may have occurred.
- if (shouldSwitchUser) {
- Try<Nothing> chown = os::chown(user, ".");
-
- if (chown.isError()) {
- cerr << "Failed to recursively chown the work directory "
- << workDirectory << " to user " << user << ": " << chown.error()
- << endl;
- return -1;
- }
- }
-
- return 0;
-}
-
-
-void ExecutorLauncher::switchUser()
-{
- if (!os::su(user)) {
- fatal("Failed to switch to user %s for executor %s of framework %s",
- user.c_str(), executorId.value().c_str(), frameworkId.value().c_str());
- }
-}
-
-
-// Set up environment variables for launching a framework's executor.
-void ExecutorLauncher::setupEnvironment()
-{
- foreachpair (const string& key, const string& value, getEnvironment()) {
- os::setenv(key, value);
- }
-}
-
-
-map<string, string> ExecutorLauncher::getEnvironment()
-{
- map<string, string> env;
-
- // Set LIBPROCESS_PORT so that we bind to a random free port (since
- // this might have been set via --port option). We do this before
- // the environment variables below in case it is included.
- env["LIBPROCESS_PORT"] = "0";
-
- // Also add MESOS_NATIVE_LIBRARY if it's not already present (and
- // like above, we do this before the environment variables below in
- // case the framework wants to override).
- if (!os::hasenv("MESOS_NATIVE_LIBRARY")) {
- string path =
-#ifdef __APPLE__
- LIBDIR "/libmesos-" VERSION ".dylib";
-#else
- LIBDIR "/libmesos-" VERSION ".so";
-#endif
- if (os::exists(path)) {
- env["MESOS_NATIVE_LIBRARY"] = path;
- }
- }
-
- // Set up the environment as specified in the ExecutorInfo.
- if (commandInfo.has_environment()) {
- foreach (const Environment::Variable& variable,
- commandInfo.environment().variables()) {
- env[variable.name()] = variable.value();
- }
- }
-
- // Set Mesos environment variables for slave ID, framework ID, etc.
- env["MESOS_DIRECTORY"] = workDirectory;
- env["MESOS_SLAVE_PID"] = slavePid;
- env["MESOS_SLAVE_ID"] = slaveId.value();
- env["MESOS_FRAMEWORK_ID"] = frameworkId.value();
- env["MESOS_EXECUTOR_ID"] = executorId.value();
- env["MESOS_EXECUTOR_UUID"] = uuid.toString();
- env["MESOS_CHECKPOINT"] = checkpoint ? "1" : "0";
-
- if (checkpoint) {
- env["MESOS_RECOVERY_TIMEOUT"] = stringify(recoveryTimeout);
- }
-
- return env;
-}
-
-
-// Get Mesos environment variables that launcher/main.cpp will
-// pass as arguments to an ExecutorLauncher there.
-map<string, string> ExecutorLauncher::getLauncherEnvironment()
-{
- map<string, string> env = getEnvironment();
-
- string uris = "";
- foreach (const CommandInfo::URI& uri, commandInfo.uris()) {
- uris += uri.value() + "+" +
- (uri.has_executable() && uri.executable() ? "1" : "0");
- uris += " ";
- }
-
- // Remove extra space at the end.
- if (uris.size() > 0) {
- uris = strings::trim(uris);
- }
-
- env["MESOS_EXECUTOR_URIS"] = uris;
- env["MESOS_COMMAND"] = commandInfo.value();
- env["MESOS_USER"] = user;
- env["MESOS_SLAVE_DIRECTORY"] = slaveDirectory;
- env["MESOS_HADOOP_HOME"] = hadoopHome;
- env["MESOS_REDIRECT_IO"] = redirectIO ? "1" : "0";
- env["MESOS_SWITCH_USER"] = shouldSwitchUser ? "1" : "0";
-
- return env;
-}
-
-} // namespace launcher {
-} // namespace internal {
-} // namespace mesos {
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/launcher/launcher.hpp
----------------------------------------------------------------------
diff --git a/src/launcher/launcher.hpp b/src/launcher/launcher.hpp
deleted file mode 100644
index 104fe81..0000000
--- a/src/launcher/launcher.hpp
+++ /dev/null
@@ -1,125 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LAUNCHER_HPP__
-#define __LAUNCHER_HPP__
-
-#include <map>
-#include <string>
-
-#include <mesos/mesos.hpp>
-
-#include <stout/duration.hpp>
-#include <stout/uuid.hpp>
-
-#include "slave/flags.hpp"
-
-namespace mesos {
-namespace internal {
-namespace launcher {
-
-// This class sets up the environment for an executor and then exec()'s it.
-// It can either be used after a fork() in the slave process, or run as a
-// standalone program (with the main function in launcher_main.cpp).
-//
-// The environment is initialized through for steps:
-// 1) A work directory for the framework is created by createWorkingDirectory().
-// 2) The executor is fetched off HDFS if necessary by fetchExecutor().
-// 3) Environment variables are set by setupEnvironment().
-// 4) We switch to the framework's user in switchUser().
-//
-// Isolators that wish to override the default behaviour can subclass
-// Launcher and override some of the methods to perform extra actions.
-class ExecutorLauncher {
-public:
- ExecutorLauncher(
- const SlaveID& slaveId,
- const FrameworkID& frameworkId,
- const ExecutorID& executorId,
- const UUID& uuid,
- const CommandInfo& commandInfo,
- const std::string& user,
- const std::string& workDirectory,
- const std::string& slaveWorkDirectory,
- const std::string& slavePid,
- const std::string& frameworksHome,
- const std::string& hadoopHome,
- bool redirectIO,
- bool shouldSwitchUser,
- bool checkpoint,
- Duration recoveryTimeout);
-
- virtual ~ExecutorLauncher();
-
- // Initialize the working directory and fetch the executor.
- virtual int setup();
-
- // Launches the downloaded executor.
- virtual int launch();
-
- // Convenience function that calls setup() and then launch().
- virtual int run();
-
- // Return a map of environment variables for exec'ing a
- // launch_main.cpp (mesos-launcher binary) process. This is used
- // by isolators that cannot exec the user's executor directly
- // (e.g., due to potential deadlocks in forked process).
- virtual std::map<std::string, std::string> getLauncherEnvironment();
-
-protected:
- // Download the required files for the executor from the given set of URIs.
- // Optionally, it will set the executable file permissions for the files.
- // This method is expected to place files in the workDirectory.
- virtual int fetchExecutors();
-
- // Return a map of environment variables for launching a
- // framework's executor.
- virtual std::map<std::string, std::string> getEnvironment();
-
- // Set up environment variables for launching a
- // framework's executor.
- virtual void setupEnvironment();
-
- // Switch to a framework's user in preparation for exec()'ing its executor.
- virtual void switchUser();
-
-protected:
- const SlaveID slaveId;
- const FrameworkID frameworkId;
- const ExecutorID executorId;
- const UUID uuid;
- const CommandInfo commandInfo;
- const std::string user;
- const std::string workDirectory;
- const std::string slaveDirectory;
- const std::string slavePid;
- const std::string frameworksHome;
- const std::string hadoopHome;
- const bool redirectIO; // Whether to redirect stdout and stderr to files.
- const bool shouldSwitchUser; // Whether to setuid to framework's user.
- const bool checkpoint; // Whether the framework enabled checkpointing.
-
- // Executor suicide timeout for slave recovery.
- const Duration recoveryTimeout;
-};
-
-} // namespace launcher {
-} // namespace internal {
-} // namespace mesos {
-
-#endif // __LAUNCHER_HPP__
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/launcher/main.cpp
----------------------------------------------------------------------
diff --git a/src/launcher/main.cpp b/src/launcher/main.cpp
deleted file mode 100644
index de64609..0000000
--- a/src/launcher/main.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <string>
-
-#include <mesos/mesos.hpp>
-
-#include <stout/duration.hpp>
-#include <stout/strings.hpp>
-#include <stout/os.hpp>
-
-#include "launcher/launcher.hpp"
-
-using namespace mesos;
-using namespace mesos::internal; // For 'utils'.
-
-using std::string;
-
-
-int main(int argc, char** argv)
-{
- GOOGLE_PROTOBUF_VERIFY_VERSION;
-
- SlaveID slaveId;
- slaveId.set_value(os::getenv("MESOS_SLAVE_ID"));
-
- FrameworkID frameworkId;
- frameworkId.set_value(os::getenv("MESOS_FRAMEWORK_ID"));
-
- ExecutorID executorId;
- executorId.set_value(os::getenv("MESOS_EXECUTOR_ID"));
-
- CommandInfo commandInfo;
- commandInfo.set_value(os::getenv("MESOS_COMMAND"));
-
- // Construct URIs from the encoded environment string.
- const std::string& uris = os::getenv("MESOS_EXECUTOR_URIS");
- foreach (const std::string& token, strings::tokenize(uris, " ")) {
- size_t pos = token.rfind("+"); // Delim between uri and exec permission.
- CHECK(pos != std::string::npos) << "Invalid executor uri token in env "
- << token;
-
- CommandInfo::URI uri;
- uri.set_value(token.substr(0, pos));
- uri.set_executable(token.substr(pos + 1) == "1");
-
- commandInfo.add_uris()->MergeFrom(uri);
- }
-
- bool checkpoint = os::getenv("MESOS_CHECKPOINT", false) == "1";
-
- Duration recoveryTimeout = slave::RECOVERY_TIMEOUT;
-
- // Get the recovery timeout if checkpointing is enabled.
- if (checkpoint) {
- string value = os::getenv("MESOS_RECOVERY_TIMEOUT", false);
-
- if (!value.empty()) {
- Try<Duration> _recoveryTimeout = Duration::parse(value);
-
- CHECK_SOME(_recoveryTimeout)
- << "Cannot parse MESOS_RECOVERY_TIMEOUT '" + value + "'";
-
- recoveryTimeout = _recoveryTimeout.get();
- }
- }
-
- return mesos::internal::launcher::ExecutorLauncher(
- slaveId,
- frameworkId,
- executorId,
- UUID::fromString(os::getenv("MESOS_EXECUTOR_UUID")),
- commandInfo,
- os::getenv("MESOS_USER"),
- os::getenv("MESOS_DIRECTORY"),
- os::getenv("MESOS_SLAVE_DIRECTORY"),
- os::getenv("MESOS_SLAVE_PID"),
- os::getenv("MESOS_FRAMEWORKS_HOME", false),
- os::getenv("MESOS_HADOOP_HOME"),
- os::getenv("MESOS_REDIRECT_IO") == "1",
- os::getenv("MESOS_SWITCH_USER") == "1",
- checkpoint,
- recoveryTimeout)
- .run();
-}
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/local/local.cpp
----------------------------------------------------------------------
diff --git a/src/local/local.cpp b/src/local/local.cpp
index e650de9..5112391 100644
--- a/src/local/local.cpp
+++ b/src/local/local.cpp
@@ -40,7 +40,7 @@
#include "master/master.hpp"
#include "master/registrar.hpp"
-#include "slave/process_isolator.hpp"
+#include "slave/containerizer/containerizer.hpp"
#include "slave/slave.hpp"
#include "state/leveldb.hpp"
@@ -57,9 +57,8 @@ using mesos::internal::master::allocator::HierarchicalDRFAllocatorProcess;
using mesos::internal::master::Master;
using mesos::internal::master::Registrar;
+using mesos::internal::slave::Containerizer;
using mesos::internal::slave::Slave;
-using mesos::internal::slave::Isolator;
-using mesos::internal::slave::ProcessIsolator;
using process::PID;
using process::UPID;
@@ -80,7 +79,7 @@ static state::Storage* storage = NULL;
static state::protobuf::State* state = NULL;
static Registrar* registrar = NULL;
static Master* master = NULL;
-static map<Isolator*, Slave*> slaves;
+static map<Containerizer*, Slave*> slaves;
static StandaloneMasterDetector* detector = NULL;
static MasterContender* contender = NULL;
static Files* files = NULL;
@@ -142,9 +141,6 @@ PID<Master> launch(const Flags& flags, Allocator* _allocator)
vector<UPID> pids;
for (int i = 0; i < flags.num_slaves; i++) {
- // TODO(benh): Create a local isolator?
- ProcessIsolator* isolator = new ProcessIsolator();
-
slave::Flags flags;
Try<Nothing> load = flags.load("MESOS_");
if (load.isError()) {
@@ -152,13 +148,18 @@ PID<Master> launch(const Flags& flags, Allocator* _allocator)
<< "slave flags from the environment: " << load.error();
}
+ Try<Containerizer*> containerizer = Containerizer::create(flags, true);
+ if (containerizer.isError()) {
+ EXIT(1) << "Failed to create a containerizer: " << containerizer.error();
+ }
+
// Use a different work directory for each slave.
flags.work_dir = path::join(flags.work_dir, stringify(i));
// NOTE: At this point detector is already initialized by the
// Master.
- Slave* slave = new Slave(flags, true, detector, isolator, files);
- slaves[isolator] = slave;
+ Slave* slave = new Slave(flags, detector, containerizer.get(), files);
+ slaves[containerizer.get()] = slave;
pids.push_back(process::spawn(slave));
}
@@ -182,10 +183,10 @@ void shutdown()
// isolator, we can't delete the isolator until we have stopped
// the slave.
- foreachpair (Isolator* isolator, Slave* slave, slaves) {
+ foreachpair (Containerizer* containerizer, Slave* slave, slaves) {
process::terminate(slave->self());
process::wait(slave->self());
- delete isolator;
+ delete containerizer;
delete slave;
}
[04/11] git commit: Containerizer - launchers (part 2).
Posted by vi...@apache.org.
Containerizer - launchers (part 2).
Launcher interface and MesosLauncher to support MesosContainerizers.
Launchers handle the lifecycle of the executor process
(and descendants).
Review: https://reviews.apache.org/r/16149
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/71c6b5c0
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/71c6b5c0
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/71c6b5c0
Branch: refs/heads/master
Commit: 71c6b5c050df91083b6308eabb3abf02dca6b129
Parents: f90fe76
Author: Ian Downes <ia...@gmail.com>
Authored: Tue Feb 11 16:27:51 2014 -0800
Committer: Vinod Kone <vi...@twitter.com>
Committed: Tue Feb 11 16:59:50 2014 -0800
----------------------------------------------------------------------
src/Makefile.am | 4 +
src/linux/cgroups.cpp | 115 +++++++++
src/linux/cgroups.hpp | 13 +
src/slave/containerizer/cgroups_launcher.cpp | 274 ++++++++++++++++++++++
src/slave/containerizer/cgroups_launcher.hpp | 64 +++++
src/slave/containerizer/launcher.cpp | 159 +++++++++++++
src/slave/containerizer/launcher.hpp | 88 +++++++
7 files changed, 717 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/71c6b5c0/src/Makefile.am
----------------------------------------------------------------------
diff --git a/src/Makefile.am b/src/Makefile.am
index 9d39666..a4b5a52 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -177,6 +177,7 @@ libmesos_no_3rdparty_la_SOURCES = \
slave/slave.cpp \
slave/http.cpp \
slave/containerizer/containerizer.cpp \
+ slave/containerizer/launcher.cpp \
slave/containerizer/mesos_containerizer.cpp \
slave/status_update_manager.cpp \
exec/exec.cpp \
@@ -204,6 +205,7 @@ nodist_pkginclude_HEADERS = ../include/mesos/mesos.hpp mesos.pb.h
if OS_LINUX
libmesos_no_3rdparty_la_SOURCES += linux/cgroups.cpp
+ libmesos_no_3rdparty_la_SOURCES += slave/containerizer/cgroups_launcher.cpp
libmesos_no_3rdparty_la_SOURCES += linux/fs.cpp
else
EXTRA_DIST += linux/cgroups.cpp
@@ -229,7 +231,9 @@ libmesos_no_3rdparty_la_SOURCES += common/attributes.hpp \
master/registrar.hpp \
master/master.hpp master/sorter.hpp \
messages/messages.hpp slave/constants.hpp \
+ slave/containerizer/cgroups_launcher.hpp \
slave/containerizer/containerizer.hpp \
+ slave/containerizer/launcher.hpp \
slave/containerizer/mesos_containerizer.hpp \
slave/flags.hpp slave/gc.hpp slave/monitor.hpp \
slave/paths.hpp slave/state.hpp \
http://git-wip-us.apache.org/repos/asf/mesos/blob/71c6b5c0/src/linux/cgroups.cpp
----------------------------------------------------------------------
diff --git a/src/linux/cgroups.cpp b/src/linux/cgroups.cpp
index 19ab1f3..8ac2599 100644
--- a/src/linux/cgroups.cpp
+++ b/src/linux/cgroups.cpp
@@ -405,6 +405,121 @@ static Try<Nothing> write(
} // namespace internal {
+// Prepares a cgroups hierarchy dedicated to a single subsystem and
+// makes sure the given cgroup exists inside it.
+//
+// The hierarchy path is 'baseHierarchy/subsystem'. If nothing is
+// mounted there the subsystem is mounted (after removing an existing
+// empty directory at that path); if a hierarchy is already mounted it
+// must have exactly the requested subsystem attached. The cgroup is
+// then created if missing, and a nested 'test' cgroup is created and
+// removed again to verify that nested cgroups can be created.
+//
+// Returns the hierarchy path, or an Error describing the first step
+// that failed (this includes missing kernel support for cgroups and
+// not running as root).
+Try<string> prepare(
+    const std::string& baseHierarchy,
+    const std::string& subsystem,
+    const std::string& cgroup)
+{
+  // Construct the hierarchy for this subsystem.
+  std::string hierarchy = path::join(baseHierarchy, subsystem);
+
+  // Ensure cgroups are enabled in the kernel.
+  if (!cgroups::enabled()) {
+    return Error("No cgroups support detected in this kernel");
+  }
+
+  // Ensure we have root permissions.
+  if (geteuid() != 0) {
+    return Error("Using cgroups requires root permissions");
+  }
+
+  // Check if the hierarchy is already mounted, and if not, mount it.
+  Try<bool> mounted = cgroups::mounted(hierarchy);
+
+  if (mounted.isError()) {
+    return Error("Failed to determine if " + hierarchy +
+                 " is already mounted: " + mounted.error());
+  }
+
+  if (mounted.get()) {
+    // Make sure that desired subsystem is attached to the already
+    // mounted hierarchy.
+    Try<std::set<std::string> > attached = cgroups::subsystems(hierarchy);
+    if (attached.isError()) {
+      return Error(string("Failed to determine the attached subsystems ") +
+                   "for the cgroup hierarchy at " + hierarchy + ": " +
+                   attached.error());
+    }
+
+    if (attached.get().count(subsystem) == 0) {
+      return Error("The cgroups hierarchy at " + hierarchy +
+                   " can not be used because it does not have the '" +
+                   subsystem + "' subsystem attached");
+    }
+
+    // The subsystem must be the only one attached to this hierarchy.
+    if (attached.get().size() > 1) {
+      return Error("The " + subsystem + " subsystem is co-mounted at " +
+                   hierarchy + " with other subsystems");
+    }
+  } else {
+    // Attempt to mount the hierarchy ourselves.
+    if (os::exists(hierarchy)) {
+      // The path specified by the given hierarchy already exists in
+      // the file system. We try to remove it if it is an empty
+      // directory. This helps us better deal with slave restarts
+      // since we won't need to manually remove the directory.
+      Try<Nothing> rmdir = os::rmdir(hierarchy, false);
+      if (rmdir.isError()) {
+        return Error("Failed to mount cgroups hierarchy at '" + hierarchy +
+                     "' because we could not remove the existing directory: " +
+                     rmdir.error());
+      }
+    }
+
+    // Mount the subsystem.
+    Try<Nothing> mount = cgroups::mount(hierarchy, subsystem);
+    if (mount.isError()) {
+      return Error("Failed to mount cgroups hierarchy at '" + hierarchy +
+                   "': " + mount.error());
+    }
+  }
+
+  // Create the cgroup if it doesn't exist.
+  Try<bool> exists = cgroups::exists(hierarchy, cgroup);
+  if (exists.isError()) {
+    return Error("Failed to check existence of root cgroup " +
+                 path::join(hierarchy, cgroup) +
+                 ": " + exists.error());
+  }
+
+  if (!exists.get()) {
+    // No cgroup exists, create it.
+    Try<Nothing> create = cgroups::create(hierarchy, cgroup);
+    if (create.isError()) {
+      return Error("Failed to create root cgroup " +
+                   path::join(hierarchy, cgroup) +
+                   ": " + create.error());
+    }
+  }
+
+  // Create a nested test cgroup if it doesn't exist.
+  const string testCgroup = path::join(cgroup, "test");
+  exists = cgroups::exists(hierarchy, testCgroup);
+  if (exists.isError()) {
+    return Error("Failed to check existence of nested test cgroup " +
+                 path::join(hierarchy, testCgroup) +
+                 ": " + exists.error());
+  }
+
+  if (!exists.get()) {
+    // Make sure this kernel supports creating nested cgroups.
+    Try<Nothing> create = cgroups::create(hierarchy, testCgroup);
+    if (create.isError()) {
+      return Error(string("Failed to create a nested 'test' cgroup.") +
+                   " Your kernel might be too old to use the" +
+                   " cgroups isolator: " + create.error());
+    }
+  }
+
+  // Remove the nested 'test' cgroup.
+  Try<Nothing> remove = cgroups::remove(hierarchy, testCgroup);
+  if (remove.isError()) {
+    return Error("Failed to remove the nested test cgroup: " + remove.error());
+  }
+
+  return hierarchy;
+}
+
// Returns some error string if either (a) hierarchy is not mounted,
// (b) cgroup does not exist, or (c) control file does not exist.
static Option<Error> verify(
http://git-wip-us.apache.org/repos/asf/mesos/blob/71c6b5c0/src/linux/cgroups.hpp
----------------------------------------------------------------------
diff --git a/src/linux/cgroups.hpp b/src/linux/cgroups.hpp
index bf0d173..2f4ab25 100644
--- a/src/linux/cgroups.hpp
+++ b/src/linux/cgroups.hpp
@@ -58,6 +58,19 @@ const unsigned int EMPTY_WATCHER_RETRIES = 50;
// Control - A control file in a cgroup (e.g. tasks, cpu.shares).
+// TODO(idownes): Rework all functions in this file to better support
+// separately mounted subsystems.
+
+// Prepare a hierarchy at 'baseHierarchy/subsystem' which has the specified
+// subsystem (and only that subsystem) mounted and has the specified cgroup
+// created. Returns the hierarchy path, or an Error if cgroups are not
+// supported, nested cgroups cannot be created, or any other step fails.
+Try<std::string> prepare(
+ const std::string& baseHierarchy,
+ const std::string& subsystem,
+ const std::string& cgroup);
+
+
// Check whether cgroups module is enabled on the current machine.
// @return True if cgroups module is enabled.
// False if cgroups module is not available.
http://git-wip-us.apache.org/repos/asf/mesos/blob/71c6b5c0/src/slave/containerizer/cgroups_launcher.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/cgroups_launcher.cpp b/src/slave/containerizer/cgroups_launcher.cpp
new file mode 100644
index 0000000..0921d63
--- /dev/null
+++ b/src/slave/containerizer/cgroups_launcher.cpp
@@ -0,0 +1,274 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <unistd.h>
+
+#include <vector>
+
+#include <stout/hashset.hpp>
+#include <stout/path.hpp>
+#include <stout/unreachable.hpp>
+
+#include "linux/cgroups.hpp"
+
+#include "mesos/resources.hpp"
+
+#include "slave/containerizer/cgroups_launcher.hpp"
+
+using namespace process;
+
+using std::list;
+using std::string;
+using std::vector;
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+using state::RunState;
+
+// Stores a copy of the slave flags and the path of the freezer
+// hierarchy this launcher operates on.
+CgroupsLauncher::CgroupsLauncher(
+    const Flags& _flags,
+    const string& _hierarchy)
+  : flags(_flags), hierarchy(_hierarchy)
+{
+}
+
+
+// Factory: prepares the "freezer" hierarchy below flags.cgroups_hierarchy
+// (with flags.cgroups_root as the cgroup to create) and returns a new
+// CgroupsLauncher bound to it, or an Error if preparation failed.
+Try<Launcher*> CgroupsLauncher::create(const Flags& flags)
+{
+  Try<string> freezer =
+    cgroups::prepare(flags.cgroups_hierarchy, "freezer", flags.cgroups_root);
+
+  if (!freezer.isError()) {
+    LOG(INFO) << "Using " << freezer.get()
+              << " as the freezer hierarchy for the cgroups launcher";
+
+    return new CgroupsLauncher(flags, freezer.get());
+  }
+
+  return Error("Failed to create cgroups launcher: " + freezer.error());
+}
+
+
+// Rebuilds the container -> pid map from the checkpointed run states
+// and removes freezer cgroups that belong to no recovered container.
+//
+// Returns an Error if a state lacks a ContainerID, if the existence of
+// a container's freezer cgroup cannot be determined, if a container
+// whose cgroup exists has no forked pid, if two containers share a
+// pid, or if the cgroups below flags.cgroups_root cannot be listed.
+// Containers whose freezer cgroup no longer exists are skipped.
+Try<Nothing> CgroupsLauncher::recover(const std::list<state::RunState>& states)
+{
+  // Freezer cgroups of the containers we recovered; anything else
+  // found below the root cgroup is treated as an orphan.
+  hashset<string> cgroups;
+
+  foreach (const RunState& state, states) {
+    if (state.id.isNone()) {
+      return Error("ContainerID is required to recover");
+    }
+    const ContainerID& containerId = state.id.get();
+
+    Try<bool> exists = cgroups::exists(hierarchy, cgroup(containerId));
+
+    // Calling get() on a failed Try is invalid, so surface the error.
+    if (exists.isError()) {
+      return Error("Failed to check existence of freezer cgroup for container " +
+                   stringify(containerId) + ": " + exists.error());
+    }
+
+    if (!exists.get()) {
+      // This may occur if the freezer cgroup was destroyed but the slave dies
+      // before noticing this.
+      // The containerizer will monitor the container's pid and notice that it
+      // has exited, triggering destruction of the container.
+      LOG(INFO) << "Couldn't find freezer cgroup for container " << containerId;
+      continue;
+    }
+
+    if (state.forkedPid.isNone()) {
+      return Error("Executor pid is required to recover container " +
+                   stringify(containerId));
+    }
+    pid_t pid = state.forkedPid.get();
+
+    if (pids.containsValue(pid)) {
+      // This should (almost) never occur. There is the possibility that a new
+      // executor is launched with the same pid as one that just exited (highly
+      // unlikely) and the slave dies after the new executor is launched but
+      // before it hears about the termination of the earlier executor (also
+      // unlikely). Regardless, the launcher can't do anything sensible so this
+      // is considered an error.
+      return Error("Detected duplicate pid " + stringify(pid) +
+                   " for container " + stringify(containerId));
+    }
+
+    pids.put(containerId, pid);
+
+    cgroups.insert(cgroup(containerId));
+  }
+
+  Try<vector<string> > orphans = cgroups::get(hierarchy, flags.cgroups_root);
+  if (orphans.isError()) {
+    return Error(orphans.error());
+  }
+
+  foreach (const string& orphan, orphans.get()) {
+    if (!cgroups.contains(orphan)) {
+      LOG(INFO) << "Removing orphaned cgroup"
+                << " '" << path::join("freezer", orphan) << "'";
+      // The result of the destroy is intentionally not waited on here.
+      cgroups::destroy(hierarchy, orphan);
+    }
+  }
+
+  return Nothing();
+}
+
+
+// Forks a child for the given container and places it in the
+// container's freezer cgroup (creating the cgroup if necessary) before
+// the child is allowed to run 'inChild'.
+//
+// A pipe is used to hold the child back until the parent has assigned
+// it to the cgroup. In the parent the child's pid is returned, or an
+// Error if the cgroup could not be created, the fork failed, or the
+// child could not be contained/synchronized (in which case the child
+// is sent SIGKILL). The child never returns from this function:
+// 'inChild' is expected to exec, and every failure path calls _exit.
+Try<pid_t> CgroupsLauncher::fork(
+    const ContainerID& containerId,
+    const lambda::function<int()>& inChild)
+{
+  // Create a freezer cgroup for this container if necessary.
+  Try<bool> exists = cgroups::exists(hierarchy, cgroup(containerId));
+
+  if (exists.isError()) {
+    return Error("Failed to create freezer cgroup: " + exists.error());
+  }
+
+  if (!exists.get()) {
+    Try<Nothing> created = cgroups::create(hierarchy, cgroup(containerId));
+
+    if (created.isError()) {
+      LOG(ERROR) << "Failed to create freezer cgroup for container '"
+                 << containerId << "': " << created.error();
+      return Error("Failed to contain process: " + created.error());
+    }
+  }
+
+  // Additional processes forked will be put into the same process group and
+  // session.
+  Option<pid_t> pgid = pids.get(containerId);
+
+  // Use a pipe to block the child until it's been moved into the freezer
+  // cgroup.
+  int pipes[2];
+  // We assume this should not fail under reasonable conditions so we use CHECK.
+  CHECK(pipe(pipes) == 0);
+
+  pid_t pid;
+
+  if ((pid = ::fork()) == -1) {
+    // Capture errno in the error before closing the pipe can clobber it,
+    // then release both ends so the descriptors are not leaked.
+    ErrnoError error("Failed to fork");
+    os::close(pipes[0]);
+    os::close(pipes[1]);
+    return error;
+  }
+
+  if (pid > 0) {
+    // In parent.
+    os::close(pipes[0]);
+
+    // Move the child into the freezer cgroup. Any grandchildren will also be
+    // contained in the cgroup.
+    Try<Nothing> assign = cgroups::assign(hierarchy, cgroup(containerId), pid);
+
+    if (assign.isError()) {
+      LOG(ERROR) << "Failed to assign process " << pid
+                 << " of container '" << containerId << "'"
+                 << " to its freezer cgroup: " << assign.error();
+      kill(pid, SIGKILL);
+      // Don't leak the write end of the pipe on this error path.
+      os::close(pipes[1]);
+      return Error("Failed to contain process");
+    }
+
+    // Now that we've contained the child we can signal it to continue by
+    // writing to the pipe. The value itself is irrelevant but is initialized
+    // so that no indeterminate bytes are written.
+    int buf = 0;
+    ssize_t len;
+    while ((len = write(pipes[1], &buf, sizeof(buf))) == -1 && errno == EINTR);
+
+    if (len != sizeof(buf)) {
+      // Ensure the child is killed.
+      kill(pid, SIGKILL);
+      os::close(pipes[1]);
+      return Error("Failed to synchronize child process");
+    }
+    os::close(pipes[1]);
+
+    // Store the pid (session id and process group id) if this is the first
+    // process forked for this container.
+    if (!pids.contains(containerId)) {
+      pids.put(containerId, pid);
+    }
+
+    return pid;
+  } else {
+    // In child.
+    os::close(pipes[1]);
+
+    // Move to a previously created process group (and session) if available,
+    // else create a new session and process group. Even though we track
+    // processes using cgroups we need to move to a different session so we're
+    // independent from the slave's session (otherwise children will receive
+    // SIGHUP if the slave exits).
+    // NOTE(review): POSIX only lets setpgid() join a process group within
+    // the caller's own session, so joining a group whose leader called
+    // setsid() is expected to fail with EPERM - confirm the intended
+    // behavior for additional forks.
+    // TODO(idownes): perror is not listed as async-signal-safe and should be
+    // reimplemented safely.
+    if (pgid.isSome()) {
+      if (setpgid(0, pgid.get()) == -1) {
+        perror("Failed to put child into process group");
+        os::close(pipes[0]);
+        _exit(1);
+      }
+    } else if (setsid() == -1) {
+      perror("Failed to put child in a new session");
+      os::close(pipes[0]);
+      _exit(1);
+    }
+
+    // Do a blocking read on the pipe until the parent signals us to continue.
+    int buf;
+    ssize_t len;
+    while ((len = read(pipes[0], &buf, sizeof(buf))) == -1 && errno == EINTR);
+
+    if (len != sizeof(buf)) {
+      const char* message = "Failed to synchronize with parent";
+      // Ignore the return value from write() to silence compiler warning.
+      while (write(STDERR_FILENO, message, strlen(message)) == -1 &&
+             errno == EINTR);
+      os::close(pipes[0]);
+      _exit(1);
+    }
+
+    os::close(pipes[0]);
+
+    // This function should exec() and therefore not return.
+    inChild();
+
+    return UNREACHABLE();
+  }
+}
+
+
+// Continuation for CgroupsLauncher::destroy: maps the outcome of
+// destroying the freezer cgroup to a Future<Nothing>, logging and
+// failing if the destroy failed.
+Future<Nothing> _destroy(
+    const ContainerID& containerId,
+    process::Future<bool> destroyed)
+{
+  if (!destroyed.isFailed()) {
+    return Nothing();
+  }
+
+  LOG(ERROR) << "Failed to destroy freezer cgroup for '"
+             << containerId << "': " << destroyed.failure();
+
+  return Failure("Failed to destroy launcher: " + destroyed.failure());
+}
+
+
+// Forgets the pid recorded for the container and destroys the
+// container's freezer cgroup. The returned future is produced by
+// _destroy once the cgroup destroy has finished.
+Future<Nothing> CgroupsLauncher::destroy(const ContainerID& containerId)
+{
+  pids.erase(containerId);
+
+  const string freezerCgroup = cgroup(containerId);
+
+  return cgroups::destroy(hierarchy, freezerCgroup)
+    .then(lambda::bind(&_destroy, containerId, lambda::_1));
+}
+
+
+// Returns the name of the container's freezer cgroup: the container id
+// value joined below flags.cgroups_root.
+string CgroupsLauncher::cgroup(const ContainerID& containerId)
+{
+  const string& root = flags.cgroups_root;
+
+  return path::join(root, containerId.value());
+}
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
http://git-wip-us.apache.org/repos/asf/mesos/blob/71c6b5c0/src/slave/containerizer/cgroups_launcher.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/cgroups_launcher.hpp b/src/slave/containerizer/cgroups_launcher.hpp
new file mode 100644
index 0000000..db61107
--- /dev/null
+++ b/src/slave/containerizer/cgroups_launcher.hpp
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CGROUPS_LAUNCHER_HPP__
+#define __CGROUPS_LAUNCHER_HPP__
+
+#include "slave/containerizer/launcher.hpp"
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+// Launcher for Linux systems with cgroups. Every container gets its own
+// freezer cgroup (named by cgroup()) into which each process forked for
+// the container is placed, so the freezer cgroup tracks the container's
+// pids.
+class CgroupsLauncher : public Launcher
+{
+public:
+  // Prepares the freezer hierarchy described by 'flags' and returns a
+  // launcher bound to it, or an Error if the hierarchy cannot be prepared.
+  static Try<Launcher*> create(const Flags& flags);
+
+  virtual ~CgroupsLauncher() {}
+
+  // Restores the container -> pid map from 'states' and removes freezer
+  // cgroups that do not belong to any recovered container.
+  virtual Try<Nothing> recover(const std::list<state::RunState>& states);
+
+  // Forks a child that is moved into the container's freezer cgroup before
+  // it calls 'inChild'. Returns the child's pid in the parent.
+  virtual Try<pid_t> fork(
+      const ContainerID& containerId,
+      const lambda::function<int()>& inChild);
+
+  // Destroys the container's freezer cgroup.
+  virtual process::Future<Nothing> destroy(const ContainerID& containerId);
+
+private:
+  CgroupsLauncher(const Flags& flags, const std::string& hierarchy);
+
+  // NOTE: These are initialized in declaration order by the constructor.
+  const Flags flags;
+  const std::string hierarchy;
+
+  // Name of the freezer cgroup used for 'containerId'.
+  std::string cgroup(const ContainerID& containerId);
+
+  // The 'pid' is the process id of the first process and also the process
+  // group id and session id.
+  hashmap<ContainerID, pid_t> pids;
+};
+
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
+
+#endif // __CGROUPS_LAUNCHER_HPP__
http://git-wip-us.apache.org/repos/asf/mesos/blob/71c6b5c0/src/slave/containerizer/launcher.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/launcher.cpp b/src/slave/containerizer/launcher.cpp
new file mode 100644
index 0000000..ddfa91c
--- /dev/null
+++ b/src/slave/containerizer/launcher.cpp
@@ -0,0 +1,159 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <unistd.h>
+
+#include <process/collect.hpp>
+#include <process/delay.hpp>
+#include <process/process.hpp>
+#include <process/reap.hpp>
+
+#include <stout/unreachable.hpp>
+
+#include "mesos/resources.hpp"
+
+#include "slave/containerizer/launcher.hpp"
+
+using namespace process;
+
+using std::list;
+using std::string;
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+using state::RunState;
+
+// Factory for the POSIX launcher. The flags are not consulted.
+Try<Launcher*> PosixLauncher::create(const Flags& flags)
+{
+  Launcher* launcher = new PosixLauncher();
+
+  return launcher;
+}
+
+
+// Rebuilds the container -> pid map from the checkpointed run states.
+// Every state must carry both a ContainerID and a forked pid, and no two
+// containers may share a pid; otherwise an Error is returned.
+Try<Nothing> PosixLauncher::recover(const list<RunState>& states)
+{
+  foreach (const RunState& run, states) {
+    if (!run.id.isSome()) {
+      return Error("ContainerID is required to recover");
+    }
+
+    const ContainerID& recovered = run.id.get();
+
+    if (!run.forkedPid.isSome()) {
+      return Error("Executor pid is required to recover container " +
+                   stringify(recovered));
+    }
+
+    const pid_t executorPid = run.forkedPid.get();
+
+    // A duplicate should (almost) never occur: it needs a new executor to
+    // reuse the pid of one that just exited *and* the slave to die after
+    // launching the new executor but before learning that the earlier one
+    // terminated. The launcher can't do anything sensible in that case, so
+    // it is reported as an error.
+    const bool duplicate = pids.containsValue(executorPid);
+    if (duplicate) {
+      return Error("Detected duplicate pid " + stringify(executorPid) +
+                   " for container " + stringify(recovered));
+    }
+
+    pids.put(recovered, executorPid);
+  }
+
+  return Nothing();
+}
+
+
+// Forks the (single) process for a container. The child becomes the
+// leader of a new session and then calls 'inChild'; the parent records
+// and returns the child's pid. Fails if a process was already forked for
+// this container or if fork() itself fails.
+Try<pid_t> PosixLauncher::fork(
+    const ContainerID& containerId,
+    const lambda::function<int()>& inChild)
+{
+  if (pids.contains(containerId)) {
+    return Error("Process has already been forked for container " +
+                 stringify(containerId));
+  }
+
+  const pid_t child = ::fork();
+
+  if (child == -1) {
+    return ErrnoError("Failed to fork");
+  }
+
+  if (child == 0) {
+    // In child.
+    // POSIX guarantees a forked child's pid does not match any existing
+    // process group id, so a single setsid() suffices and the session id
+    // will equal the pid.
+    // TODO(idownes): perror is not listed as async-signal-safe and should be
+    // reimplemented safely.
+    if (setsid() == -1) {
+      perror("Failed to put child in a new session");
+      _exit(1);
+    }
+
+    // This function should exec() and therefore not return.
+    inChild();
+
+    return UNREACHABLE();
+  }
+
+  // In parent.
+  LOG(INFO) << "Forked child with pid '" << child
+            << "' for container '" << containerId << "'";
+
+  // Remember the pid, which is also the session id and process group id.
+  pids.put(containerId, child);
+
+  return child;
+}
+
+
+// Continuation for PosixLauncher::destroy: succeeds when the reap future
+// is ready, otherwise fails with the reap failure (or "unknown error").
+Future<Nothing> _destroy(const Future<Option<int> >& future)
+{
+  if (!future.isReady()) {
+    return Failure("Failed to kill all processes: " +
+                   (future.isFailed() ? future.failure() : "unknown error"));
+  }
+
+  return Nothing();
+}
+
+
+// Kills every process of the container (os::killtree with SIGKILL on the
+// recorded pid, with both boolean options enabled) and forgets the
+// container. The returned future completes once the recorded pid has
+// been reaped. Fails immediately for an unknown container.
+Future<Nothing> PosixLauncher::destroy(const ContainerID& containerId)
+{
+  if (!pids.contains(containerId)) {
+    return Failure("Unknown container " + containerId.value());
+  }
+
+  pid_t pid = pids.get(containerId).get();
+
+  // Kill all processes in the session and process group.
+  Try<list<os::ProcessTree> > trees =
+    os::killtree(pid, SIGKILL, true, true);
+
+  if (trees.isError()) {
+    // Killing is best effort, so keep going, but don't silently drop the
+    // error: it explains a destroy that never completes.
+    LOG(WARNING) << "Failed to kill the process tree rooted at pid " << pid
+                 << " for container '" << containerId
+                 << "': " << trees.error();
+  }
+
+  pids.erase(containerId);
+
+  // The child process may not have been waited on yet so we'll delay
+  // completing destroy until we're sure it has been reaped.
+  return process::reap(pid)
+    .then(lambda::bind(&_destroy, lambda::_1));
+}
+
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
http://git-wip-us.apache.org/repos/asf/mesos/blob/71c6b5c0/src/slave/containerizer/launcher.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/launcher.hpp b/src/slave/containerizer/launcher.hpp
new file mode 100644
index 0000000..dee526f
--- /dev/null
+++ b/src/slave/containerizer/launcher.hpp
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LAUNCHER_HPP__
+#define __LAUNCHER_HPP__
+
+#include <list>
+
+#include <process/future.hpp>
+
+#include <stout/lambda.hpp>
+#include <stout/try.hpp>
+
+#include "slave/flags.hpp"
+#include "slave/state.hpp"
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+class Launcher
+{
+public:
+ virtual ~Launcher() {}
+
+ // Recover the launcher's state for each container listed in 'states'.
+ virtual Try<Nothing> recover(const std::list<state::RunState>& states) = 0;
+
+ // Fork a new process in the containerized context. The child calls the
+ // specified function; in the parent the child's pid is returned.
+ // NOTE: The function must be async-signal safe and should exec as soon as
+ // possible, since it runs in the forked child.
+ virtual Try<pid_t> fork(
+ const ContainerID& containerId,
+ const lambda::function<int()>&) = 0;
+
+ // Kill all processes in the containerized context of 'containerId'.
+ virtual process::Future<Nothing> destroy(const ContainerID& containerId) = 0;
+};
+
+
+// Launcher suitable for any POSIX compliant system. Tracks the processes of a
+// container using process groups and sessions. POSIX states that process
+// groups cannot migrate between sessions, so all processes for a container
+// will be contained in the session created for it.
+class PosixLauncher : public Launcher
+{
+public:
+ static Try<Launcher*> create(const Flags& flags);
+
+ virtual ~PosixLauncher() {}
+
+ virtual Try<Nothing> recover(const std::list<state::RunState>& states);
+
+ virtual Try<pid_t> fork(
+ const ContainerID& containerId,
+ const lambda::function<int()>& inChild);
+
+ virtual process::Future<Nothing> destroy(const ContainerID& containerId);
+
+private:
+ PosixLauncher() {}
+
+ // Maps each container to the pid of its first process, which is also the
+ // container's process group id and session id.
+ hashmap<ContainerID, pid_t> pids;
+};
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
+
+#endif // __LAUNCHER_HPP__
[07/11] Containerizer (part 1)
Posted by vi...@apache.org.
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/slave/cgroups_isolator.cpp
----------------------------------------------------------------------
diff --git a/src/slave/cgroups_isolator.cpp b/src/slave/cgroups_isolator.cpp
deleted file mode 100644
index ef7dd68..0000000
--- a/src/slave/cgroups_isolator.cpp
+++ /dev/null
@@ -1,1412 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <math.h> // For floor.
-#include <signal.h>
-#include <unistd.h>
-
-#include <sys/file.h> // For flock.
-#include <sys/types.h>
-
-#include <algorithm>
-#include <set>
-#include <sstream>
-#include <string>
-#include <vector>
-
-#include <process/clock.hpp>
-#include <process/defer.hpp>
-#include <process/dispatch.hpp>
-#include <process/reap.hpp>
-
-#include <stout/bytes.hpp>
-#include <stout/check.hpp>
-#include <stout/duration.hpp>
-#include <stout/error.hpp>
-#include <stout/exit.hpp>
-#include <stout/foreach.hpp>
-#include <stout/hashmap.hpp>
-#include <stout/hashset.hpp>
-#include <stout/lambda.hpp>
-#include <stout/none.hpp>
-#include <stout/nothing.hpp>
-#include <stout/numify.hpp>
-#include <stout/option.hpp>
-#include <stout/os.hpp>
-#include <stout/path.hpp>
-#include <stout/proc.hpp>
-#include <stout/stringify.hpp>
-#include <stout/strings.hpp>
-#include <stout/uuid.hpp>
-
-#include "linux/cgroups.hpp"
-
-#include "slave/cgroups_isolator.hpp"
-#include "slave/state.hpp"
-
-using process::defer;
-using process::Failure;
-using process::Future;
-
-using std::list;
-using std::map;
-using std::set;
-using std::string;
-using std::ostringstream;
-using std::vector;
-
-namespace mesos {
-namespace internal {
-namespace slave {
-
-using state::SlaveState;
-using state::FrameworkState;
-using state::ExecutorState;
-using state::RunState;
-
-// CPU subsystem constants.
-const size_t CPU_SHARES_PER_CPU = 1024;
-const size_t MIN_CPU_SHARES = 10;
-const Duration CPU_CFS_PERIOD = Milliseconds(100); // Linux default.
-const Duration MIN_CPU_CFS_QUOTA = Milliseconds(1);
-
-// Memory subsystem constants.
-const Bytes MIN_MEMORY = Megabytes(32);
-
-
-// This is an approximate double precision equality check.
-// It only considers up to 0.001 precision.
-// This is used so that we can enforce correct arithmetic on "millicpu" units.
-// TODO(bmahler): Banish this to hell when we expose individual cpus as a
-// resource to frameworks, so that we can enforce having no fractions.
-bool almostEqual(double d1, double d2) {
- return (d1 <= (d2 + 0.001)) && (d1 >= (d2 - 0.001));
-}
-
-
-map<proc::CPU, double> Cpuset::grow(
- double delta,
- const map<proc::CPU, double>& usage)
-{
- // The technique used here is to allocate as much as possible to
- // each cpu that has availability, until we've allocated the delta.
- // Note that we examine the cpus in the same order every time, which
- // means we don't yet consider locality.
- map<proc::CPU, double> allocation;
- foreachpair (const proc::CPU& cpu, double used, usage) {
- // Are we done allocating?
- if (almostEqual(delta, 0.0)) {
- break;
- }
-
- // Allocate as much as possible to this CPU.
- if (!almostEqual(used, 1.0)) {
- double free = 1.0 - used;
- double allocated = std::min(delta, free);
- allocation[cpu] = allocated;
- delta -= allocated;
- cpus[cpu] += allocated;
- }
- }
-
- CHECK(almostEqual(delta, 0.0))
- << "Failed to grow the cpuset by " << delta << " cpus\n"
- << " cpus: " << stringify(cpus) << "\n"
- << " usage: " << stringify(usage);
-
- return allocation;
-}
-
-
-map<proc::CPU, double> Cpuset::shrink(double delta)
-{
- // The technique used here is to free as much as possible from the
- // least allocated cpu. This means we'll avoid fragmenting as we're
- // constantly trying to remove cpus belonging to this Cpuset.
- map<proc::CPU, double> deallocation;
- while (!almostEqual(delta, 0.0)) {
- // Find the CPU to which we have the least allocated.
- Option<proc::CPU> least;
- foreachpair (const proc::CPU& cpu, double used, cpus) {
- if (least.isNone() || used <= cpus[least.get()]) {
- least = cpu;
- }
- }
-
- CHECK(least.isSome())
- << "Failed to shrink the cpuset by " << delta << " cpus\n"
- << " cpus: " << stringify(cpus);
-
- // Deallocate as much as possible from the least allocated CPU.
- double used = cpus[least.get()];
- double deallocated = std::min(used, delta);
- deallocation[least.get()] = deallocated;
- delta -= deallocated;
- cpus[least.get()] -= deallocated;
-
- // Ensure this Cpuset never contains unallocated CPUs.
- if (almostEqual(cpus[least.get()], 0.0)) {
- cpus.erase(least.get());
- }
- }
-
- return deallocation;
-}
-
-
-double Cpuset::usage() const
-{
- double total = 0.0;
- foreachvalue (double used, cpus) {
- total += used;
- }
- return total;
-}
-
-
-std::ostream& operator << (std::ostream& out, const Cpuset& cpuset)
-{
- vector<unsigned int> cpus;
- foreachpair (const proc::CPU& cpu, double used, cpuset.cpus) {
- CHECK(!almostEqual(used, 0.0));
- cpus.push_back(cpu.id);
- }
- std::sort(cpus.begin(), cpus.end());
-
- return out << strings::join(",", cpus);
-}
-
-
-CgroupsIsolator::CgroupsIsolator()
- : ProcessBase(ID::generate("cgroups-isolator")),
- local(false),
- initialized(false),
- lockFile(None()) {}
-
-
-void CgroupsIsolator::initialize(
- const Flags& _flags,
- const Resources& _resources,
- bool _local,
- const PID<Slave>& _slave)
-{
- flags = _flags;
- local = _local;
- slave = _slave;
-
- // Make sure that cgroups is enabled by the kernel.
- if (!cgroups::enabled()) {
- EXIT(1) << "No cgroups support detected in this kernel";
- }
-
- // Make sure that we have root permissions.
- if (geteuid() != 0) {
- EXIT(1) << "Using cgroups requires root permissions";
- }
-
- // Configure cgroups hierarchy root path.
- hierarchy = flags.cgroups_hierarchy;
-
- LOG(INFO) << "Using " << hierarchy << " as cgroups hierarchy root";
-
- // Determine desired subsystems.
- foreach (const string& subsystem,
- strings::tokenize(flags.cgroups_subsystems, ",")) {
- // TODO(benh): Implement a 'sets::union' that takes a vector or
- // set rather than looping here!
- subsystems.insert(subsystem);
- }
-
- // Regardless of whether or not it was desired, we require the
- // 'freezer' subsystem in order to destroy a cgroup.
- subsystems.insert("freezer");
-
- // We require the 'cpuacct' subsystem to perform resource monitoring.
- subsystems.insert("cpuacct");
-
- // Check if the hierarchy is already mounted, and if not, mount it.
- Try<bool> mounted = cgroups::mounted(hierarchy);
-
- if (mounted.isError()) {
- LOG(FATAL) << "Failed to determine if " << hierarchy
- << " is already mounted: " << mounted.error();
- } else if (mounted.get()) {
- // Make sure that all the desired subsystems are attached to the
- // already mounted hierarchy.
- Try<set<string> > attached = cgroups::subsystems(hierarchy);
-
- if (attached.isError()) {
- LOG(FATAL) << "Failed to determine the attached subsystems "
- << "for the cgroup hierarchy at " << hierarchy << ": "
- << attached.error();
- }
- foreach (const string& subsystem, subsystems) {
- if (attached.get().count(subsystem) == 0) {
- EXIT(1) << "The cgroups hierarchy at " << hierarchy
- << " can not be used because it does not have the '"
- << subsystem << "' subsystem attached";
- }
- }
- } else {
- // Attempt to mount the hierarchy ourselves.
- if (os::exists(hierarchy)) {
- // The path specified by the given hierarchy already exists in
- // the file system. We try to remove it if it is an empty
- // directory. This will help us better deal with slave restarts
- // since we won't need to manually remove the directory.
- Try<Nothing> rmdir = os::rmdir(hierarchy, false);
- if (rmdir.isError()) {
- EXIT(1) << "Failed to mount cgroups hierarchy at '" << hierarchy
- << "' because we could not remove existing directory"
- << ": " << rmdir.error();
- }
- }
-
- // Mount the cgroups hierarchy.
- Try<Nothing> mount = cgroups::mount(
- hierarchy, strings::join(",", subsystems));
-
- if (mount.isError()) {
- EXIT(1) << "Failed to mount cgroups hierarchy at '" << hierarchy
- << "': " << mount.error();
- }
- }
-
- // Create the root cgroup if it doesn't exist.
- Try<bool> exists = cgroups::exists(hierarchy, flags.cgroups_root);
- CHECK_SOME(exists)
- << "Failed to determine if '"<< flags.cgroups_root << "' cgroup "
- << "already exists in the hierarchy at '" << hierarchy << "'";
-
- if (!exists.get()) {
- // No root cgroup exists, create it.
- Try<Nothing> create = cgroups::create(hierarchy, flags.cgroups_root);
- CHECK_SOME(create)
- << "Failed to create the '" << flags.cgroups_root << "' cgroup";
- }
-
- // Create the nested test cgroup if it doesn't exist.
- exists = cgroups::exists(
- hierarchy, path::join(flags.cgroups_root, "test"));
- CHECK_SOME(exists)
- << "Failed to determine if '"<< flags.cgroups_root << "/test'"
- << " nested cgroup already exists in the hierarchy at '"
- << hierarchy << "'";
-
- if (!exists.get()) {
- // Make sure this kernel supports creating nested cgroups.
- Try<Nothing> create =
- cgroups::create(hierarchy, path::join(flags.cgroups_root, "test"));
-
- if (create.isError()) {
- EXIT(1) << "Failed to create a nested 'test' cgroup. Your kernel "
- << "might be too old to use the cgroups isolator: "
- << create.error();
- }
- }
-
- // Remove the nested 'test' cgroup.
- Try<Nothing> remove =
- cgroups::remove(hierarchy, path::join(flags.cgroups_root, "test"));
-
- CHECK_SOME(remove) << "Failed to remove the nested 'test' cgroup";
-
- // Try and put an _advisory_ file lock on the tasks' file of our
- // root cgroup to check and see if another slave is already running.
- Try<int> open =
- os::open(path::join(hierarchy, flags.cgroups_root, "tasks"), O_RDONLY);
-
- CHECK_SOME(open);
-
- lockFile = open.get();
- Try<Nothing> cloexec = os::cloexec(lockFile.get());
- CHECK_SOME(cloexec);
- if (flock(lockFile.get(), LOCK_EX | LOCK_NB) != 0) {
- EXIT(1) << "Another mesos-slave appears to be running!";
- }
-
- // Make sure the kernel supports OOM controls.
- exists = cgroups::exists(
- hierarchy, flags.cgroups_root, "memory.oom_control");
-
- CHECK_SOME(exists)
- << "Failed to determine if 'memory.oom_control' control exists";
-
- if (!exists.get()) {
- EXIT(1) << "Failed to find 'memory.oom_control', your kernel "
- << "might be too old to use the cgroups isolator";
- }
-
- // Make sure the memcg OOM-killer is not disabled.
- // The Mesos OOM handler, as implemented, is not capable of handling
- // the oom condition by itself safely given the limitations Linux
- // imposes on this code path.
- Try<Nothing> write = cgroups::write(
- hierarchy, flags.cgroups_root, "memory.oom_control", "0");
-
- CHECK_SOME(write) << "Failed to update memory.oom_control";
-
- if (subsystems.contains("cpu") && subsystems.contains("cpuset")) {
- EXIT(1) << "The use of both 'cpu' and 'cpuset' subsystems is not allowed.\n"
- << "Please use only one of:\n"
- << " cpu: When willing to share cpus for higher efficiency.\n"
- << " cpuset: When cpu pinning is desired.";
- }
-
- // Configure resource changed handlers. We only add handlers for
- // resources that have the appropriate subsystems attached.
- if (subsystems.contains("cpu")) {
- handlers["cpus"] = &CgroupsIsolator::cpusChanged;
- }
-
- if (subsystems.contains("cpuset")) {
- // TODO(bmahler): Consider making a cgroups primitive helper to perform
- // cgroups list format -> list of ints / strings conversion.
- hashset<unsigned int> cgroupCpus;
- Try<string> cpuset =
- cgroups::read(hierarchy, flags.cgroups_root, "cpuset.cpus");
-
- CHECK_SOME(cpuset) << "Failed to read cpuset.cpus";
- cpuset = strings::trim(cpuset.get());
-
- // Parse from "0-2,7,12-14" to a set(0,1,2,7,12,13,14).
- foreach (string range, strings::tokenize(cpuset.get(), ",")) {
- range = strings::trim(range);
-
- if (strings::contains(range, "-")) {
- // Case startId-endId (e.g. 0-2 in 0-2,7,12-14).
- vector<string> startEnd = strings::split(range, "-");
- CHECK(startEnd.size() == 2)
- << "Failed to parse cpu range '" << range
- << "' from cpuset.cpus '" << cpuset.get() << "'";
-
- Try<unsigned int> start =
- numify<unsigned int>(strings::trim(startEnd[0]));
- Try<unsigned int> end =
- numify<unsigned int>(strings::trim(startEnd[1]));
-
- CHECK(start.isSome() && end.isSome())
- << "Failed to parse cpu range '" << range
- << "' from cpuset.cpus '" << cpuset.get() << "'";
-
- for (unsigned int i = start.get(); i <= end.get(); i++) {
- cgroupCpus.insert(i);
- }
- } else {
- // Case id (e.g. 7 in 0-2,7,12-14).
- Try<unsigned int> cpuId = numify<unsigned int>(range);
-
- CHECK_SOME(cpuId)
- << "Failed to parse cpu '" << range << "' from cpuset.cpus '"
- << cpuset.get() << "'";
-
- cgroupCpus.insert(cpuId.get());
- }
- }
-
- Value::Scalar none;
- Value::Scalar cpusResource = _resources.get("cpus", none);
- if (cpusResource.value() > cgroupCpus.size()) {
- EXIT(1) << "You have specified " << cpusResource.value() << " cpus, but "
- << "this is more than allowed by the cgroup cpuset.cpus: "
- << cpuset.get();
- }
-
- // Initialize our cpu allocations.
- Try<list<proc::CPU> > cpus = proc::cpus();
-
- CHECK_SOME(cpus) << "Failed to extract CPUs from /proc/cpuinfo";
-
- foreach (const proc::CPU& cpu, cpus.get()) {
- if (this->cpus.size() >= cpusResource.value()) {
- break;
- }
-
- if (cgroupCpus.contains(cpu.id)) {
- LOG(INFO) << "Initializing cpu allocation for " << cpu;
- this->cpus[cpu] = 0.0;
- }
- }
-
- handlers["cpus"] = &CgroupsIsolator::cpusetChanged;
- }
-
- if (subsystems.contains("memory")) {
- handlers["mem"] = &CgroupsIsolator::memChanged;
- }
-
- // Add handlers for optional subsystem features.
- if (flags.cgroups_enable_cfs) {
- // Verify dependent subsystem is present and kernel supports CFS controls.
- if (!subsystems.contains("cpu")) {
- EXIT(1) << "The 'cfs' cgroups feature flag is dependent on the 'cpu' "
- << "subsystem.\n"
- << "Please enable the cpu subsystem to use the cfs feature.";
- }
-
- exists = cgroups::exists(hierarchy, flags.cgroups_root, "cpu.cfs_quota_us");
-
- CHECK_SOME(exists)
- << "Failed to determine if 'cpu.cfs_quota_us' control exists";
-
- if (!exists.get()) {
- EXIT(1) << "Failed to find 'cpu.cfs_quota_us'. Your kernel "
- << "might be too old to use the CFS cgroups feature";
- }
-
- // Make "cfsChanged" the cpu resource handler.
- // TODO(tdmackey): Allow multiple handlers per resource.
- handlers["cpus"] = &CgroupsIsolator::cfsChanged;
- }
-
- initialized = true;
-}
-
-
-void CgroupsIsolator::finalize()
-{
- // Unlock the advisory file.
- CHECK_SOME(lockFile) << "Uninitialized file descriptor!";
- if (flock(lockFile.get(), LOCK_UN) != 0) {
- PLOG(FATAL)
- << "Failed to unlock advisory lock file '"
- << path::join(hierarchy, flags.cgroups_root, "tasks") << "'";
- }
-
- Try<Nothing> close = os::close(lockFile.get());
- if (close.isError()) {
- LOG(ERROR) << "Failed to close advisory lock file '"
- << path::join(hierarchy, flags.cgroups_root, "tasks")
- << "': " << close.error();
- }
-}
-
-
-void CgroupsIsolator::launchExecutor(
- const SlaveID& slaveId,
- const FrameworkID& frameworkId,
- const FrameworkInfo& frameworkInfo,
- const ExecutorInfo& executorInfo,
- const UUID& uuid,
- const string& directory,
- const Resources& resources)
-{
- CHECK(initialized) << "Cannot launch executors before initialization";
-
- const ExecutorID& executorId = executorInfo.executor_id();
-
- // Register the cgroup information.
- CgroupInfo* info =
- registerCgroupInfo(frameworkId, executorId, uuid, None(), flags);
-
- LOG(INFO) << "Launching " << executorId
- << " (" << executorInfo.command().value() << ")"
- << " in " << directory
- << " with resources " << resources
- << " for framework " << frameworkId
- << " in cgroup " << info->name();
-
- // Create a new cgroup for the executor.
- Try<Nothing> create = cgroups::create(hierarchy, info->name());
-
- if (create.isError()) {
- LOG(FATAL) << "Failed to create cgroup for executor " << executorId
- << " of framework " << frameworkId
- << ": " << create.error();
- }
-
- // Setup the initial resource constraints.
- resourcesChanged(frameworkId, executorId, resources);
-
- // Start listening on OOM events.
- oomListen(frameworkId, executorId);
-
- // Use pipes to determine which child has successfully changed session.
- int pipes[2];
- if (pipe(pipes) < 0) {
- PLOG(FATAL) << "Failed to create a pipe";
- }
-
- // Set the FD_CLOEXEC flags on these pipes
- Try<Nothing> cloexec = os::cloexec(pipes[0]);
- CHECK_SOME(cloexec) << "Error setting FD_CLOEXEC on pipe[0]";
-
- cloexec = os::cloexec(pipes[1]);
- CHECK_SOME(cloexec) << "Error setting FD_CLOEXEC on pipe[1]";
-
- // Launch the executor using fork-exec.
- pid_t pid;
- if ((pid = ::fork()) == -1) {
- LOG(FATAL) << "Failed to fork to launch new executor";
- }
-
- if (pid > 0) {
- os::close(pipes[1]);
-
- // Get the child's pid via the pipe.
- if (read(pipes[0], &pid, sizeof(pid)) == -1) {
- PLOG(FATAL) << "Failed to get child PID from pipe";
- }
-
- os::close(pipes[0]);
-
- // In parent process.
- LOG(INFO) << "Forked executor at = " << pid;
-
- // Store the pid of the leading process of the executor.
- info->pid = pid;
-
- process::reap(pid)
- .onAny(defer(PID<CgroupsIsolator>(this),
- &CgroupsIsolator::reaped,
- pid,
- lambda::_1));
-
- // Tell the slave this executor has started.
- dispatch(slave,
- &Slave::executorStarted,
- frameworkId,
- executorId,
- pid);
- } else {
- // In child process, we make cleanup easier by putting process
- // into its own session. DO NOT USE GLOG!
- os::close(pipes[0]);
-
- // NOTE: We setsid() in a loop because setsid() might fail if another
- // process has the same process group id as the calling process.
- while ((pid = setsid()) == -1) {
- perror("Could not put executor in its own session");
-
- std::cout << "Forking another process and retrying ..." << std::endl;
-
- if ((pid = fork()) == -1) {
- perror("Failed to fork to launch executor");
- abort();
- }
-
- if (pid > 0) {
- // In parent process.
- exit(0);
- }
- }
-
- if (write(pipes[1], &pid, sizeof(pid)) != sizeof(pid)) {
- perror("Failed to write PID on pipe");
- abort();
- }
-
- os::close(pipes[1]);
-
- launcher::ExecutorLauncher launcher(
- slaveId,
- frameworkId,
- executorInfo.executor_id(),
- uuid,
- executorInfo.command(),
- frameworkInfo.user(),
- directory,
- flags.work_dir,
- slave,
- flags.frameworks_home,
- flags.hadoop_home,
- !local,
- flags.switch_user,
- frameworkInfo.checkpoint(),
- flags.recovery_timeout);
-
- // First fetch the executor.
- if (launcher.setup() < 0) {
- EXIT(1) << "Failed to setup executor '" << executorId
- << "' for framework " << frameworkId;
- }
-
- // Put self into the newly created cgroup.
- // Note that the memory used for setting up the executor
- // (launcher.setup()) is charged to the slave's cgroup and
- // not to the executor's cgroup. When we assign the executor
- // to its own cgroup, below, its memory charge will start
- // at 0. For more details, refer to
- // http://www.kernel.org/doc/Documentation/cgroups/memory.txt
- Try<Nothing> assign = cgroups::assign(hierarchy, info->name(), ::getpid());
-
- if (assign.isError()) {
- EXIT(1) << "Failed to assign executor '" << executorId
- << "' of framework " << frameworkId
- << " to its own cgroup '" << path::join(hierarchy, info->name())
- << "' : " << assign.error();
- }
-
- // Now launch the executor (this function should not return).
- launcher.launch();
- }
-}
-
-
-void CgroupsIsolator::killExecutor(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId)
-{
- CHECK(initialized) << "Cannot kill executors before initialization";
-
- CgroupInfo* info = findCgroupInfo(frameworkId, executorId);
- if (info == NULL || info->killed) {
- LOG(ERROR) << "Asked to kill an unknown/killed executor!";
- return;
- }
-
- LOG(INFO) << "Killing executor " << executorId
- << " of framework " << frameworkId;
-
- // Stop the OOM listener if needed.
- if (info->oomNotifier.isPending()) {
- info->oomNotifier.discard();
- }
-
- info->killed = true;
-
- // Destroy the cgroup that is associated with the executor. Here, we
- // don't wait for it to succeed as we don't want to block the
- // isolator. Instead, we register a callback which will be invoked
- // when its result is ready.
- cgroups::destroy(hierarchy, info->name())
- .onAny(defer(PID<CgroupsIsolator>(this),
- &CgroupsIsolator::_killExecutor,
- info,
- lambda::_1));
-}
-
-
-void CgroupsIsolator::resourcesChanged(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId,
- const Resources& resources)
-{
- CHECK(initialized) << "Cannot change resources before initialization";
-
- CgroupInfo* info = findCgroupInfo(frameworkId, executorId);
- if (info == NULL || info->killed) {
- LOG(INFO) << "Asked to update resources for an unknown/killed executor";
- return;
- }
-
- if (info->resources == resources) {
- // This could happen when 'resourcesChanged()' is called after
- // launching the first task since the executor includes the
- // resources for the first task.
- return;
- }
-
- info->resources = resources;
-
- LOG(INFO) << "Changing cgroup controls for executor " << executorId
- << " of framework " << frameworkId
- << " with resources " << resources;
-
- // For each resource, invoke the corresponding handler.
- foreach (const Resource& resource, resources) {
- if (handlers.contains(resource.name())) {
- Try<Nothing> result = (this->*handlers[resource.name()])(info, resource);
-
- if (result.isError()) {
- LOG(ERROR) << result.error();
- }
- }
- }
-}
-
-
-Future<ResourceStatistics> CgroupsIsolator::usage(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId)
-{
- if (!infos.contains(frameworkId) ||
- !infos[frameworkId].contains(executorId) ||
- infos[frameworkId][executorId]->killed) {
- return Failure("Unknown or killed executor");
- }
-
- // Get the number of clock ticks, used for cpu accounting.
- static long ticks = sysconf(_SC_CLK_TCK);
-
- PCHECK(ticks > 0) << "Failed to get sysconf(_SC_CLK_TCK)";
-
- CgroupInfo* info = infos[frameworkId][executorId];
- CHECK_NOTNULL(info);
-
- ResourceStatistics result;
- result.set_timestamp(Clock::now().secs());
-
- // Set the resource allocations.
- Option<Bytes> mem = info->resources.mem();
- if (mem.isSome()) {
- result.set_mem_limit_bytes(mem.get().bytes());
- }
-
- Option<double> cpus = info->resources.cpus();
- if (cpus.isSome()) {
- result.set_cpus_limit(cpus.get());
- }
-
- Try<hashmap<string, uint64_t> > stat =
- cgroups::stat(hierarchy, info->name(), "cpuacct.stat");
-
- if (stat.isError()) {
- return Failure("Failed to read cpuacct.stat: " + stat.error());
- }
-
- // TODO(bmahler): Add namespacing to cgroups to enforce the expected
- // structure, e.g., cgroups::cpuacct::stat.
- if (stat.get().contains("user") && stat.get().contains("system")) {
- result.set_cpus_user_time_secs(
- (double) stat.get()["user"] / (double) ticks);
- result.set_cpus_system_time_secs(
- (double) stat.get()["system"] / (double) ticks);
- }
-
- // The rss from memory.stat is wrong in two dimensions:
- // 1. It does not include child cgroups.
- // 2. It does not include any file backed pages.
- Try<Bytes> usage = cgroups::memory::usage_in_bytes(hierarchy, info->name());
- if (usage.isError()) {
- return Failure("Failed to parse memory.usage_in_bytes: " + usage.error());
- }
-
- // TODO(bmahler): Add namespacing to cgroups to enforce the expected
- // structure, e.g., cgroups::memory::stat.
- result.set_mem_rss_bytes(usage.get().bytes());
-
- stat = cgroups::stat(hierarchy, info->name(), "memory.stat");
- if (stat.isError()) {
- return Failure("Failed to read memory.stat: " + stat.error());
- }
-
- if (stat.get().contains("total_cache")) {
- result.set_mem_file_bytes(stat.get()["total_cache"]);
- }
-
- if (stat.get().contains("total_rss")) {
- result.set_mem_anon_bytes(stat.get()["total_rss"]);
- }
-
- if (stat.get().contains("total_mapped_file")) {
- result.set_mem_mapped_file_bytes(stat.get()["total_mapped_file"]);
- }
-
- // Add the cpu.stat information.
- stat = cgroups::stat(hierarchy, info->name(), "cpu.stat");
-
- if (stat.isError()) {
- return Failure("Failed to read cpu.stat: " + stat.error());
- }
-
- if (stat.get().contains("nr_periods")) {
- result.set_cpus_nr_periods(
- (uint32_t) stat.get()["nr_periods"]);
- }
-
- if (stat.get().contains("nr_throttled")) {
- result.set_cpus_nr_throttled(
- (uint32_t) stat.get()["nr_throttled"]);
- }
-
- if (stat.get().contains("throttled_time")) {
- result.set_cpus_throttled_time_secs(
- Nanoseconds(stat.get()["throttled_time"]).secs());
- }
-
- return result;
-}
-
-
-Future<Nothing> CgroupsIsolator::recover(
- const Option<SlaveState>& state)
-{
- LOG(INFO) << "Recovering isolator";
-
- hashset<std::string> cgroups; // Recovered cgroups.
-
- if (state.isSome()) {
- foreachvalue (const FrameworkState& framework, state.get().frameworks) {
- foreachvalue (const ExecutorState& executor, framework.executors) {
- LOG(INFO) << "Recovering executor '" << executor.id
- << "' of framework " << framework.id;
-
- if (executor.info.isNone()) {
- LOG(WARNING) << "Skipping recovery of executor '" << executor.id
- << "' of framework " << framework.id
- << " because its info cannot be recovered";
- continue;
- }
-
- if (executor.latest.isNone()) {
- LOG(WARNING) << "Skipping recovery of executor '" << executor.id
- << "' of framework " << framework.id
- << " because its latest run cannot be recovered";
- continue;
- }
-
- // We are only interested in the latest run of the executor!
- const UUID& uuid = executor.latest.get();
- CHECK(executor.runs.contains(uuid));
- const RunState& run = executor.runs.get(uuid).get();
-
- if (run.completed) {
- VLOG(1) << "Skipping recovery of executor '" << executor.id
- << "' of framework " << framework.id
- << " because its latest run " << uuid << " is completed";
- continue;
- }
-
- // TODO(vinod): Currently, we assume that the cgroups
- // information (e.g., hierarchy, root) used while recovering
- // is same as the one that was used by the previous slave
- // while checkpointing. Instead, we should checkpoint the
- // cgroups information.
- CgroupInfo* info = registerCgroupInfo(
- framework.id, executor.id, uuid, run.forkedPid, flags);
-
- // If the cgroup has already been removed inform the slave.
- Try<bool> exists = cgroups::exists(hierarchy, info->name());
- CHECK_SOME(exists) << "Failed to find the existence of cgroup "
- << info->name();
- if (!exists.get()) {
- dispatch(slave,
- &Slave::executorTerminated,
- info->frameworkId,
- info->executorId,
- info->status,
- info->destroyed,
- info->message);
-
- unregisterCgroupInfo(framework.id, executor.id);
-
- continue;
- }
-
- cgroups.insert(info->name());
-
- // Add the pid to the reaper to monitor exit status.
- if (run.forkedPid.isSome()) {
- process::reap(run.forkedPid.get())
- .onAny(defer(PID<CgroupsIsolator>(this),
- &CgroupsIsolator::reaped,
- run.forkedPid.get(),
- lambda::_1));
- }
-
- // Start listening for OOMs. If the executor OOMed while the
- // slave was down or recovering, the cgroup will already be
- // under_oom, resulting in immediate notification.
- // TODO(bmahler): I've been unable to find documentation
- // guaranteeing this, but the kernel source indicates they
- // notify if already under_oom.
- if (subsystems.contains("memory")) {
- oomListen(framework.id, executor.id);
- }
- }
- }
- }
-
- // Cleanup any orphaned cgroups that are not going to be recovered (this
- // should be safe because we've been able to acquire the file lock).
- Try<vector<string> > orphans = cgroups::get(hierarchy, flags.cgroups_root);
- if (orphans.isError()) {
- return Failure(orphans.error());
- }
-
- foreach (const string& orphan, orphans.get()) {
- if (!cgroups.contains(orphan)) {
- LOG(INFO) << "Removing orphaned cgroup '" << orphan << "'";
- cgroups::destroy(hierarchy, orphan)
- .onAny(defer(PID<CgroupsIsolator>(this),
- &CgroupsIsolator::_destroy,
- orphan,
- lambda::_1));
- }
- }
-
- return Nothing();
-}
-
-
-void CgroupsIsolator::reaped(pid_t pid, const Future<Option<int> >& status)
-{
- CgroupInfo* info = findCgroupInfo(pid);
- if (info != NULL) {
- FrameworkID frameworkId = info->frameworkId;
- ExecutorID executorId = info->executorId;
-
- if (!status.isReady()) {
- LOG(ERROR) << "Failed to get the status for executor " << executorId
- << " of framework " << frameworkId << ": "
- << (status.isFailed() ? status.failure() : "discarded");
- return;
- }
-
- if (status.get().isSome()) {
- int _status = status.get().get();
- LOG(INFO) << "Executor '" << executorId
- << "' of framework " << frameworkId
- << (WIFEXITED(_status) ? " has exited with status "
- : " has terminated with signal ")
- << (WIFEXITED(_status) ? stringify(WEXITSTATUS(_status))
- : strsignal(WTERMSIG(_status)));
- } else {
- LOG(WARNING) << "Executor '" << executorId
- << "' of framework " << frameworkId
- << " terminated with unknown status";
- }
-
- // Set the exit status, so that '_killExecutor()' can send it to the slave.
- info->status = status.get();
-
- if (!info->killed) {
- killExecutor(frameworkId, executorId);
- }
- }
-}
-
-
-Try<Nothing> CgroupsIsolator::cpusChanged(
- CgroupInfo* info,
- const Resource& resource)
-{
- CHECK(resource.name() == "cpus");
-
- if (resource.type() != Value::SCALAR) {
- return Error("Expecting resource 'cpus' to be a scalar");
- }
-
- double cpus = resource.scalar().value();
- size_t shares =
- std::max((size_t) (CPU_SHARES_PER_CPU * cpus), MIN_CPU_SHARES);
-
- Try<Nothing> write = cgroups::write(
- hierarchy, info->name(), "cpu.shares", stringify(shares));
-
- if (write.isError()) {
- return Error("Failed to update 'cpu.shares': " + write.error());
- }
-
- LOG(INFO) << "Updated 'cpu.shares' to " << shares
- << " for executor " << info->executorId
- << " of framework " << info->frameworkId;
-
- return Nothing();
-}
-
-
-Try<Nothing> CgroupsIsolator::cpusetChanged(
- CgroupInfo* info,
- const Resource& resource)
-{
- CHECK_NOTNULL(info->cpuset);
- CHECK(resource.name() == "cpus");
- CHECK(resource.type() == Value::SCALAR);
-
- double delta = resource.scalar().value() - info->cpuset->usage();
-
- if (delta < 0) {
- map<proc::CPU, double> deallocated = info->cpuset->shrink(fabs(delta));
- foreachpair (const proc::CPU& cpu, double freed, deallocated) {
- cpus[cpu] -= freed;
- CHECK(cpus[cpu] > -0.001); // Check approximately >= 0.
- }
- } else {
- map<proc::CPU, double> allocated = info->cpuset->grow(delta, cpus);
- foreachpair (const proc::CPU& cpu, double used, allocated) {
- cpus[cpu] += used;
- CHECK(cpus[cpu] < 1.001); // Check approximately <= 1.
- }
- }
-
- Try<Nothing> write = cgroups::write(
- hierarchy, info->name(), "cpuset.cpus", stringify(*(info->cpuset)));
-
- if (write.isError()) {
- return Error("Failed to update 'cpuset.cpus': " + write.error());
- }
-
- LOG(INFO) << "Updated 'cpuset.cpus' to " << *(info->cpuset)
- << " for executor " << info->executorId
- << " of framework " << info->frameworkId;
-
- return Nothing();
-}
-
-
-Try<Nothing> CgroupsIsolator::cfsChanged(
- CgroupInfo* info,
- const Resource& resource)
-{
- CHECK(resource.name() == "cpus");
- CHECK(resource.type() == Value::SCALAR);
-
- Try<Nothing> write = cgroups::write(
- hierarchy,
- info->name(),
- "cpu.cfs_period_us",
- stringify(CPU_CFS_PERIOD.us()));
-
- if (write.isError()) {
- return Error("Failed to update 'cpu.cfs_period_us': " + write.error());
- }
-
- double cpus = resource.scalar().value();
- size_t quota = static_cast<size_t>(
- std::max(CPU_CFS_PERIOD.us() * cpus, MIN_CPU_CFS_QUOTA.us()));
-
- write = cgroups::write(
- hierarchy, info->name(), "cpu.cfs_quota_us", stringify(quota));
-
- if (write.isError()) {
- return Error("Failed to update 'cpu.cfs_quota_us': " + write.error());
- }
-
- LOG(INFO) << "Updated 'cpu.cfs_period_us' to " << CPU_CFS_PERIOD.us()
- << " and 'cpu.cfs_quota_us' to " << quota
- << " for executor " << info->executorId
- << " of framework " << info->frameworkId;
-
- // Set cpu.shares as well.
- // TODO(tdmackey): Allow multiple handlers per resource.
- cpusChanged(info, resource);
-
- return Nothing();
-}
-
-
-Try<Nothing> CgroupsIsolator::memChanged(
- CgroupInfo* info,
- const Resource& resource)
-{
- CHECK(resource.name() == "mem");
-
- if (resource.type() != Value::SCALAR) {
- return Error("Expecting resource 'mem' to be a scalar");
- }
-
- Bytes mem = Bytes((uint64_t) resource.scalar().value() * 1024LL * 1024LL);
- Bytes limit = std::max(mem, MIN_MEMORY);
-
- // Always set the soft limit.
- Try<Nothing> write =
- cgroups::memory::soft_limit_in_bytes(hierarchy, info->name(), limit);
-
- if (write.isError()) {
- return Error("Failed to set 'memory.soft_limit_in_bytes': "
- + write.error());
- }
-
- LOG(INFO) << "Updated 'memory.soft_limit_in_bytes' to " << limit
- << " for executor " << info->executorId
- << " of framework " << info->frameworkId;
-
- // Read the existing limit.
- Try<Bytes> currentLimit =
- cgroups::memory::limit_in_bytes(hierarchy, info->name());
-
- if (currentLimit.isError()) {
- return Error(
- "Failed to read 'memory.limit_in_bytes': " + currentLimit.error());
- }
-
- // Determine whether to set the hard limit. If this is the first
- // time (info->pid.isNone()), or we're raising the existing limit,
- // then we can update the hard limit safely. Otherwise, if we need
- // to decrease 'memory.limit_in_bytes' we may induce an OOM if too
- // much memory is in use. As a result, we only update the soft
- // limit when the memory reservation is being reduced. This is
- // probably okay if the machine has available resources.
- // TODO(benh): Introduce a MemoryWatcherProcess which monitors the
- // discrepancy between usage and soft limit and introduces a
- // "manual oom" if necessary.
- if (info->pid.isNone() || limit > currentLimit.get()) {
- write = cgroups::memory::limit_in_bytes(hierarchy, info->name(), limit);
-
- if (write.isError()) {
- return Error("Failed to set 'memory.limit_in_bytes': " + write.error());
- }
-
- LOG(INFO) << "Updated 'memory.limit_in_bytes' to " << limit
- << " for executor " << info->executorId
- << " of framework " << info->frameworkId;
- }
-
- return Nothing();
-}
-
-
-void CgroupsIsolator::oomListen(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId)
-{
- CgroupInfo* info = findCgroupInfo(frameworkId, executorId);
- CHECK(info != NULL) << "Cgroup info is not registered";
-
- info->oomNotifier =
- cgroups::listen(hierarchy, info->name(), "memory.oom_control");
-
- // If the listening fails immediately, something very wrong happened.
- // Therefore, we report a fatal error here.
- if (info->oomNotifier.isFailed()) {
- LOG(FATAL) << "Failed to listen for OOM events for executor " << executorId
- << " of framework " << frameworkId
- << ": "<< info->oomNotifier.failure();
- }
-
- LOG(INFO) << "Started listening for OOM events for executor " << executorId
- << " of framework " << frameworkId;
-
- CHECK_SOME(info->uuid);
- info->oomNotifier.onAny(
- defer(PID<CgroupsIsolator>(this),
- &CgroupsIsolator::oomWaited,
- frameworkId,
- executorId,
- info->uuid.get(),
- lambda::_1));
-}
-
-
-void CgroupsIsolator::oomWaited(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId,
- const UUID& uuid,
- const Future<uint64_t>& future)
-{
- LOG(INFO) << "OOM notifier is triggered for executor "
- << executorId << " of framework " << frameworkId
- << " with uuid " << uuid;
-
- if (future.isDiscarded()) {
- LOG(INFO) << "Discarded OOM notifier for executor "
- << executorId << " of framework " << frameworkId
- << " with uuid " << uuid;
- } else if (future.isFailed()) {
- LOG(ERROR) << "Listening on OOM events failed for executor "
- << executorId << " of framework " << frameworkId
- << " with uuid " << uuid << ": " << future.failure();
- } else {
- // Out-of-memory event happened, call the handler.
- oom(frameworkId, executorId, uuid);
- }
-}
-
-
-void CgroupsIsolator::oom(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId,
- const UUID& uuid)
-{
- CgroupInfo* info = findCgroupInfo(frameworkId, executorId);
- if (info == NULL) {
- // It is likely that processExited is executed before this function (e.g.
- // The kill and OOM events happen at the same time, and the process exit
- // event arrives first.) Therefore, we should not report a fatal error here.
- LOG(INFO) << "OOM detected for an already terminated executor";
- return;
- }
-
- // We can also ignore an OOM event that we are late to process for a
- // previous instance of an executor.
- CHECK_SOME(info->uuid);
- if (uuid != info->uuid.get()) {
- LOG(INFO) << "OOM detected for a previous executor instance";
- return;
- }
-
- // It's possible for an executor to OOM right as it was being
- // killed, ignore this case.
- if (info->killed) {
- LOG(INFO) << "OOM detected for an already killed executor";
- return;
- }
-
- LOG(INFO) << "OOM detected for executor " << executorId
- << " of framework " << frameworkId
- << " with uuid " << uuid;
-
- // Construct a "message" string to describe why the isolator
- // destroyed the executor's cgroup (in order to assist in debugging).
- ostringstream message;
- message << "Memory limit exceeded: ";
-
- // Output the requested memory limit.
- Try<Bytes> limit = cgroups::memory::limit_in_bytes(hierarchy, info->name());
-
- if (limit.isError()) {
- LOG(ERROR) << "Failed to read 'memory.limit_in_bytes': " << limit.error();
- } else {
- message << "Requested: " << limit.get() << " ";
- }
-
- // Output the maximum memory usage.
- Try<Bytes> usage = cgroups::memory::max_usage_in_bytes(hierarchy, info->name());
-
- if (usage.isError()) {
- LOG(ERROR) << "Failed to read 'memory.max_usage_in_bytes': " << usage.error();
- } else {
- message << "Maximum Used: " << usage.get() << "\n";
- }
-
- // Output 'memory.stat' of the cgroup to help with debugging.
- // NOTE: With Kernel OOM-killer enabled these stats may not reflect
- // memory state at time of OOM.
- Try<string> read = cgroups::read(hierarchy, info->name(), "memory.stat");
- if (read.isError()) {
- LOG(ERROR) << "Failed to read 'memory.stat': " << read.error();
- } else {
- message << "\nMEMORY STATISTICS: \n" << read.get() << "\n";
- }
-
- LOG(INFO) << strings::trim(message.str()); // Trim the extra '\n' at the end.
-
- info->destroyed = true;
- info->message = message.str();
-
- killExecutor(frameworkId, executorId);
-}
-
-
-void CgroupsIsolator::_destroy(
- const string& cgroup,
- const Future<bool>& future)
-{
- CHECK(initialized) << "Cannot destroy cgroups before initialization";
-
- if (future.isReady()) {
- LOG(INFO) << "Successfully destroyed cgroup " << cgroup;
- } else {
- LOG(FATAL) << "Failed to destroy cgroup " << cgroup
- << ": " << future.failure();
- }
-}
-
-
-void CgroupsIsolator::_killExecutor(
- CgroupInfo* info,
- const Future<bool>& future)
-{
- CHECK(initialized) << "Cannot kill executors before initialization";
-
- CHECK_NOTNULL(info);
-
- if (future.isReady()) {
- LOG(INFO) << "Successfully destroyed cgroup " << info->name();
-
- CHECK(info->killed)
- << "Unexpectedly alive executor " << info->executorId
- << " of framework " << info->frameworkId;
-
- // NOTE: The exit status of the executor might not be set if this
- // function is called before 'processTerminated()' is called.
- // TODO(vinod): When reaper returns a future instead of issuing a callback,
- // wait for that future to be ready and grab the exit status.
- dispatch(slave,
- &Slave::executorTerminated,
- info->frameworkId,
- info->executorId,
- info->status,
- info->destroyed,
- info->message);
-
- // We make a copy here because 'info' will be deleted when we unregister.
- unregisterCgroupInfo(
- utils::copy(info->frameworkId),
- utils::copy(info->executorId));
- } else {
- LOG(FATAL) << "Failed to destroy cgroup " << info->name()
- << ": " << future.failure();
- }
-}
-
-
-CgroupsIsolator::CgroupInfo* CgroupsIsolator::registerCgroupInfo(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId,
- const UUID& uuid,
- const Option<pid_t>& pid,
- const Flags& flags)
-{
- CgroupInfo* info = new CgroupInfo();
- info->frameworkId = frameworkId;
- info->executorId = executorId;
- info->uuid = uuid;
- info->pid = pid;
- info->killed = false;
- info->destroyed = false;
- info->status = -1;
- info->message = "";
- info->flags = flags;
- if (subsystems.contains("cpuset")) {
- info->cpuset = new Cpuset();
- } else {
- info->cpuset = NULL;
- }
- infos[frameworkId][executorId] = info;
- return info;
-}
-
-
-void CgroupsIsolator::unregisterCgroupInfo(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId)
-{
- if (infos.contains(frameworkId)) {
- if (infos[frameworkId].contains(executorId)) {
- delete infos[frameworkId][executorId];
- infos[frameworkId].erase(executorId);
- if (infos[frameworkId].empty()) {
- infos.erase(frameworkId);
- }
- }
- }
-}
-
-
-CgroupsIsolator::CgroupInfo* CgroupsIsolator::findCgroupInfo(
- pid_t pid)
-{
- foreachkey (const FrameworkID& frameworkId, infos) {
- foreachvalue (CgroupInfo* info, infos[frameworkId]) {
- if (info->pid.isSome() && info->pid.get() == pid) {
- return info;
- }
- }
- }
- return NULL;
-}
-
-
-CgroupsIsolator::CgroupInfo* CgroupsIsolator::findCgroupInfo(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId)
-{
- if (infos.find(frameworkId) != infos.end()) {
- if (infos[frameworkId].find(executorId) != infos[frameworkId].end()) {
- return infos[frameworkId][executorId];
- }
- }
- return NULL;
-}
-
-} // namespace mesos {
-} // namespace internal {
-} // namespace slave {
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/slave/cgroups_isolator.hpp
----------------------------------------------------------------------
diff --git a/src/slave/cgroups_isolator.hpp b/src/slave/cgroups_isolator.hpp
deleted file mode 100644
index 1a66dc6..0000000
--- a/src/slave/cgroups_isolator.hpp
+++ /dev/null
@@ -1,320 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __CGROUPS_ISOLATOR_HPP__
-#define __CGROUPS_ISOLATOR_HPP__
-
-#include <unistd.h>
-
-#include <map>
-#include <sstream>
-#include <string>
-
-#include <process/future.hpp>
-#include <process/pid.hpp>
-
-#include <stout/hashmap.hpp>
-#include <stout/hashset.hpp>
-#include <stout/lambda.hpp>
-#include <stout/nothing.hpp>
-#include <stout/option.hpp>
-#include <stout/path.hpp>
-#include <stout/proc.hpp>
-#include <stout/uuid.hpp>
-
-#include "launcher/launcher.hpp"
-
-#include "slave/flags.hpp"
-#include "slave/isolator.hpp"
-#include "slave/slave.hpp"
-
-namespace mesos {
-namespace internal {
-namespace slave {
-
-// TODO(bmahler): Migrate this into it's own file, along with moving
-// all cgroups code inside of a 'cgroups' directory.
-class Cpuset
-{
-public:
- // Grows this cpu set by the provided delta.
- // @param delta Amount of cpus to grow by.
- // @param usage Cpu usage, as allocated by the cgroups isolator.
- // @return The new cpu allocations made by this Cpuset.
- std::map<proc::CPU, double> grow(
- double delta,
- const std::map<proc::CPU, double>& usage);
-
- // Shrinks this cpu set by the provided delta.
- // @param delta Amount of cpus to shrink by.
- // @return The new cpu deallocations made by this Cpuset.
- std::map<proc::CPU, double> shrink(double delta);
-
- // @return The total cpu usage across all the cpus in this Cpuset.
- double usage() const;
-
- friend std::ostream& operator << (std::ostream& out, const Cpuset& cpuset);
-
-private:
- std::map<proc::CPU, double> cpus; // CPU id -> % allocated.
-};
-
-
-class CgroupsIsolator : public Isolator
-{
-public:
- CgroupsIsolator();
-
- virtual void initialize(
- const Flags& flags,
- const Resources& resources,
- bool local,
- const process::PID<Slave>& slave);
-
- virtual void finalize();
-
- virtual void launchExecutor(
- const SlaveID& slaveId,
- const FrameworkID& frameworkId,
- const FrameworkInfo& frameworkInfo,
- const ExecutorInfo& executorInfo,
- const UUID& uuid,
- const std::string& directory,
- const Resources& resources);
-
- virtual void killExecutor(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId);
-
- virtual void resourcesChanged(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId,
- const Resources& resources);
-
- virtual process::Future<ResourceStatistics> usage(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId);
-
- virtual process::Future<Nothing> recover(
- const Option<state::SlaveState>& state);
-
-private:
- // No copying, no assigning.
- CgroupsIsolator(const CgroupsIsolator&);
- CgroupsIsolator& operator = (const CgroupsIsolator&);
-
- void reaped(pid_t pid, const Future<Option<int> >& status);
-
- // The cgroup information for each live executor.
- struct CgroupInfo
- {
- ~CgroupInfo()
- {
- if (cpuset != NULL) {
- delete cpuset;
- cpuset = NULL;
- }
- }
-
- // Returns the canonicalized name of the cgroup in the filesystem.
- std::string name() const
- {
- CHECK_SOME(uuid);
- std::ostringstream out;
- out << "framework_" << frameworkId
- << "_executor_" << executorId
- << "_tag_" << uuid.get();
- return path::join(flags.cgroups_root, out.str());
- }
-
- FrameworkID frameworkId;
- ExecutorID executorId;
-
- // The UUID to distinguish between different launches of the same
- // executor (which have the same frameworkId and executorId).
- Option<UUID> uuid;
-
- // PID of the forked process of the executor.
- Option<pid_t> pid;
-
- bool killed; // True if "killing" has been initiated via 'killExecutor()'.
-
- // Indicates if this executor has been destroyed by the isolator.
- // NOTE: An executor may have terminated due to reasons
- // other than destruction by the isolator (e.g. killed by
- // slave, exited, etc.).
- bool destroyed;
-
- std::string message; // The reason behind the destruction.
-
- Option<int> status; // Exit status of the executor.
-
- Flags flags; // Slave flags.
-
- Resources resources; // Resources allocated to the cgroup.
-
- // Used to cancel the OOM listening.
- process::Future<uint64_t> oomNotifier;
-
- // CPUs allocated if using 'cpuset' subsystem.
- Cpuset* cpuset;
- };
-
- // The callback which will be invoked when "cpus" resource has changed.
- // @param info The Cgroup information.
- // @param resources The handle for the resources.
- // @return Whether the operation succeeds.
- Try<Nothing> cpusChanged(
- CgroupInfo* info,
- const Resource& resource);
-
- // The callback which will be invoked when "cpus" resource has changed.
- // This is only invoked when we are using the cpuset subsystem.
- // @param info The Cgroup information.
- // @param resources The handle for the resources.
- // @return Whether the operation succeeds.
- Try<Nothing> cpusetChanged(
- CgroupInfo* info,
- const Resource& resource);
-
- // The callback which will be invoked when "cpus" resource has changed,
- // and the cfs cgroups feature flag is enabled..
- // @param info The Cgroup information.
- // @param resources The handle for the resources.
- // @return Whether the operation succeeds.
- Try<Nothing> cfsChanged(
- CgroupInfo* info,
- const Resource& resource);
-
- // The callback which will be invoked when "mem" resource has changed.
- // @param info The Cgroup information.
- // @param resources The handle for the resources.
- // @return Whether the operation succeeds.
- Try<Nothing> memChanged(
- CgroupInfo* info,
- const Resource& resource);
-
- // Start listening on OOM events. This function will create an eventfd and
- // start polling on it.
- // @param frameworkId The id of the given framework.
- // @param executorId The id of the given executor.
- void oomListen(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId);
-
- // This function is invoked when the polling on eventfd has a result.
- // @param frameworkId The id of the given framework.
- // @param executorId The id of the given executor.
- // @param uuid The uuid of the given executor.
- void oomWaited(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId,
- const UUID& uuid,
- const process::Future<uint64_t>& future);
-
- // This function is invoked when the OOM event happens.
- // @param frameworkId The id of the given framework.
- // @param executorId The id of the given executor.
- // @param uuid The uuid of the given executor.
- void oom(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId,
- const UUID& uuid);
-
- // This callback is invoked when destroy cgroup has a result.
- // @param info The information of cgroup that is being destroyed.
- // @param future The future describing the destroy process.
- void _killExecutor(
- CgroupInfo* info,
- const process::Future<bool>& future);
-
- // This callback is invoked when destroying orphaned cgroups from the
- // previous slave execution.
- // @param cgroup The cgroup that is being destroyed.
- // @param future The future describing the destroy process.
- void _destroy(
- const std::string& cgroup,
- const process::Future<bool>& future);
-
- // Register a cgroup in the isolator.
- // @param frameworkId The id of the given framework.
- // @param executorId The id of the given executor.
- // @param uuid The uuid of the given executor run.
- // @param pid The executor pid.
- // @param flags The slave flags.
- // @return A pointer to the cgroup info registered.
- CgroupInfo* registerCgroupInfo(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId,
- const UUID& uuid,
- const Option<pid_t>& pid,
- const Flags& flags);
-
- // Unregister a cgroup in the isolator.
- // @param frameworkId The id of the given framework.
- // @param executorId The id of the given executor.
- void unregisterCgroupInfo(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId);
-
- // Find a registered cgroup by the PID of the leading process.
- // @param pid The PID of the leading process in the cgroup.
- // @return A pointer to the cgroup info if found, NULL otherwise.
- CgroupInfo* findCgroupInfo(pid_t pid);
-
- // Find a registered cgroup by the frameworkId and the executorId.
- // @param frameworkId The id of the given framework.
- // @param executorId The id of the given executor.
- // @return A pointer to the cgroup info if found, NULL otherwise.
- CgroupInfo* findCgroupInfo(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId);
-
- Flags flags;
- bool local;
- process::PID<Slave> slave;
- bool initialized;
-
- // File descriptor to 'mesos/tasks' file in the cgroup on which we place
- // an advisory lock.
- Option<int> lockFile;
-
- // The cgroup information for each live executor.
- hashmap<FrameworkID, hashmap<ExecutorID, CgroupInfo*> > infos;
-
- // The path to the cgroups hierarchy root.
- std::string hierarchy;
-
- // The cgroups subsystems being used.
- hashset<std::string> subsystems;
-
- // Allocated cpus (if using cpuset subsystem).
- std::map<proc::CPU, double> cpus;
-
- // Handlers for each resource name, used for resource changes.
- hashmap<std::string,
- Try<Nothing>(CgroupsIsolator::*)(
- CgroupInfo*,
- const Resource&)> handlers;
-};
-
-} // namespace mesos {
-} // namespace internal {
-} // namespace slave {
-
-#endif // __CGROUPS_ISOLATOR_HPP__
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/slave/containerizer/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/containerizer.cpp b/src/slave/containerizer/containerizer.cpp
new file mode 100644
index 0000000..d0a1023
--- /dev/null
+++ b/src/slave/containerizer/containerizer.cpp
@@ -0,0 +1,269 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <map>
+#include <vector>
+
+#include <process/dispatch.hpp>
+#include <process/owned.hpp>
+
+#include <stout/fs.hpp>
+#include <stout/hashmap.hpp>
+#include <stout/net.hpp>
+#include <stout/stringify.hpp>
+#include <stout/uuid.hpp>
+
+#include "slave/flags.hpp"
+#include "slave/slave.hpp"
+
+#ifdef __linux__
+#include "slave/containerizer/cgroups_launcher.hpp"
+#endif // __linux__
+#include "slave/containerizer/containerizer.hpp"
+#include "slave/containerizer/isolator.hpp"
+#include "slave/containerizer/launcher.hpp"
+#include "slave/containerizer/mesos_containerizer.hpp"
+
+#include "slave/containerizer/isolators/posix.hpp"
+#ifdef __linux__
+#include "slave/containerizer/isolators/cgroups/cpushare.hpp"
+#include "slave/containerizer/isolators/cgroups/mem.hpp"
+#endif // __linux__
+
+using std::map;
+using std::string;
+using std::vector;
+
+using namespace process;
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+// TODO(idownes): Move this to the Containerizer interface to complete
+// the delegation of containerization, i.e., external containerizers should be
+// able to report the resources they can isolate.
+Try<Resources> Containerizer::resources(const Flags& flags)
+{
+ Try<Resources> parsed = Resources::parse(
+ flags.resources.get(""), flags.default_role);
+
+ if (parsed.isError()) {
+ return Error(parsed.error());
+ }
+
+ Resources resources = parsed.get();
+
+ // CPU resource.
+ if (!resources.cpus().isSome()) {
+ // No CPU specified so probe OS or resort to DEFAULT_CPUS.
+ double cpus;
+ Try<long> cpus_ = os::cpus();
+ if (!cpus_.isSome()) {
+ LOG(WARNING) << "Failed to auto-detect the number of cpus to use: '"
+ << cpus_.error()
+ << "'; defaulting to " << DEFAULT_CPUS;
+ cpus = DEFAULT_CPUS;
+ } else {
+ cpus = cpus_.get();
+ }
+
+ resources += Resources::parse(
+ "cpus",
+ stringify(cpus),
+ flags.default_role).get();
+ }
+
+ // Memory resource.
+ if (!resources.mem().isSome()) {
+ // No memory specified so probe OS or resort to DEFAULT_MEM.
+ Bytes mem;
+ Try<os::Memory> mem_ = os::memory();
+ if (mem_.isError()) {
+ LOG(WARNING) << "Failed to auto-detect the size of main memory: '"
+ << mem_.error()
+ << "' ; defaulting to DEFAULT_MEM";
+ mem = DEFAULT_MEM;
+ } else {
+ mem = mem_.get().total;
+
+ // Leave 1 GB free if we have more than 1 GB, otherwise, use all!
+ // TODO(benh): Have better default scheme (e.g., % of mem not greater
+ // than 1 GB?)
+ if (mem > Gigabytes(1)) {
+ mem = mem - Gigabytes(1);
+ }
+ }
+
+ resources += Resources::parse(
+ "mem",
+ stringify(mem.megabytes()),
+ flags.default_role).get();
+ }
+
+ // Disk resource.
+ if (!resources.disk().isSome()) {
+ // No disk specified so probe OS or resort to DEFAULT_DISK.
+ Bytes disk;
+
+ // NOTE: We calculate disk size of the file system on
+ // which the slave work directory is mounted.
+ Try<Bytes> disk_ = fs::size(flags.work_dir);
+ if (!disk_.isSome()) {
+ LOG(WARNING) << "Failed to auto-detect the disk space: '"
+ << disk_.error()
+ << "' ; defaulting to " << DEFAULT_DISK;
+ disk = DEFAULT_DISK;
+ } else {
+ disk = disk_.get();
+ // Leave 5 GB free if we have more than 10 GB, otherwise, use all!
+ // TODO(benh): Have better default scheme (e.g., % of disk not
+ // greater than 10 GB?)
+ if (disk > Gigabytes(10)) {
+ disk = disk - Gigabytes(5);
+ }
+ }
+
+ resources += Resources::parse(
+ "disk",
+ stringify(disk.megabytes()),
+ flags.default_role).get();
+ }
+
+ // Network resource.
+ if (!resources.ports().isSome()) {
+ // No ports specified so resort to DEFAULT_PORTS.
+ resources += Resources::parse(
+ "ports",
+ stringify(DEFAULT_PORTS),
+ flags.default_role).get();
+ }
+
+ return resources;
+}
+
+
+Try<Containerizer*> Containerizer::create(
+ const Flags& flags,
+ bool local)
+{
+ string isolation;
+ if (flags.isolation == "process") {
+ LOG(WARNING) << "The 'process' isolation flag is deprecated, "
+ << "please update your flags to"
+ << " '--isolation=posix/cpu,posix/mem'.";
+ isolation = "posix/cpu,posix/mem";
+ } else if (flags.isolation == "cgroups") {
+ LOG(WARNING) << "The 'cgroups' isolation flag is deprecated, "
+ << "please update your flags to"
+ << " '--isolation=cgroups/cpu,cgroups/mem'.";
+ isolation = "cgroups/cpu,cgroups/mem";
+ } else {
+ isolation = flags.isolation;
+ }
+
+ LOG(INFO) << "Using isolation: " << isolation;
+
+ // Create a MesosContainerizerProcess using isolators and a launcher.
+ hashmap<std::string, Try<Isolator*> (*)(const Flags&)> creators;
+
+ creators["posix/cpu"] = &PosixCpuIsolatorProcess::create;
+ creators["posix/mem"] = &PosixMemIsolatorProcess::create;
+#ifdef __linux__
+ creators["cgroups/cpu"] = &CgroupsCpushareIsolatorProcess::create;
+ creators["cgroups/mem"] = &CgroupsMemIsolatorProcess::create;
+#endif // __linux__
+
+ vector<Owned<Isolator> > isolators;
+
+ foreach (const string& type, strings::split(isolation, ",")) {
+ if (creators.contains(type)) {
+ Try<Isolator*> isolator = creators[type](flags);
+ if (isolator.isError()) {
+ return Error(
+ "Could not create isolator " + type + ": " + isolator.error());
+ } else {
+ isolators.push_back(Owned<Isolator>(isolator.get()));
+ }
+ } else {
+ return Error("Unknown or unsupported isolator: " + type);
+ }
+ }
+
+#ifdef __linux__
+ // Use the cgroups launcher on Linux if any cgroups isolators are used.
+ Try<Launcher*> launcher = strings::contains(isolation, "cgroups")
+ ? CgroupsLauncher::create(flags) : PosixLauncher::create(flags);
+#else
+ Try<Launcher*> launcher = PosixLauncher::create(flags);
+#endif // __linux__
+ if (launcher.isError()) {
+ return Error("Failed to create launcher: " + launcher.error());
+ }
+
+ return new MesosContainerizer(
+ flags, local, Owned<Launcher>(launcher.get()), isolators);
+}
+
+
+map<string, string> executorEnvironment(
+ const ExecutorInfo& executorInfo,
+ const string& directory,
+ const SlaveID& slaveId,
+ const PID<Slave>& slavePid,
+ bool checkpoint,
+ const Duration& recoveryTimeout)
+{
+ map<string, string> env;
+ // Set LIBPROCESS_PORT so that we bind to a random free port (since
+ // this might have been set via --port option). We do this before
+ // the environment variables below in case it is included.
+ env["LIBPROCESS_PORT"] = "0";
+
+ // Also add MESOS_NATIVE_LIBRARY if it's not already present (and
+ // like above, we do this before the environment variables below in
+ // case the framework wants to override).
+ if (!os::hasenv("MESOS_NATIVE_LIBRARY")) {
+ string path =
+#ifdef __APPLE__
+ LIBDIR "/libmesos-" VERSION ".dylib";
+#else
+ LIBDIR "/libmesos-" VERSION ".so";
+#endif
+ if (os::exists(path)) {
+ env["MESOS_NATIVE_LIBRARY"] = path;
+ }
+ }
+
+ env["MESOS_FRAMEWORK_ID"] = executorInfo.framework_id().value();
+ env["MESOS_EXECUTOR_ID"] = executorInfo.executor_id().value();
+ env["MESOS_DIRECTORY"] = directory;
+ env["MESOS_SLAVE_ID"] = slaveId.value();
+ env["MESOS_SLAVE_PID"] = stringify(slavePid);
+ env["MESOS_CHECKPOINT"] = checkpoint ? "1" : "0";
+
+ if (checkpoint) {
+ env["MESOS_RECOVERY_TIMEOUT"] = stringify(recoveryTimeout);
+ }
+
+ return env;
+}
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/slave/containerizer/containerizer.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/containerizer.hpp b/src/slave/containerizer/containerizer.hpp
new file mode 100644
index 0000000..d9ae326
--- /dev/null
+++ b/src/slave/containerizer/containerizer.hpp
@@ -0,0 +1,137 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONTAINERIZER_HPP__
+#define __CONTAINERIZER_HPP__
+
+#include <map>
+
+#include <mesos/mesos.hpp>
+#include <mesos/resources.hpp>
+
+#include <process/future.hpp>
+#include <process/owned.hpp>
+#include <process/process.hpp>
+
+#include <stout/duration.hpp>
+#include <stout/option.hpp>
+#include <stout/try.hpp>
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+// Forward declaration.
+class Slave;
+class Flags;
+
+namespace state {
+// Forward declaration.
+struct SlaveState;
+} // namespace state {
+
+// An abstraction of a Containerizer that will contain an executor and its
+// tasks.
+class Containerizer
+{
+public:
+ // Information about a container termination.
+ struct Termination
+ {
+ Termination(
+ const Option<int>& _status,
+ bool _killed,
+ const std::string& _message)
+ : status(_status),
+ killed(_killed),
+ message(_message) {}
+
+ // Exit status of the executor.
+ const Option<int> status;
+
+ // A container may be killed if it exceeds its resources; this will be
+ // indicated by killed=true and described by the message string.
+ const bool killed;
+ const std::string message;
+ };
+
+ // Attempts to create a containerizer as specified by 'isolation' in flags.
+ static Try<Containerizer*> create(const Flags& flags, bool local);
+
+ // Determine slave resources from flags, probing the system or querying a
+ // delegate.
+ // TODO(idownes): Consider making this non-static and moving to containerizer
+ // implementations to enable a containerizer to best determine the resources,
+ // particularly if containerization is delegated.
+ static Try<Resources> resources(const Flags& flags);
+
+ virtual ~Containerizer() {}
+
+ // Recover all containerized executors specified in state. Any containerized
+ // executors present on the system but not included in state (or state is
+ // None) will be terminated and cleaned up.
+ virtual process::Future<Nothing> recover(
+ const Option<state::SlaveState>& state) = 0;
+
+ // Launch a containerized executor.
+ virtual process::Future<Nothing> launch(
+ const ContainerID& containerId,
+ const ExecutorInfo& executorInfo,
+ const std::string& directory,
+ const Option<std::string>& user,
+ const SlaveID& slaveId,
+ const process::PID<Slave>& slavePid,
+ bool checkpoint) = 0;
+
+ // Update the resources for a container.
+ virtual process::Future<Nothing> update(
+ const ContainerID& containerId,
+ const Resources& resources) = 0;
+
+ // Get resource usage statistics on the container.
+ virtual process::Future<ResourceStatistics> usage(
+ const ContainerID& containerId) = 0;
+
+ // Wait on the container's 'Termination'. If the executor terminates, the
+ // containerizer should also destroy the containerized context. The future
+ // may be failed if an error occurs during termination of the executor or
+ // destruction of the container.
+ virtual process::Future<Termination> wait(const ContainerID& containerId) = 0;
+
+ // Destroy a running container, killing all processes and releasing all
+ // resources.
+ // NOTE: Containerizers will automatically destroy containers on executor
+ // termination and manual destruction is not necessary. See wait().
+ virtual void destroy(const ContainerID& containerId) = 0;
+};
+
+
+// Executor environment variables returned as (name, value) map.
+std::map<std::string, std::string> executorEnvironment(
+ const ExecutorInfo& executorInfo,
+ const std::string& directory,
+ const SlaveID& slaveId,
+ const process::PID<Slave>& slavePid,
+ bool checkpoint,
+ const Duration& recoveryTimeout);
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
+
+#endif // __CONTAINERIZER_HPP__
[09/11] git commit: Containerizer - cgroup isolators (part 4).
Posted by vi...@apache.org.
Containerizer - cgroup isolators (part 4).
CPU and Memory isolators using Linux cgroups.
This code was written by Chi Zhang <cz...@twitter.com> and based on
the original cgroups_isolator.
Review: https://reviews.apache.org/r/16432
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/d0db1ba1
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/d0db1ba1
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/d0db1ba1
Branch: refs/heads/master
Commit: d0db1ba1ce2cb40d52a1c0f0ad8095c3d7f883be
Parents: d5266b8
Author: Ian Downes <ia...@gmail.com>
Authored: Tue Feb 11 16:30:01 2014 -0800
Committer: Vinod Kone <vi...@twitter.com>
Committed: Tue Feb 11 16:59:51 2014 -0800
----------------------------------------------------------------------
src/Makefile.am | 6 +-
.../isolators/cgroups/cpushare.cpp | 453 +++++++++++++++++
.../isolators/cgroups/cpushare.hpp | 104 ++++
.../containerizer/isolators/cgroups/mem.cpp | 498 +++++++++++++++++++
.../containerizer/isolators/cgroups/mem.hpp | 116 +++++
5 files changed, 1176 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/d0db1ba1/src/Makefile.am
----------------------------------------------------------------------
diff --git a/src/Makefile.am b/src/Makefile.am
index d9cb9e9..8a6a976 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -206,7 +206,9 @@ nodist_pkginclude_HEADERS = ../include/mesos/mesos.hpp mesos.pb.h
if OS_LINUX
libmesos_no_3rdparty_la_SOURCES += linux/cgroups.cpp
- libmesos_no_3rdparty_la_SOURCES += slave/container/cgroups_launcher.cpp
+ libmesos_no_3rdparty_la_SOURCES += slave/containerizer/isolators/cgroups/cpushare.cpp
+ libmesos_no_3rdparty_la_SOURCES += slave/containerizer/isolators/cgroups/mem.cpp
+ libmesos_no_3rdparty_la_SOURCES += slave/containerizer/cgroups_launcher.cpp
libmesos_no_3rdparty_la_SOURCES += linux/fs.cpp
else
EXTRA_DIST += linux/cgroups.cpp
@@ -235,6 +237,8 @@ libmesos_no_3rdparty_la_SOURCES += common/attributes.hpp \
slave/containerizer/cgroups_launcher.hpp \
slave/containerizer/containerizer.hpp \
slave/containerizer/isolator.hpp \
+ slave/containerizer/isolators/cgroups/cpushare.hpp \
+ slave/containerizer/isolators/cgroups/mem.hpp \
slave/containerizer/isolators/posix.hpp \
slave/containerizer/launcher.hpp \
slave/containerizer/mesos_containerizer.hpp \
http://git-wip-us.apache.org/repos/asf/mesos/blob/d0db1ba1/src/slave/containerizer/isolators/cgroups/cpushare.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/cpushare.cpp b/src/slave/containerizer/isolators/cgroups/cpushare.cpp
new file mode 100644
index 0000000..989d384
--- /dev/null
+++ b/src/slave/containerizer/isolators/cgroups/cpushare.cpp
@@ -0,0 +1,453 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vector>
+
+#include <mesos/values.hpp>
+#include <mesos/resources.hpp>
+
+#include <process/collect.hpp>
+#include <process/defer.hpp>
+#include <process/pid.hpp>
+
+#include <stout/bytes.hpp>
+#include <stout/check.hpp>
+#include <stout/error.hpp>
+#include <stout/foreach.hpp>
+#include <stout/hashmap.hpp>
+#include <stout/hashset.hpp>
+#include <stout/nothing.hpp>
+#include <stout/stringify.hpp>
+#include <stout/try.hpp>
+
+#include "common/type_utils.hpp"
+
+#include "linux/cgroups.hpp"
+
+#include "slave/flags.hpp"
+
+#include "slave/containerizer/isolators/cgroups/cpushare.hpp"
+
+using namespace process;
+
+using std::list;
+using std::string;
+using std::vector;
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+// CPU subsystem constants.
+// Multiplier applied to the requested cpus to compute 'cpu.shares' in update().
+const size_t CPU_SHARES_PER_CPU = 1024;
+// Lower bound applied to the computed 'cpu.shares' value in update().
+const size_t MIN_CPU_SHARES = 10;
+// Value written to 'cpu.cfs_period_us' by update() when CFS is enabled.
+const Duration CPU_CFS_PERIOD = Milliseconds(100); // Linux default.
+// Lower bound applied to the computed 'cpu.cfs_quota_us' value in update().
+const Duration MIN_CPU_CFS_QUOTA = Milliseconds(1);
+
+
+// Stores copies of the slave flags and of the subsystem -> hierarchy map.
+// create() builds that map with entries for the 'cpu' and 'cpuacct'
+// subsystems before invoking this constructor.
+CgroupsCpushareIsolatorProcess::CgroupsCpushareIsolatorProcess(
+ const Flags& _flags,
+ const hashmap<string, string>& _hierarchies)
+ : flags(_flags), hierarchies(_hierarchies) {}
+
+
+// Empty destructor.
+// NOTE(review): Info objects still stored in 'infos' are not deleted here;
+// presumably every container is expected to go through cleanup() first --
+// confirm.
+CgroupsCpushareIsolatorProcess::~CgroupsCpushareIsolatorProcess() {}
+
+
+// Factory for the cpushare isolator.
+//
+// Prepares the 'cpu' and 'cpuacct' cgroup hierarchies and, when
+// flags.cgroups_enable_cfs is set, verifies that the 'cpu.cfs_quota_us'
+// control is present under flags.cgroups_root. Returns an Isolator wrapping
+// a new CgroupsCpushareIsolatorProcess, or an Error describing which step
+// failed.
+Try<Isolator*> CgroupsCpushareIsolatorProcess::create(
+    const Flags& flags)
+{
+  // Subsystems this isolator depends on, prepared in this order.
+  const string subsystems[] = { "cpu", "cpuacct" };
+  const size_t count = sizeof(subsystems) / sizeof(subsystems[0]);
+
+  hashmap<string, string> hierarchies;
+
+  for (size_t i = 0; i < count; i++) {
+    const string& subsystem = subsystems[i];
+
+    Try<string> prepared = cgroups::prepare(
+        flags.cgroups_hierarchy, subsystem, flags.cgroups_root);
+
+    if (prepared.isError()) {
+      return Error("Failed to create isolator: " + prepared.error());
+    }
+
+    hierarchies[subsystem] = prepared.get();
+  }
+
+  if (flags.cgroups_enable_cfs) {
+    // Hard quotas require the 'cpu.cfs_quota_us' control. A lookup error is
+    // treated the same way as the control being absent.
+    Try<bool> quota = cgroups::exists(
+        hierarchies["cpu"], flags.cgroups_root, "cpu.cfs_quota_us");
+
+    const bool supported = quota.isSome() && quota.get();
+    if (!supported) {
+      return Error("Failed to find 'cpu.cfs_quota_us'. Your kernel "
+                   "might be too old to use the CFS cgroups feature.");
+    }
+  }
+
+  process::Owned<IsolatorProcess> process(
+      new CgroupsCpushareIsolatorProcess(flags, hierarchies));
+
+  return new Isolator(process);
+}
+
+
+// Rebuilds the isolator's per-container state after a slave restart.
+//
+// For every recovered run state whose 'cpu' cgroup still exists, an Info is
+// registered in 'infos'. Afterwards every cgroup found under
+// flags.cgroups_root in the 'cpu' and 'cpuacct' hierarchies that does not
+// belong to a recovered container is destroyed as an orphan.
+//
+// Returns a Failure (after discarding all Infos recovered so far) if a run
+// state has no ContainerID, if the existence check fails, or if the cgroups
+// of a hierarchy cannot be listed.
+Future<Nothing> CgroupsCpushareIsolatorProcess::recover(
+    const list<state::RunState>& states)
+{
+  // Cgroups that belong to successfully recovered containers.
+  hashset<string> recovered;
+
+  foreach (const state::RunState& state, states) {
+    if (!state.id.isSome()) {
+      foreachvalue (Info* info, infos) {
+        delete info;
+      }
+      infos.clear();
+      return Failure("ContainerID is required to recover");
+    }
+
+    const ContainerID& containerId = state.id.get();
+    const string cgroup =
+      path::join(flags.cgroups_root, containerId.value());
+
+    Try<bool> exists = cgroups::exists(hierarchies["cpu"], cgroup);
+    if (exists.isError()) {
+      foreachvalue (Info* info, infos) {
+        delete info;
+      }
+      infos.clear();
+      return Failure("Failed to check cgroup for container '" +
+                     stringify(containerId) + "'");
+    }
+
+    if (!exists.get()) {
+      // This may occur if the executor has exited and the isolator has
+      // destroyed the cgroup but the slave dies before noticing this. This
+      // will be detected when the containerizer tries to monitor the
+      // executor's pid.
+      LOG(WARNING) << "Couldn't find cgroup for container " << containerId;
+      continue;
+    }
+
+    // The Info is only allocated once the cgroup is known to exist, so the
+    // 'continue' above cannot leak an allocation.
+    infos[containerId] = new Info(containerId, cgroup);
+    recovered.insert(cgroup);
+  }
+
+  // Remove orphans, first in the 'cpu' and then in the 'cpuacct' hierarchy.
+  const string subsystems[] = { "cpu", "cpuacct" };
+  for (size_t i = 0; i < sizeof(subsystems) / sizeof(subsystems[0]); i++) {
+    const string& subsystem = subsystems[i];
+
+    Try<vector<string> > orphans = cgroups::get(
+        hierarchies[subsystem], flags.cgroups_root);
+
+    if (orphans.isError()) {
+      foreachvalue (Info* info, infos) {
+        delete info;
+      }
+      infos.clear();
+      return Failure(orphans.error());
+    }
+
+    foreach (const string& orphan, orphans.get()) {
+      if (!recovered.contains(orphan)) {
+        LOG(INFO) << "Removing orphaned cgroup"
+                  << " '" << path::join(subsystem, orphan) << "'";
+        // The result of the asynchronous destroy is not awaited.
+        cgroups::destroy(hierarchies[subsystem], orphan);
+      }
+    }
+  }
+
+  return Nothing();
+}
+
+
+// Registers a new container and creates its cgroup in the 'cpu' and then the
+// 'cpuacct' hierarchy, finally applying the executor's resources via
+// update(). Fails if the container is already known, if either cgroup
+// already exists, or if an existence check or creation fails.
+Future<Nothing> CgroupsCpushareIsolatorProcess::prepare(
+    const ContainerID& containerId,
+    const ExecutorInfo& executorInfo)
+{
+  if (infos.contains(containerId)) {
+    return Failure("Container has already been prepared");
+  }
+
+  Info* created = new Info(
+      containerId, path::join(flags.cgroups_root, containerId.value()));
+
+  // The Info is registered before the cgroups are created, so it stays in
+  // 'infos' even if one of the steps below fails.
+  infos[containerId] = CHECK_NOTNULL(created);
+
+  const string subsystems[] = { "cpu", "cpuacct" };
+  for (size_t i = 0; i < sizeof(subsystems) / sizeof(subsystems[0]); i++) {
+    const string& hierarchy = hierarchies[subsystems[i]];
+
+    Try<bool> present = cgroups::exists(hierarchy, created->cgroup);
+
+    if (present.isError()) {
+      return Failure("Failed to prepare isolator: " + present.error());
+    }
+
+    if (present.get()) {
+      return Failure("Failed to prepare isolator: cgroup already exists");
+    }
+
+    Try<Nothing> create = cgroups::create(hierarchy, created->cgroup);
+    if (create.isError()) {
+      return Failure("Failed to prepare isolator: " + create.error());
+    }
+  }
+
+  return update(containerId, executorInfo.resources());
+}
+
+
+// Moves 'pid' into the container's cgroup in the 'cpu' and then the
+// 'cpuacct' hierarchy and records the pid in the container's Info.
+//
+// Returns None() on success (no command is returned), or a Failure if the
+// container is unknown or an assignment fails. It is a CHECK failure to
+// call this twice for the same container.
+Future<Option<CommandInfo> > CgroupsCpushareIsolatorProcess::isolate(
+    const ContainerID& containerId,
+    pid_t pid)
+{
+  if (!infos.contains(containerId)) {
+    return Failure("Unknown container");
+  }
+
+  Info* info = CHECK_NOTNULL(infos[containerId]);
+
+  CHECK(info->pid.isNone());
+  info->pid = pid;
+
+  const string subsystems[] = { "cpu", "cpuacct" };
+  for (size_t i = 0; i < sizeof(subsystems) / sizeof(subsystems[0]); i++) {
+    const string& hierarchy = hierarchies[subsystems[i]];
+
+    Try<Nothing> assign = cgroups::assign(hierarchy, info->cgroup, pid);
+    if (assign.isError()) {
+      // The container id is now closed with a matching quote; the original
+      // message left it unbalanced.
+      LOG(ERROR) << "Failed to assign container '" << info->containerId
+                 << "' to its own cgroup '"
+                 << path::join(hierarchy, info->cgroup)
+                 << "' : " << assign.error();
+      return Failure("Failed to isolate container: " + assign.error());
+    }
+  }
+
+  return None();
+}
+
+
+// Returns the future of the container's 'limitation' promise, or a Failure
+// if the container is unknown.
+Future<Limitation> CgroupsCpushareIsolatorProcess::watch(
+    const ContainerID& containerId)
+{
+  if (!infos.contains(containerId)) {
+    return Failure("Unknown container");
+  }
+
+  Info* info = CHECK_NOTNULL(infos[containerId]);
+
+  return info->limitation.future();
+}
+
+
+// Applies the cpus in 'resources' to the container's 'cpu' cgroup.
+//
+// 'cpu.shares' is always written (CPU_SHARES_PER_CPU per cpu, but at least
+// MIN_CPU_SHARES). When flags.cgroups_enable_cfs is set, 'cpu.cfs_period_us'
+// and 'cpu.cfs_quota_us' are written as well. Fails if no cpus resource is
+// given, if the container is unknown, or if any write fails.
+Future<Nothing> CgroupsCpushareIsolatorProcess::update(
+    const ContainerID& containerId,
+    const Resources& resources)
+{
+  if (resources.cpus().isNone()) {
+    return Failure("No cpus resource given");
+  }
+
+  if (!infos.contains(containerId)) {
+    return Failure("Unknown container");
+  }
+
+  Info* info = CHECK_NOTNULL(infos[containerId]);
+  const string& hierarchy = hierarchies["cpu"];
+
+  double cpus = resources.cpus().get();
+
+  // Proportional weight, clamped from below.
+  size_t shares = std::max(
+      static_cast<size_t>(CPU_SHARES_PER_CPU * cpus), MIN_CPU_SHARES);
+
+  Try<Nothing> sharesWrite = cgroups::write(
+      hierarchy, info->cgroup, "cpu.shares", stringify(shares));
+
+  if (sharesWrite.isError()) {
+    return Failure("Failed to update 'cpu.shares': " + sharesWrite.error());
+  }
+
+  LOG(INFO) << "Updated 'cpu.shares' to " << shares
+            << " (cpus " << cpus << ")"
+            << " for container " << containerId;
+
+  // Without CFS there is nothing more to configure.
+  if (!flags.cgroups_enable_cfs) {
+    return Nothing();
+  }
+
+  Try<Nothing> periodWrite = cgroups::write(
+      hierarchy,
+      info->cgroup,
+      "cpu.cfs_period_us",
+      stringify(CPU_CFS_PERIOD.us()));
+
+  if (periodWrite.isError()) {
+    return Failure(
+        "Failed to update 'cpu.cfs_period_us': " + periodWrite.error());
+  }
+
+  // Hard quota: the period scaled by the cpus, clamped from below.
+  Duration scaled = Microseconds(
+      static_cast<int64_t>(CPU_CFS_PERIOD.us() * cpus));
+  Duration quota = std::max(scaled, MIN_CPU_CFS_QUOTA);
+
+  Try<Nothing> quotaWrite = cgroups::write(
+      hierarchy,
+      info->cgroup,
+      "cpu.cfs_quota_us",
+      stringify(quota.us()));
+
+  if (quotaWrite.isError()) {
+    return Failure(
+        "Failed to update 'cpu.cfs_quota_us': " + quotaWrite.error());
+  }
+
+  LOG(INFO) << "Updated 'cpu.cfs_period_us' to " << CPU_CFS_PERIOD
+            << " and 'cpu.cfs_quota_us' to " << quota
+            << " (cpus " << cpus << ")"
+            << " for container " << containerId;
+
+  return Nothing();
+}
+
+
+// Collects cpu statistics for a container.
+//
+// User/system time comes from 'cpuacct.stat' (clock ticks converted to
+// seconds); the period/throttling counters come from 'cpu.stat' and are only
+// set when the corresponding key is present. Fails if the container is
+// unknown or if either stat file cannot be read.
+Future<ResourceStatistics> CgroupsCpushareIsolatorProcess::usage(
+    const ContainerID& containerId)
+{
+  if (!infos.contains(containerId)) {
+    return Failure("Unknown container");
+  }
+
+  Info* info = CHECK_NOTNULL(infos[containerId]);
+
+  ResourceStatistics result;
+
+  // Get the number of clock ticks, used for cpu accounting.
+  static long ticks = sysconf(_SC_CLK_TCK);
+
+  PCHECK(ticks > 0) << "Failed to get sysconf(_SC_CLK_TCK)";
+
+  // Add the cpuacct.stat information.
+  Try<hashmap<string, uint64_t> > cpuacct =
+    cgroups::stat(hierarchies["cpuacct"], info->cgroup, "cpuacct.stat");
+
+  if (cpuacct.isError()) {
+    return Failure("Failed to read cpuacct.stat: " + cpuacct.error());
+  }
+
+  // TODO(bmahler): Add namespacing to cgroups to enforce the expected
+  // structure, e.g., cgroups::cpuacct::stat.
+  const bool haveTimes =
+    cpuacct.get().contains("user") && cpuacct.get().contains("system");
+
+  if (haveTimes) {
+    result.set_cpus_user_time_secs(
+        static_cast<double>(cpuacct.get()["user"]) /
+        static_cast<double>(ticks));
+    result.set_cpus_system_time_secs(
+        static_cast<double>(cpuacct.get()["system"]) /
+        static_cast<double>(ticks));
+  }
+
+  // Add the cpu.stat information.
+  Try<hashmap<string, uint64_t> > cpu =
+    cgroups::stat(hierarchies["cpu"], info->cgroup, "cpu.stat");
+
+  if (cpu.isError()) {
+    return Failure("Failed to read cpu.stat: " + cpu.error());
+  }
+
+  if (cpu.get().contains("nr_periods")) {
+    result.set_cpus_nr_periods(
+        static_cast<uint32_t>(cpu.get()["nr_periods"]));
+  }
+
+  if (cpu.get().contains("nr_throttled")) {
+    result.set_cpus_nr_throttled(
+        static_cast<uint32_t>(cpu.get()["nr_throttled"]));
+  }
+
+  if (cpu.get().contains("throttled_time")) {
+    result.set_cpus_throttled_time_secs(
+        Nanoseconds(cpu.get()["throttled_time"]).secs());
+  }
+
+  return result;
+}
+
+
+// Destroys the container's cgroups in the 'cpu' and 'cpuacct' hierarchies
+// and, once both destroys have completed, runs _cleanup() on this process
+// to drop the container's Info. Fails if the container is unknown.
+Future<Nothing> CgroupsCpushareIsolatorProcess::cleanup(
+    const ContainerID& containerId)
+{
+  if (!infos.contains(containerId)) {
+    return Failure("Unknown container");
+  }
+
+  Info* info = CHECK_NOTNULL(infos[containerId]);
+  const string& cgroup = info->cgroup;
+
+  list<Future<bool> > destroys;
+  destroys.push_back(cgroups::destroy(hierarchies["cpu"], cgroup));
+  destroys.push_back(cgroups::destroy(hierarchies["cpuacct"], cgroup));
+
+  return collect(destroys)
+    .then(defer(PID<CgroupsCpushareIsolatorProcess>(this),
+                &CgroupsCpushareIsolatorProcess::_cleanup,
+                containerId));
+}
+
+
+// Continuation of cleanup(): removes the container's entry from 'infos' and
+// frees its Info. The container must still be registered (CHECK).
+Future<Nothing> CgroupsCpushareIsolatorProcess::_cleanup(
+    const ContainerID& containerId)
+{
+  CHECK(infos.contains(containerId));
+
+  Info* finished = infos[containerId];
+  infos.erase(containerId);
+  delete finished;
+
+  return Nothing();
+}
+
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
http://git-wip-us.apache.org/repos/asf/mesos/blob/d0db1ba1/src/slave/containerizer/isolators/cgroups/cpushare.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/cpushare.hpp b/src/slave/containerizer/isolators/cgroups/cpushare.hpp
new file mode 100644
index 0000000..49cc5bc
--- /dev/null
+++ b/src/slave/containerizer/isolators/cgroups/cpushare.hpp
@@ -0,0 +1,104 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CPUSHARE_ISOLATOR_HPP__
+#define __CPUSHARE_ISOLATOR_HPP__
+
+#include <mesos/resources.hpp>
+
+#include <process/future.hpp>
+
+#include <stout/hashmap.hpp>
+#include <stout/try.hpp>
+
+#include "slave/containerizer/isolator.hpp"
+
+#include "slave/flags.hpp"
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+
+// Use the Linux cpu cgroup controller for cpu isolation which uses the
+// Completely Fair Scheduler (CFS).
+// - cpushare implements proportionally weighted scheduling.
+// - cfs implements hard quota based scheduling.
+//
+// Instances are obtained through create(); the constructor is private.
+class CgroupsCpushareIsolatorProcess : public IsolatorProcess
+{
+public:
+ // Builds the isolator from the slave flags; returns an Error on failure.
+ static Try<Isolator*> create(const Flags& flags);
+
+ virtual ~CgroupsCpushareIsolatorProcess();
+
+ // Rebuilds per-container state from the given run states.
+ virtual process::Future<Nothing> recover(
+ const std::list<state::RunState>& states);
+
+ // Registers a container before its executor is launched.
+ virtual process::Future<Nothing> prepare(
+ const ContainerID& containerId,
+ const ExecutorInfo& executorInfo);
+
+ // Associates the process 'pid' with the container.
+ virtual process::Future<Option<CommandInfo> > isolate(
+ const ContainerID& containerId,
+ pid_t pid);
+
+ // Returns a future tied to the container's 'limitation' promise.
+ virtual process::Future<Limitation> watch(
+ const ContainerID& containerId);
+
+ // Applies new resources to the container.
+ virtual process::Future<Nothing> update(
+ const ContainerID& containerId,
+ const Resources& resources);
+
+ // Reports resource statistics for the container.
+ virtual process::Future<ResourceStatistics> usage(
+ const ContainerID& containerId);
+
+ // Releases everything held for the container.
+ virtual process::Future<Nothing> cleanup(
+ const ContainerID& containerId);
+
+private:
+ CgroupsCpushareIsolatorProcess(
+ const Flags& flags,
+ const hashmap<std::string, std::string>& hierarchies);
+
+ // Second stage of cleanup(); drops the container's Info.
+ virtual process::Future<Nothing> _cleanup(const ContainerID& containerId);
+
+ // Per-container bookkeeping.
+ struct Info
+ {
+ Info(const ContainerID& _containerId, const std::string& _cgroup)
+ : containerId(_containerId), cgroup(_cgroup) {}
+
+ const ContainerID containerId;
+ const std::string cgroup;
+ // Unset until a pid has been recorded for the container.
+ Option<pid_t> pid;
+
+ // Promise behind the future handed out by watch().
+ process::Promise<Limitation> limitation;
+ };
+
+ const Flags flags;
+
+ // Map from subsystem to hierarchy.
+ hashmap<std::string, std::string> hierarchies;
+
+ // Known containers; the Info objects are owned raw pointers.
+ hashmap<ContainerID, Info*> infos;
+};
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
+
+#endif // __CPUSHARE_ISOLATOR_HPP__
http://git-wip-us.apache.org/repos/asf/mesos/blob/d0db1ba1/src/slave/containerizer/isolators/cgroups/mem.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/mem.cpp b/src/slave/containerizer/isolators/cgroups/mem.cpp
new file mode 100644
index 0000000..a01e114
--- /dev/null
+++ b/src/slave/containerizer/isolators/cgroups/mem.cpp
@@ -0,0 +1,498 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vector>
+
+#include <mesos/resources.hpp>
+#include <mesos/values.hpp>
+
+#include <process/collect.hpp>
+#include <process/defer.hpp>
+#include <process/pid.hpp>
+
+#include <stout/bytes.hpp>
+#include <stout/check.hpp>
+#include <stout/error.hpp>
+#include <stout/foreach.hpp>
+#include <stout/hashmap.hpp>
+#include <stout/hashset.hpp>
+#include <stout/lambda.hpp>
+#include <stout/nothing.hpp>
+#include <stout/stringify.hpp>
+#include <stout/try.hpp>
+
+#include "common/type_utils.hpp"
+
+#include "linux/cgroups.hpp"
+
+#include "slave/containerizer/isolators/cgroups/mem.hpp"
+
+using namespace process;
+
+using std::list;
+using std::ostringstream;
+using std::string;
+using std::vector;
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+// Memory subsystem constants.
+// Lower bound applied to the requested memory in update() before it is
+// written as the soft (and possibly hard) limit.
+const Bytes MIN_MEMORY = Megabytes(32);
+
+
+// Stores copies of the slave flags and of the hierarchy string that create()
+// obtained from cgroups::prepare() for the "memory" subsystem.
+CgroupsMemIsolatorProcess::CgroupsMemIsolatorProcess(
+ const Flags& _flags,
+ const string& _hierarchy)
+ : flags(_flags), hierarchy(_hierarchy) {}
+
+
+// Empty destructor.
+// NOTE(review): Info objects still stored in 'infos' are not deleted here;
+// presumably every container is expected to go through cleanup() first --
+// confirm.
+CgroupsMemIsolatorProcess::~CgroupsMemIsolatorProcess() {}
+
+
+// Factory for the memory isolator.
+//
+// Prepares the 'memory' cgroup hierarchy, verifies that the
+// 'memory.oom_control' control exists under flags.cgroups_root and writes
+// "0" to it. Returns an Isolator wrapping a new CgroupsMemIsolatorProcess,
+// or an Error describing which step failed.
+Try<Isolator*> CgroupsMemIsolatorProcess::create(const Flags& flags)
+{
+  Try<string> prepared = cgroups::prepare(
+      flags.cgroups_hierarchy, "memory", flags.cgroups_root);
+
+  if (prepared.isError()) {
+    return Error("Failed to create memory cgroup: " + prepared.error());
+  }
+
+  const string& hierarchy = prepared.get();
+
+  // Make sure the kernel supports OOM controls. A lookup error is treated
+  // the same way as the control being absent.
+  Try<bool> oomControl = cgroups::exists(
+      hierarchy, flags.cgroups_root, "memory.oom_control");
+
+  const bool supported = oomControl.isSome() && oomControl.get();
+  if (!supported) {
+    return Error("Failed to determine if 'memory.oom_control' control exists");
+  }
+
+  // Make sure the kernel OOM-killer is enabled.
+  // The Mesos OOM handler, as implemented, is not capable of handling
+  // the oom condition by itself safely given the limitations Linux
+  // imposes on this code path.
+  Try<Nothing> enabled = cgroups::write(
+      hierarchy, flags.cgroups_root, "memory.oom_control", "0");
+
+  if (enabled.isError()) {
+    return Error("Failed to update memory.oom_control");
+  }
+
+  process::Owned<IsolatorProcess> process(
+      new CgroupsMemIsolatorProcess(flags, hierarchy));
+
+  return new Isolator(process);
+}
+
+
+// Rebuilds the isolator's per-container state after a slave restart.
+//
+// For every recovered run state whose memory cgroup still exists, an Info is
+// registered in 'infos' and OOM listening is restarted. Afterwards every
+// cgroup found under flags.cgroups_root that does not belong to a recovered
+// container is destroyed as an orphan.
+//
+// Returns a Failure (after discarding all Infos recovered so far) if a run
+// state has no ContainerID, if the existence check fails, or if the cgroups
+// cannot be listed.
+Future<Nothing> CgroupsMemIsolatorProcess::recover(
+    const list<state::RunState>& states)
+{
+  // Cgroups that belong to successfully recovered containers.
+  hashset<string> recovered;
+
+  foreach (const state::RunState& state, states) {
+    if (state.id.isNone()) {
+      foreachvalue (Info* info, infos) {
+        delete info;
+      }
+      infos.clear();
+      return Failure("ContainerID is required to recover");
+    }
+
+    const ContainerID& containerId = state.id.get();
+    const string cgroup =
+      path::join(flags.cgroups_root, containerId.value());
+
+    Try<bool> exists = cgroups::exists(hierarchy, cgroup);
+    if (exists.isError()) {
+      foreachvalue (Info* info, infos) {
+        delete info;
+      }
+      infos.clear();
+      return Failure("Failed to check cgroup for container '" +
+                     stringify(containerId) + "'");
+    }
+
+    if (!exists.get()) {
+      VLOG(1) << "Couldn't find cgroup for container " << containerId;
+      // This may occur if the executor has exited and the isolator has
+      // destroyed the cgroup but the slave dies before noticing this. This
+      // will be detected when the containerizer tries to monitor the
+      // executor's pid.
+      continue;
+    }
+
+    // The Info is only allocated once the cgroup is known to exist, so the
+    // 'continue' above cannot leak an allocation.
+    infos[containerId] = new Info(containerId, cgroup);
+    recovered.insert(cgroup);
+
+    oomListen(containerId);
+  }
+
+  Try<vector<string> > orphans = cgroups::get(
+      hierarchy, flags.cgroups_root);
+
+  if (orphans.isError()) {
+    foreachvalue (Info* info, infos) {
+      delete info;
+    }
+    infos.clear();
+    return Failure(orphans.error());
+  }
+
+  foreach (const string& orphan, orphans.get()) {
+    if (!recovered.contains(orphan)) {
+      LOG(INFO) << "Removing orphaned cgroup '" << orphan << "'";
+      // The result of the asynchronous destroy is not awaited.
+      cgroups::destroy(hierarchy, orphan);
+    }
+  }
+
+  return Nothing();
+}
+
+
+// Registers a new container, creates its memory cgroup, starts listening
+// for OOM events and applies the executor's resources via update(). Fails
+// if the container is already known, if the cgroup already exists, or if
+// the existence check or creation fails.
+Future<Nothing> CgroupsMemIsolatorProcess::prepare(
+    const ContainerID& containerId,
+    const ExecutorInfo& executorInfo)
+{
+  if (infos.contains(containerId)) {
+    return Failure("Container has already been prepared");
+  }
+
+  Info* created = new Info(
+      containerId, path::join(flags.cgroups_root, containerId.value()));
+
+  // The Info is registered before the cgroup is created, so it stays in
+  // 'infos' even if one of the steps below fails.
+  infos[containerId] = CHECK_NOTNULL(created);
+
+  Try<bool> present = cgroups::exists(hierarchy, created->cgroup);
+
+  if (present.isError()) {
+    return Failure("Failed to prepare isolator: " + present.error());
+  }
+
+  if (present.get()) {
+    return Failure("Failed to prepare isolator: cgroup already exists");
+  }
+
+  // Create a cgroup for this container.
+  Try<Nothing> create = cgroups::create(hierarchy, created->cgroup);
+  if (create.isError()) {
+    return Failure("Failed to prepare isolator: " + create.error());
+  }
+
+  oomListen(containerId);
+
+  return update(containerId, executorInfo.resources());
+}
+
+
+// Moves 'pid' into the container's memory cgroup and records the pid in the
+// container's Info. Returns None() on success (no command is returned), or
+// a Failure if the container is unknown or the assignment fails. It is a
+// CHECK failure to call this twice for the same container.
+Future<Option<CommandInfo> > CgroupsMemIsolatorProcess::isolate(
+    const ContainerID& containerId,
+    pid_t pid)
+{
+  if (!infos.contains(containerId)) {
+    return Failure("Unknown container");
+  }
+
+  Info* info = CHECK_NOTNULL(infos[containerId]);
+
+  CHECK(info->pid.isNone());
+  info->pid = pid;
+
+  Try<Nothing> assigned = cgroups::assign(hierarchy, info->cgroup, pid);
+  if (assigned.isError()) {
+    const string target = path::join(hierarchy, info->cgroup);
+
+    return Failure("Failed to assign container '" +
+                   stringify(info->containerId) + "' to its own cgroup '" +
+                   target +
+                   "' : " + assigned.error());
+  }
+
+  return None();
+}
+
+
+// Returns the future of the container's 'limitation' promise (fulfilled by
+// oom()), or a Failure if the container is unknown.
+Future<Limitation> CgroupsMemIsolatorProcess::watch(
+    const ContainerID& containerId)
+{
+  if (!infos.contains(containerId)) {
+    return Failure("Unknown container");
+  }
+
+  Info* info = CHECK_NOTNULL(infos[containerId]);
+
+  return info->limitation.future();
+}
+
+
+// Applies the memory in 'resources' to the container's cgroup.
+//
+// The soft limit is always written (at least MIN_MEMORY). The hard limit is
+// only written when no pid has been recorded yet or when the limit is being
+// raised. Fails if no memory resource is given, if the container is
+// unknown, or if reading or writing a limit fails.
+Future<Nothing> CgroupsMemIsolatorProcess::update(
+    const ContainerID& containerId,
+    const Resources& resources)
+{
+  if (resources.mem().isNone()) {
+    return Failure("No memory resource given");
+  }
+
+  if (!infos.contains(containerId)) {
+    return Failure("Unknown container");
+  }
+
+  Info* info = CHECK_NOTNULL(infos[containerId]);
+
+  // New limit, clamped from below.
+  Bytes requested = resources.mem().get();
+  Bytes limit = std::max(requested, MIN_MEMORY);
+
+  // Always set the soft limit.
+  Try<Nothing> softWrite =
+    cgroups::memory::soft_limit_in_bytes(hierarchy, info->cgroup, limit);
+
+  if (softWrite.isError()) {
+    return Failure("Failed to set 'memory.soft_limit_in_bytes': "
+                   + softWrite.error());
+  }
+
+  LOG(INFO) << "Updated 'memory.soft_limit_in_bytes' to " << limit
+            << " for container " << containerId;
+
+  // Read the existing limit.
+  Try<Bytes> existing =
+    cgroups::memory::limit_in_bytes(hierarchy, info->cgroup);
+
+  if (existing.isError()) {
+    return Failure(
+        "Failed to read 'memory.limit_in_bytes': " + existing.error());
+  }
+
+  // Determine whether to set the hard limit. If this is the first
+  // time (info->pid.isNone()), or we're raising the existing limit,
+  // then we can update the hard limit safely. Otherwise, if we need
+  // to decrease 'memory.limit_in_bytes' we may induce an OOM if too
+  // much memory is in use. As a result, we only update the soft
+  // limit when the memory reservation is being reduced. This is
+  // probably okay if the machine has available resources.
+  // TODO(benh): Introduce a MemoryWatcherProcess which monitors the
+  // discrepancy between usage and soft limit and introduces a "manual oom" if
+  // necessary.
+  const bool setHardLimit = info->pid.isNone() || limit > existing.get();
+  if (!setHardLimit) {
+    return Nothing();
+  }
+
+  Try<Nothing> hardWrite =
+    cgroups::memory::limit_in_bytes(hierarchy, info->cgroup, limit);
+
+  if (hardWrite.isError()) {
+    return Failure("Failed to set 'memory.limit_in_bytes': " +
+                   hardWrite.error());
+  }
+
+  LOG(INFO) << "Updated 'memory.limit_in_bytes' to " << limit
+            << " for container " << containerId;
+
+  return Nothing();
+}
+
+
+// Collects memory statistics for a container.
+//
+// The rss value is taken from 'memory.usage_in_bytes'; the file/anon/mapped
+// values come from 'memory.stat' and are only set when the corresponding
+// key is present. Fails if the container is unknown or if either source
+// cannot be read.
+Future<ResourceStatistics> CgroupsMemIsolatorProcess::usage(
+    const ContainerID& containerId)
+{
+  if (!infos.contains(containerId)) {
+    return Failure("Unknown container");
+  }
+
+  Info* info = CHECK_NOTNULL(infos[containerId]);
+
+  ResourceStatistics result;
+
+  // The rss from memory.stat is wrong in two dimensions:
+  // 1. It does not include child cgroups.
+  // 2. It does not include any file backed pages.
+  Try<Bytes> inUse = cgroups::memory::usage_in_bytes(hierarchy, info->cgroup);
+  if (inUse.isError()) {
+    return Failure("Failed to parse memory.usage_in_bytes: " + inUse.error());
+  }
+
+  // TODO(bmahler): Add namespacing to cgroups to enforce the expected
+  // structure, e.g, cgroups::memory::stat.
+  result.set_mem_rss_bytes(inUse.get().bytes());
+
+  Try<hashmap<string, uint64_t> > memoryStat =
+    cgroups::stat(hierarchy, info->cgroup, "memory.stat");
+
+  if (memoryStat.isError()) {
+    return Failure("Failed to read memory.stat: " + memoryStat.error());
+  }
+
+  if (memoryStat.get().contains("total_cache")) {
+    result.set_mem_file_bytes(memoryStat.get()["total_cache"]);
+  }
+
+  if (memoryStat.get().contains("total_rss")) {
+    result.set_mem_anon_bytes(memoryStat.get()["total_rss"]);
+  }
+
+  if (memoryStat.get().contains("total_mapped_file")) {
+    result.set_mem_mapped_file_bytes(
+        memoryStat.get()["total_mapped_file"]);
+  }
+
+  return result;
+}
+
+
+// Discards a still-pending OOM notifier, destroys the container's memory
+// cgroup and, once the destroy has completed, runs _cleanup() on this
+// process to drop the container's Info. Fails if the container is unknown.
+Future<Nothing> CgroupsMemIsolatorProcess::cleanup(
+    const ContainerID& containerId)
+{
+  if (!infos.contains(containerId)) {
+    return Failure("Unknown container");
+  }
+
+  Info* info = CHECK_NOTNULL(infos[containerId]);
+
+  if (info->oomNotifier.isPending()) {
+    info->oomNotifier.discard();
+  }
+
+  Future<bool> destroyed = cgroups::destroy(hierarchy, info->cgroup);
+
+  return destroyed
+    .then(defer(PID<CgroupsMemIsolatorProcess>(this),
+                &CgroupsMemIsolatorProcess::_cleanup,
+                containerId));
+}
+
+
+// Continuation of cleanup(): removes the container's entry from 'infos' and
+// frees its Info. The container must still be registered (CHECK).
+Future<Nothing> CgroupsMemIsolatorProcess::_cleanup(
+    const ContainerID& containerId)
+{
+  CHECK(infos.contains(containerId));
+
+  Info* finished = infos[containerId];
+  infos.erase(containerId);
+  delete finished;
+
+  return Nothing();
+}
+
+
+// Starts listening for OOM events on the container's 'memory.oom_control'
+// and arranges for oomWaited() to run on this process when the notifier
+// future completes. The container must already be registered (CHECK).
+void CgroupsMemIsolatorProcess::oomListen(
+    const ContainerID& containerId)
+{
+  CHECK(infos.contains(containerId));
+  Info* info = CHECK_NOTNULL(infos[containerId]);
+
+  info->oomNotifier =
+    cgroups::listen(hierarchy, info->cgroup, "memory.oom_control");
+
+  // If the listening fails immediately, something very wrong
+  // happened. Therefore, we report a fatal error here.
+  if (info->oomNotifier.isFailed()) {
+    LOG(FATAL) << "Failed to listen for OOM events for container "
+               << containerId << ": "
+               << info->oomNotifier.failure();
+  }
+
+  LOG(INFO) << "Started listening for OOM events for container "
+            << containerId;
+
+  // Use onAny rather than onReady: oomWaited() inspects the future for the
+  // discarded and failed states, which an onReady callback would never see,
+  // so a notifier that fails later would otherwise go unreported.
+  info->oomNotifier.onAny(defer(
+      PID<CgroupsMemIsolatorProcess>(this),
+      &CgroupsMemIsolatorProcess::oomWaited,
+      containerId,
+      lambda::_1));
+}
+
+
+// Callback for the container's OOM notifier future. A discarded or failed
+// notifier is only logged; a ready one means an out-of-memory event
+// occurred and is forwarded to oom().
+void CgroupsMemIsolatorProcess::oomWaited(
+    const ContainerID& containerId,
+    const Future<uint64_t>& future)
+{
+  if (future.isDiscarded()) {
+    LOG(INFO) << "Discarded OOM notifier for container "
+              << containerId;
+  } else if (future.isFailed()) {
+    LOG(ERROR) << "Listening on OOM events failed for container "
+               << containerId << ": " << future.failure();
+  } else {
+    // Only report the notifier as triggered when an event really happened,
+    // so a discarded or failed notifier is not logged as an OOM.
+    LOG(INFO) << "OOM notifier is triggered for container "
+              << containerId;
+
+    // Out-of-memory event happened, call the handler.
+    oom(containerId);
+  }
+}
+
+
+// Handles an out-of-memory event for a container: builds a diagnostic
+// message (limit, maximum usage and the raw 'memory.stat' contents), logs
+// it, and fulfils the container's 'limitation' promise with a "mem"
+// resource describing the maximum usage. Failures to read individual
+// values are logged and the corresponding part of the message is omitted.
+void CgroupsMemIsolatorProcess::oom(const ContainerID& containerId)
+{
+  if (!infos.contains(containerId)) {
+    // It is likely that process exited is executed before this
+    // function (e.g. The kill and OOM events happen at the same
+    // time, and the process exit event arrives first.) Therefore, we
+    // should not report a fatal error here.
+    LOG(INFO) << "OOM detected for an already terminated executor";
+    return;
+  }
+
+  Info* info = CHECK_NOTNULL(infos[containerId]);
+
+  LOG(INFO) << "OOM detected for container " << containerId;
+
+  // Construct a "message" string to describe why the isolator
+  // destroyed the executor's cgroup (in order to assist in
+  // debugging).
+  ostringstream message;
+  message << "Memory limit exceeded: ";
+
+  // Output the requested memory limit.
+  Try<Bytes> limit = cgroups::memory::limit_in_bytes(hierarchy, info->cgroup);
+
+  if (limit.isError()) {
+    LOG(ERROR) << "Failed to read 'memory.limit_in_bytes': " << limit.error();
+  } else {
+    message << "Requested: " << limit.get() << " ";
+  }
+
+  // Output the maximum memory usage.
+  Try<Bytes> usage = cgroups::memory::max_usage_in_bytes(
+      hierarchy, info->cgroup);
+
+  if (usage.isError()) {
+    LOG(ERROR) << "Failed to read 'memory.max_usage_in_bytes': "
+               << usage.error();
+  } else {
+    message << "Maximum Used: " << usage.get() << "\n";
+  }
+
+  // Output 'memory.stat' of the cgroup to help with debugging.
+  // NOTE: With Kernel OOM-killer enabled these stats may not reflect
+  // memory state at time of OOM.
+  Try<string> read = cgroups::read(hierarchy, info->cgroup, "memory.stat");
+  if (read.isError()) {
+    LOG(ERROR) << "Failed to read 'memory.stat': " << read.error();
+  } else {
+    message << "\nMEMORY STATISTICS: \n" << read.get() << "\n";
+  }
+
+  LOG(INFO) << strings::trim(message.str()); // Trim the extra '\n' at the end.
+
+  // The "mem" resource is expressed in megabytes, so the usage must be
+  // converted; passing the raw byte count would overstate the usage by a
+  // factor of 2^20.
+  Resource mem = Resources::parse(
+      "mem",
+      stringify(usage.isSome() ? usage.get().megabytes() : 0),
+      "*").get();
+
+  info->limitation.set(Limitation(mem, message.str()));
+}
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
http://git-wip-us.apache.org/repos/asf/mesos/blob/d0db1ba1/src/slave/containerizer/isolators/cgroups/mem.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/mem.hpp b/src/slave/containerizer/isolators/cgroups/mem.hpp
new file mode 100644
index 0000000..aabb4df
--- /dev/null
+++ b/src/slave/containerizer/isolators/cgroups/mem.hpp
@@ -0,0 +1,116 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MEM_ISOLATOR_HPP__
+#define __MEM_ISOLATOR_HPP__
+
+#include <mesos/resources.hpp>
+
+#include <process/future.hpp>
+
+#include <stout/nothing.hpp>
+#include <stout/try.hpp>
+
+#include "mesos/resources.hpp"
+
+#include "slave/containerizer/isolator.hpp"
+
+#include "slave/flags.hpp"
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+
+// Isolator process for the cgroups memory subsystem. It implements the
+// IsolatorProcess interface and tracks per-container state in 'infos'.
+//
+// NOTE(review): this header uses hashmap, std::list and std::string but
+// does not itself include <stout/hashmap.hpp>, <list> or <string>; it
+// relies on them arriving transitively. It also includes
+// mesos/resources.hpp twice (angle-bracket and quoted form).
+class CgroupsMemIsolatorProcess : public IsolatorProcess
+{
+public:
+ // Factory function; the constructor is private so this is the only way
+ // for outside code to obtain an instance. Returns an error on failure.
+ static Try<Isolator*> create(const Flags& flags);
+
+ virtual ~CgroupsMemIsolatorProcess();
+
+ // IsolatorProcess interface.
+ virtual process::Future<Nothing> recover(
+ const std::list<state::RunState>& states);
+
+ virtual process::Future<Nothing> prepare(
+ const ContainerID& containerId,
+ const ExecutorInfo& executorInfo);
+
+ virtual process::Future<Option<CommandInfo> > isolate(
+ const ContainerID& containerId,
+ pid_t pid);
+
+ virtual process::Future<Limitation> watch(
+ const ContainerID& containerId);
+
+ virtual process::Future<Nothing> update(
+ const ContainerID& containerId,
+ const Resources& resources);
+
+ virtual process::Future<ResourceStatistics> usage(
+ const ContainerID& containerId);
+
+ virtual process::Future<Nothing> cleanup(
+ const ContainerID& containerId);
+
+private:
+ CgroupsMemIsolatorProcess(const Flags& flags, const std::string& hierarchy);
+
+ // Presumably the continuation of cleanup() -- confirm in mem.cpp.
+ virtual process::Future<Nothing> _cleanup(const ContainerID& containerId);
+
+ // Per-container bookkeeping.
+ struct Info
+ {
+ Info(const ContainerID& _containerId, const std::string& _cgroup)
+ : containerId(_containerId), cgroup(_cgroup) {}
+
+ const ContainerID containerId;
+ const std::string cgroup;
+ // Not set by the constructor, so it starts out as None.
+ Option<pid_t> pid;
+
+ // Set by oom() with a Limitation describing the memory limit that was
+ // exceeded.
+ process::Promise<Limitation> limitation;
+
+ // Used to cancel the OOM listening.
+ process::Future<uint64_t> oomNotifier;
+ };
+
+ // Start listening on OOM events. This function will create an
+ // eventfd and start polling on it.
+ void oomListen(const ContainerID& containerId);
+
+ // This function is invoked when the polling on eventfd has a
+ // result.
+ void oomWaited(
+ const ContainerID& containerId,
+ const process::Future<uint64_t>& future);
+
+ // This function is invoked when the OOM event happens.
+ void oom(const ContainerID& containerId);
+
+ const Flags flags;
+
+ // The path to the cgroups subsystem hierarchy root.
+ const std::string hierarchy;
+
+ // NOTE(review): values are raw Info pointers; who deletes them is not
+ // visible from this header -- confirm they are freed on cleanup.
+ hashmap<ContainerID, Info*> infos;
+};
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
+
+#endif // __MEM_ISOLATOR_HPP__
[06/11] Containerizer (part 1)
Posted by vi...@apache.org.
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/slave/containerizer/mesos_containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos_containerizer.cpp b/src/slave/containerizer/mesos_containerizer.cpp
new file mode 100644
index 0000000..c61739b
--- /dev/null
+++ b/src/slave/containerizer/mesos_containerizer.cpp
@@ -0,0 +1,907 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <sstream>
+
+#include <process/collect.hpp>
+#include <process/defer.hpp>
+#include <process/io.hpp>
+#include <process/reap.hpp>
+#include <process/subprocess.hpp>
+
+#include <stout/fatal.hpp>
+#include <stout/os.hpp>
+#include <stout/unreachable.hpp>
+
+#include "slave/paths.hpp"
+#include "slave/slave.hpp"
+
+#include "slave/containerizer/mesos_containerizer.hpp"
+
+using std::list;
+using std::map;
+using std::string;
+using std::vector;
+
+using namespace process;
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+using state::SlaveState;
+using state::FrameworkState;
+using state::ExecutorState;
+using state::RunState;
+
+// Local helper that simply yields Nothing wrapped in a Future; used as a
+// continuation when the result of a previous stage is to be dropped.
+Future<Nothing> _nothing()
+{
+  return Nothing();
+}
+
+
+// Creates the backing MesosContainerizerProcess from the given arguments
+// and spawns it.
+MesosContainerizer::MesosContainerizer(
+    const Flags& flags,
+    bool local,
+    const Owned<Launcher>& launcher,
+    const vector<Owned<Isolator> >& isolators)
+{
+  process = new MesosContainerizerProcess(flags, local, launcher, isolators);
+
+  spawn(process);
+}
+
+
+// Terminates the backing process, waits for it to finish and frees it.
+MesosContainerizer::~MesosContainerizer()
+{
+  // The process is only deleted after wait() has returned.
+  terminate(process);
+  process::wait(process);
+
+  delete process;
+}
+
+
+// Forwards to MesosContainerizerProcess::recover on the backing process.
+Future<Nothing> MesosContainerizer::recover(
+    const Option<state::SlaveState>& state)
+{
+  return dispatch(
+      process,
+      &MesosContainerizerProcess::recover,
+      state);
+}
+
+
+// Forwards to MesosContainerizerProcess::launch on the backing process,
+// passing every argument through unchanged.
+Future<Nothing> MesosContainerizer::launch(
+    const ContainerID& containerId,
+    const ExecutorInfo& executorInfo,
+    const string& directory,
+    const Option<string>& user,
+    const SlaveID& slaveId,
+    const PID<Slave>& slavePid,
+    bool checkpoint)
+{
+  return dispatch(
+      process,
+      &MesosContainerizerProcess::launch,
+      containerId,
+      executorInfo,
+      directory,
+      user,
+      slaveId,
+      slavePid,
+      checkpoint);
+}
+
+
+// Forwards to MesosContainerizerProcess::update on the backing process.
+Future<Nothing> MesosContainerizer::update(
+    const ContainerID& containerId,
+    const Resources& resources)
+{
+  return dispatch(
+      process,
+      &MesosContainerizerProcess::update,
+      containerId,
+      resources);
+}
+
+
+// Forwards to MesosContainerizerProcess::usage on the backing process.
+Future<ResourceStatistics> MesosContainerizer::usage(
+    const ContainerID& containerId)
+{
+  return dispatch(
+      process,
+      &MesosContainerizerProcess::usage,
+      containerId);
+}
+
+
+// Forwards to MesosContainerizerProcess::wait on the backing process.
+Future<Containerizer::Termination> MesosContainerizer::wait(
+    const ContainerID& containerId)
+{
+  return dispatch(
+      process,
+      &MesosContainerizerProcess::wait,
+      containerId);
+}
+
+
+// Forwards to MesosContainerizerProcess::destroy on the backing process.
+// Nothing is returned to the caller.
+void MesosContainerizer::destroy(const ContainerID& containerId)
+{
+  dispatch(
+      process,
+      &MesosContainerizerProcess::destroy,
+      containerId);
+}
+
+
+// Recovers the containerizer from (optional) checkpointed slave state.
+//
+// The latest run of every executor that can still be recovered is
+// collected, handed first to the launcher and then to each isolator, and
+// once all isolators have recovered the work continues in _recover().
+// Fails if the launcher cannot recover.
+Future<Nothing> MesosContainerizerProcess::recover(
+    const Option<state::SlaveState>& state)
+{
+  LOG(INFO) << "Recovering containerizer";
+
+  // The executor runs we will attempt to recover.
+  list<RunState> runs;
+
+  if (state.isSome()) {
+    foreachvalue (const FrameworkState& framework, state.get().frameworks) {
+      foreachvalue (const ExecutorState& executor, framework.executors) {
+        if (executor.info.isNone()) {
+          LOG(WARNING) << "Skipping recovery of executor '" << executor.id
+                       << "' of framework " << framework.id
+                       << " because its info could not be recovered";
+          continue;
+        }
+
+        if (executor.latest.isNone()) {
+          LOG(WARNING) << "Skipping recovery of executor '" << executor.id
+                       << "' of framework " << framework.id
+                       << " because its latest run could not be recovered";
+          continue;
+        }
+
+        // Only the latest run of the executor is of interest.
+        const ContainerID& containerId = executor.latest.get();
+        CHECK(executor.runs.contains(containerId));
+        const RunState& run = executor.runs.get(containerId).get();
+
+        // Without a pid the reaper cannot monitor the executor, so such a
+        // run is skipped. That is not an error: the slave will wait on the
+        // container, get a failed Termination and clean everything up.
+        if (run.forkedPid.isNone()) {
+          continue;
+        }
+
+        if (run.completed) {
+          VLOG(1) << "Skipping recovery of executor '" << executor.id
+                  << "' of framework " << framework.id
+                  << " because its latest run "
+                  << containerId << " is completed";
+          continue;
+        }
+
+        LOG(INFO) << "Recovering container '" << containerId
+                  << "' for executor '" << executor.id
+                  << "' of framework " << framework.id;
+
+        runs.push_back(run);
+      }
+    }
+  }
+
+  // The launcher goes first ...
+  Try<Nothing> launcherRecovery = launcher->recover(runs);
+  if (launcherRecovery.isError()) {
+    return Failure(launcherRecovery.error());
+  }
+
+  // ... followed by every isolator.
+  list<Future<Nothing> > recoveries;
+  foreach (const Owned<Isolator>& isolator, isolators) {
+    recoveries.push_back(isolator->recover(runs));
+  }
+
+  // Continue only if all isolators recover.
+  return collect(recoveries)
+    .then(defer(self(), &Self::_recover, runs));
+}
+
+
+// Second stage of recovery, run once launcher and isolators have
+// recovered. For each recovered run it registers a termination promise,
+// starts reaping the executor's pid and watches every isolator for
+// limitations.
+Future<Nothing> MesosContainerizerProcess::_recover(
+    const list<RunState>& recovered)
+{
+  foreach (const RunState& run, recovered) {
+    CHECK_SOME(run.id);
+    CHECK_SOME(run.forkedPid);
+
+    const ContainerID& containerId = run.id.get();
+
+    Owned<Promise<Containerizer::Termination> > termination(
+        new Promise<Containerizer::Termination>());
+    promises.put(containerId, termination);
+
+    // Monitor the executor; exited() triggers the container's destroy.
+    Future<Option<int > > reaped = process::reap(run.forkedPid.get());
+    statuses[containerId] = reaped;
+    reaped.onAny(defer(self(), &Self::exited, containerId));
+
+    foreach (const Owned<Isolator>& isolator, isolators) {
+      isolator->watch(containerId)
+        .onAny(defer(self(), &Self::limited, containerId, lambda::_1));
+    }
+  }
+
+  return Nothing();
+}
+
+
+// Writes 'message' to stderr and then terminates the process with
+// _exit(1), using only write() and _exit() so that it can be called in an
+// async-signal-safe context.
+// TODO(idownes): Move this into stout, possibly replacing its fatal(), and
+// support multiple messages to write out.
+void asyncSafeFatal(const char* message)
+{
+  const size_t length = strlen(message);
+
+  // The result of write() is deliberately discarded (the cast silences the
+  // compiler warning); we are exiting regardless.
+  (void) write(STDERR_FILENO, message, length);
+
+  _exit(1);
+}
+
+
+// This function is executed by the forked child and should be
+// async-signal-safe.
+//
+// It blocks on 'pipeRead' until the parent writes sizeof(int) bytes, then
+// optionally chowns the work directory and switches user, changes into
+// 'directory', exports the environment, optionally redirects stdout and
+// stderr, and finally execs 'command' via '/bin/sh -c'. Any failure ends
+// the child through asyncSafeFatal(); on success this never returns.
+//
+// TODO(idownes): Several functions used here are not actually
+// async-signal-safe:
+// 1) os::close, os::chown and os::chdir concatenate strings on error
+// 2) os::setenv uses ::setenv that is not listed as safe
+// 3) freopen is not listed as safe
+// These can all be corrected and also we could write better error messages
+// with multiple writes in an improved asyncSafeFatal.
+int execute(
+ const CommandInfo& command,
+ const string& directory,
+ const Option<string>& user,
+ const map<string, string>& env,
+ bool redirectIO,
+ int pipeRead,
+ int pipeWrite)
+{
+ // Do a blocking read on the pipe until the parent signals us to continue.
+ // The child never writes, so its copy of the write end is closed first.
+ os::close(pipeWrite);
+ int buf;
+ ssize_t len;
+ // Retry the read if it is interrupted by a signal.
+ while ((len = read(pipeRead, &buf, sizeof(buf))) == -1 && errno == EINTR);
+
+ // Only the number of bytes read is checked; the value itself is ignored.
+ if (len != sizeof(buf)) {
+ os::close(pipeRead);
+ asyncSafeFatal("Failed to synchronize with parent");
+ }
+ os::close(pipeRead);
+
+ // Chown the work directory if a user is provided.
+ if (user.isSome()) {
+ Try<Nothing> chown = os::chown(user.get(), directory);
+ if (chown.isError()) {
+ asyncSafeFatal("Failed to chown work directory");
+ }
+ }
+
+ // Change user if provided.
+ if (user.isSome() && !os::su(user.get())) {
+ asyncSafeFatal("Failed to change user");
+ }
+
+ // Enter working directory.
+ // NOTE(review): this assumes os::chdir returns a negative int on failure;
+ // if it returns bool (like os::su above) this check can never fire --
+ // confirm against stout's os.hpp.
+ if (os::chdir(directory) < 0) {
+ asyncSafeFatal("Failed to chdir into work directory");
+ }
+
+ // First set up any additional environment variables.
+ // TODO(idownes): setenv is not async-signal-safe. Environment variables
+ // could instead be set using execle.
+ foreachpair (const string& key, const string& value, env) {
+ os::setenv(key, value);
+ }
+
+ // Then set up environment variables from CommandInfo.
+ foreach(const Environment::Variable& variable,
+ command.environment().variables()) {
+ os::setenv(variable.name(), variable.value());
+ }
+
+ // Redirect output to files in working dir if required. We append because
+ // others (e.g., mesos-fetcher) may have already logged to the files.
+ // The relative names resolve against the work directory entered above.
+ // TODO(bmahler): It would be best if instead of closing stderr /
+ // stdout and redirecting, we instead always output to stderr /
+ // stdout. Also tee'ing their output into the work directory files
+ // when redirection is desired.
+ // TODO(idownes): freopen is not async-signal-safe. Could use dup2 and open
+ // directly.
+ if (redirectIO) {
+ if (freopen("stdout", "a", stdout) == NULL) {
+ asyncSafeFatal("freopen failed");
+ }
+ if (freopen("stderr", "a", stderr) == NULL) {
+ asyncSafeFatal("freopen failed");
+ }
+ }
+
+ // Execute the command (via '/bin/sh -c command').
+ execl("/bin/sh", "sh", "-c", command.value().c_str(), (char*) NULL);
+
+ // If we get here, the execl call failed.
+ asyncSafeFatal("Failed to execute command");
+
+ // Silence end of non-void function warning.
+ return UNREACHABLE();
+}
+
+
+// Launching an executor involves the following steps:
+// 1. Prepare the container. First call prepare on each isolator and then
+//    fetch the executor into the container sandbox.
+// 2. Fork the executor. The forked child is blocked from exec'ing until it
+//    has been isolated.
+// 3. Isolate the executor. Call isolate with the pid for each isolator.
+// 4. Exec the executor. The forked child is signalled to continue and exec
+//    the executor.
+// Fails immediately if the container is already known; a failure in any
+// later stage triggers destroy() of the container.
+Future<Nothing> MesosContainerizerProcess::launch(
+    const ContainerID& containerId,
+    const ExecutorInfo& executorInfo,
+    const string& directory,
+    const Option<string>& user,
+    const SlaveID& slaveId,
+    const PID<Slave>& slavePid,
+    bool checkpoint)
+{
+  if (promises.contains(containerId)) {
+    LOG(ERROR) << "Cannot start already running container '"
+               << containerId << "'";
+    return Failure("Container already started");
+  }
+
+  Owned<Promise<Containerizer::Termination> > termination(
+      new Promise<Containerizer::Termination>());
+  promises.put(containerId, termination);
+
+  // Remember the resources so that usage() can report the limits.
+  resources.put(containerId, executorInfo.resources());
+
+  LOG(INFO) << "Starting container '" << containerId
+            << "' for executor '" << executorInfo.executor_id()
+            << "' of framework '" << executorInfo.framework_id() << "'";
+
+  // Additional environment variables for the executor.
+  const map<string, string>& environment = executorEnvironment(
+      executorInfo,
+      directory,
+      slaveId,
+      slavePid,
+      checkpoint,
+      flags.recovery_timeout);
+
+  // A pipe is used to block the child until it has been isolated: the
+  // child reads from it and the parent writes to it in exec().
+  // TODO(idownes): Ensure the pipe's file descriptors are closed even if
+  // some stage of the executor launch fails.
+  int fds[2];
+  // We assume this should not fail under reasonable conditions so we use
+  // CHECK.
+  CHECK(pipe(fds) == 0);
+
+  const int readEnd = fds[0];
+  const int writeEnd = fds[1];
+
+  // The function the forked child runs in order to exec() the executor.
+  lambda::function<int()> inChild = lambda::bind(
+      &execute,
+      executorInfo.command(),
+      directory,
+      user,
+      environment,
+      !local,
+      readEnd,
+      writeEnd);
+
+  // prepare -> fork -> isolate -> exec. Both pipe ends are closed once the
+  // chain completes, whatever the outcome.
+  return prepare(containerId, executorInfo, directory, user)
+    .then(defer(self(),
+                &Self::fork,
+                containerId,
+                executorInfo,
+                inChild,
+                slaveId,
+                checkpoint,
+                readEnd))
+    .then(defer(self(),
+                &Self::isolate,
+                containerId,
+                lambda::_1))
+    .then(defer(self(),
+                &Self::exec,
+                containerId,
+                writeEnd))
+    .onAny(lambda::bind(&os::close, readEnd))
+    .onAny(lambda::bind(&os::close, writeEnd))
+    .onFailed(defer(self(),
+                    &Self::destroy,
+                    containerId));
+}
+
+
+// First launch stage: asks every isolator to prepare the container and,
+// once all of them have finished, fetches the executor via fetch().
+Future<Nothing> MesosContainerizerProcess::prepare(
+    const ContainerID& containerId,
+    const ExecutorInfo& executorInfo,
+    const string& directory,
+    const Option<string>& user)
+{
+  // Kick off every isolator's preparation without waiting in between.
+  list<Future<Nothing> > preparations;
+  foreach (const Owned<Isolator>& isolator, isolators) {
+    preparations.push_back(isolator->prepare(containerId, executorInfo));
+  }
+
+  // Fetch the executor only after all preparations have completed.
+  return collect(preparations)
+    .then(defer(self(),
+                &Self::fetch,
+                containerId,
+                executorInfo.command(),
+                directory,
+                user));
+}
+
+
+// Continuation of fetch(): turns the fetcher's exit status into a result.
+// Only a known exit status of 0 counts as success; anything else (no
+// status, or non-zero) is a failure.
+Future<Nothing> _fetch(
+    const ContainerID& containerId,
+    const Option<int>& status)
+{
+  if (status.isSome() && status.get() == 0) {
+    return Nothing();
+  }
+
+  return Failure("Failed to fetch URIs for container '" +
+                 stringify(containerId) + "': exit status " +
+                 (status.isNone() ? "none" : stringify(status.get())));
+}
+
+
+// Fetches the URIs of 'commandInfo' into 'directory' by running the
+// mesos-fetcher binary (looked up in flags.launcher_dir) as a subprocess.
+//
+// The fetcher's stdout and stderr are spliced into the 'stdout' and
+// 'stderr' files of the work directory; those files are chown'ed to 'user'
+// if one is given. The returned future completes via _fetch() with the
+// fetcher's exit status, or fails earlier if any set-up step fails.
+Future<Nothing> MesosContainerizerProcess::fetch(
+    const ContainerID& containerId,
+    const CommandInfo& commandInfo,
+    const string& directory,
+    const Option<string>& user)
+{
+  // Determine path for mesos-fetcher.
+  Result<string> realpath = os::realpath(
+      path::join(flags.launcher_dir, "mesos-fetcher"));
+
+  if (!realpath.isSome()) {
+    LOG(ERROR) << "Failed to determine the canonical path "
+               << "for the mesos-fetcher '"
+               << path::join(flags.launcher_dir, "mesos-fetcher")
+               << "': "
+               << (realpath.isError() ? realpath.error()
+                                      : "No such file or directory");
+    return Failure("Could not fetch URIs: failed to find mesos-fetcher");
+  }
+
+  // Prepare the environment variables to pass to mesos-fetcher. Each URI is
+  // encoded as '<value>+<0|1>' (1 = executable), separated by spaces.
+  string uris = "";
+  foreach (const CommandInfo::URI& uri, commandInfo.uris()) {
+    uris += uri.value() + "+" +
+            (uri.has_executable() && uri.executable() ? "1" : "0");
+    uris += " ";
+  }
+  // Remove extra space at the end.
+  uris = strings::trim(uris);
+
+  // Use /usr/bin/env to set the environment variables for the fetcher
+  // subprocess because we cannot pollute the slave's environment.
+  // TODO(idownes): Remove this once Subprocess accepts environment
+  // variables.
+  string command = "/usr/bin/env";
+  // The URI list is space separated, so it must be quoted: unquoted, only
+  // the first URI would end up in the variable and the next one would be
+  // split off as a separate word of the command line.
+  // NOTE(review): the values are still not shell-escaped; a URI, directory
+  // or user containing quotes or other shell metacharacters is not handled.
+  command += " MESOS_EXECUTOR_URIS=\"" + uris + "\"";
+  command += " MESOS_WORK_DIRECTORY=" + directory;
+  if (user.isSome()) {
+    command += " MESOS_USER=" + user.get();
+  }
+  command += " MESOS_FRAMEWORKS_HOME=" + flags.frameworks_home;
+  command += " HADOOP_HOME=" + flags.hadoop_home;
+
+  // Now the actual mesos-fetcher command.
+  command += " " + realpath.get();
+
+  LOG(INFO) << "Fetching URIs for container '" << containerId
+            << "' using command '" << command << "'";
+
+  Try<Subprocess> fetcher = subprocess(command);
+  if (fetcher.isError()) {
+    return Failure("Failed to execute mesos-fetcher: " + fetcher.error());
+  }
+
+  // Redirect output (stdout and stderr) from the fetcher to log files in
+  // the executor work directory, chown'ing them if a user is specified.
+  // All fallible set-up for both streams is done before either splice is
+  // started, so that every error path below closes each descriptor exactly
+  // once; once a splice runs, its onAny callback owns the descriptor.
+
+  // Log files are rw for the owner and read-only for group and others.
+  const mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
+
+  Try<int> out = os::open(
+      path::join(directory, "stdout"),
+      O_WRONLY | O_CREAT | O_TRUNC | O_NONBLOCK,
+      mode);
+
+  if (out.isError()) {
+    return Failure("Failed to redirect stdout: " + out.error());
+  }
+
+  if (user.isSome()) {
+    Try<Nothing> chown = os::chown(user.get(), path::join(directory, "stdout"));
+    if (chown.isError()) {
+      os::close(out.get());
+      return Failure("Failed to redirect stdout: " + chown.error());
+    }
+  }
+
+  Try<Nothing> nonblock = os::nonblock(fetcher.get().out());
+  if (nonblock.isError()) {
+    os::close(out.get());
+    return Failure("Failed to redirect stdout: " + nonblock.error());
+  }
+
+  // Repeat for stderr.
+  Try<int> err = os::open(
+      path::join(directory, "stderr"),
+      O_WRONLY | O_CREAT | O_TRUNC | O_NONBLOCK,
+      mode);
+
+  if (err.isError()) {
+    os::close(out.get());
+    return Failure("Failed to redirect stderr: " + err.error());
+  }
+
+  if (user.isSome()) {
+    Try<Nothing> chown = os::chown(user.get(), path::join(directory, "stderr"));
+    if (chown.isError()) {
+      os::close(out.get());
+      os::close(err.get());
+      return Failure("Failed to redirect stderr: " + chown.error());
+    }
+  }
+
+  nonblock = os::nonblock(fetcher.get().err());
+  if (nonblock.isError()) {
+    os::close(out.get());
+    os::close(err.get());
+    return Failure("Failed to redirect stderr: " + nonblock.error());
+  }
+
+  // Start copying; each log file is closed when its splice completes.
+  io::splice(fetcher.get().out(), out.get())
+    .onAny(lambda::bind(&os::close, out.get()));
+
+  io::splice(fetcher.get().err(), err.get())
+    .onAny(lambda::bind(&os::close, err.get()));
+
+  return fetcher.get().status()
+    .then(lambda::bind(&_fetch, containerId, lambda::_1));
+}
+
+
+// Second launch stage: forks the executor through the launcher, optionally
+// checkpoints the forked pid, and starts reaping it. Returns the child's
+// pid, or a failure if forking or checkpointing fails.
+// NOTE: 'pipeRead' is not used by this function.
+Future<pid_t> MesosContainerizerProcess::fork(
+    const ContainerID& containerId,
+    const ExecutorInfo& executorInfo,
+    lambda::function<int()>& inChild,
+    const SlaveID& slaveId,
+    bool checkpoint,
+    int pipeRead)
+{
+  Try<pid_t> forked = launcher->fork(containerId, inChild);
+  if (forked.isError()) {
+    return Failure("Failed to fork executor: " + forked.error());
+  }
+
+  const pid_t child = forked.get();
+
+  // Checkpoint the executor's pid if requested.
+  if (checkpoint) {
+    const string& pidPath = slave::paths::getForkedPidPath(
+        slave::paths::getMetaRootDir(flags.work_dir),
+        slaveId,
+        executorInfo.framework_id(),
+        executorInfo.executor_id(),
+        containerId);
+
+    LOG(INFO) << "Checkpointing executor's forked pid " << child
+              << " to '" << pidPath << "'";
+
+    Try<Nothing> written =
+      slave::state::checkpoint(pidPath, stringify(child));
+
+    if (written.isError()) {
+      LOG(ERROR) << "Failed to checkpoint executor's forked pid to '"
+                 << pidPath << "': " << written.error();
+
+      return Failure("Could not checkpoint executor's pid");
+    }
+  }
+
+  // Monitor the executor's pid. The future is stored because it is
+  // consulted again when the container is destroyed.
+  Future<Option<int> > reaped = process::reap(child);
+  statuses.put(containerId, reaped);
+  reaped.onAny(defer(self(), &Self::exited, containerId));
+
+  return child;
+}
+
+
+// Third launch stage: registers limitation callbacks on every isolator,
+// then asks each isolator to isolate the forked pid. Continues in
+// _isolate() with the (optional) commands the isolators returned.
+Future<Nothing> MesosContainerizerProcess::isolate(
+    const ContainerID& containerId,
+    pid_t _pid)
+{
+  // All watches are set up before any isolator is asked to isolate.
+  foreach (const Owned<Isolator>& isolator, isolators) {
+    isolator->watch(containerId)
+      .onAny(defer(self(), &Self::limited, containerId, lambda::_1));
+  }
+
+  // Isolate the executor with each isolator, gathering the optional
+  // additional commands to be run in the containerized context.
+  list<Future<Option<CommandInfo> > > isolations;
+  foreach (const Owned<Isolator>& isolator, isolators) {
+    isolations.push_back(isolator->isolate(containerId, _pid));
+  }
+
+  // Once every isolator is done, hand the commands to _isolate().
+  return collect(isolations)
+    .then(defer(self(), &Self::_isolate, containerId, lambda::_1));
+}
+
+
+// Continuation of isolate(). Additional isolation commands are not yet
+// supported: each one present is only reported with a warning.
+Future<Nothing> MesosContainerizerProcess::_isolate(
+    const ContainerID& containerId,
+    const list<Option<CommandInfo> >& commands)
+{
+  // TODO(idownes): Implement execution of additional isolation commands.
+  foreach (const Option<CommandInfo>& command, commands) {
+    if (command.isNone()) {
+      continue;
+    }
+
+    LOG(WARNING) << "Additional isolation commands not implemented";
+  }
+
+  return Nothing();
+}
+
+
+// Final launch stage: signals the blocked child to continue (and exec the
+// executor) by writing sizeof(int) bytes to 'pipeWrite'. Fails if the
+// write does not transfer all of the bytes.
+Future<Nothing> MesosContainerizerProcess::exec(
+    const ContainerID& containerId,
+    int pipeWrite)
+{
+  CHECK(promises.contains(containerId));
+
+  // Now that we've contained the child we can signal it to continue by
+  // writing to the pipe. The buffer is initialized so that we never write
+  // indeterminate memory.
+  int buf = 0;
+  ssize_t len;
+  // Retry the write if it is interrupted by a signal.
+  while ((len = write(pipeWrite, &buf, sizeof(buf))) == -1 && errno == EINTR);
+
+  if (len != sizeof(buf)) {
+    // errno is only meaningful if write() itself reported an error.
+    return Failure(
+        "Failed to synchronize child process: " +
+        (len == -1 ? string(strerror(errno)) : string("short write")));
+  }
+
+  return Nothing();
+}
+
+
+// Returns the future of the container's termination promise, or a failure
+// if the container is not known.
+Future<Containerizer::Termination> MesosContainerizerProcess::wait(
+    const ContainerID& containerId)
+{
+  if (promises.contains(containerId)) {
+    return promises[containerId]->future();
+  }
+
+  return Failure("Unknown container: " + stringify(containerId));
+}
+
+
+// Records the new resources of a container and propagates them to every
+// isolator. Fails if the container is not known, or if any isolator fails
+// to update.
+Future<Nothing> MesosContainerizerProcess::update(
+    const ContainerID& containerId,
+    const Resources& _resources)
+{
+  // NOTE: 'resources' has no entry for a container restored by _recover()
+  // until its first update(), so whether the container is known must be
+  // decided by 'promises', which both launch() and _recover() populate.
+  // Testing 'resources' here would reject every recovered container.
+  if (!promises.contains(containerId)) {
+    return Failure("Unknown container: " + stringify(containerId));
+  }
+
+  // Store the resources for usage().
+  resources.put(containerId, _resources);
+
+  // Update each isolator.
+  list<Future<Nothing> > futures;
+  foreach (const Owned<Isolator>& isolator, isolators) {
+    futures.push_back(isolator->update(containerId, _resources));
+  }
+
+  // Wait for all isolators to complete.
+  return collect(futures)
+    .then(lambda::bind(_nothing));
+}
+
+
+// Continuation of usage(): merges the statistics that are ready into one
+// ResourceStatistics, skipping (with a warning) any that failed or were
+// discarded.
+//
+// Resources are used to set the limit fields in the statistics but are
+// optional because they aren't known after recovery until/unless update()
+// is called.
+Future<ResourceStatistics> _usage(
+    const ContainerID& containerId,
+    const Option<Resources>& resources,
+    const list<Future<ResourceStatistics> >& statistics)
+{
+  ResourceStatistics merged;
+
+  // Timestamp the result now that all statistics are in.
+  merged.set_timestamp(Clock::now().secs());
+
+  foreach (const Future<ResourceStatistics>& statistic, statistics) {
+    if (!statistic.isReady()) {
+      LOG(WARNING) << "Skipping resource statistic for container "
+                   << containerId << " because: "
+                   << (statistic.isFailed() ? statistic.failure()
+                                            : "discarded");
+      continue;
+    }
+
+    merged.MergeFrom(statistic.get());
+  }
+
+  // Without known resources the limit fields are left untouched.
+  if (resources.isNone()) {
+    return merged;
+  }
+
+  // Set the resource allocations.
+  Option<Bytes> mem = resources.get().mem();
+  if (mem.isSome()) {
+    merged.set_mem_limit_bytes(mem.get().bytes());
+  }
+
+  Option<double> cpus = resources.get().cpus();
+  if (cpus.isSome()) {
+    merged.set_cpus_limit(cpus.get());
+  }
+
+  return merged;
+}
+
+
+// Collects usage statistics for a container from every isolator and merges
+// them in _usage(). Fails if the container is not known.
+Future<ResourceStatistics> MesosContainerizerProcess::usage(
+    const ContainerID& containerId)
+{
+  if (!promises.contains(containerId)) {
+    return Failure("Unknown container: " + stringify(containerId));
+  }
+
+  list<Future<ResourceStatistics> > statistics;
+  foreach (const Owned<Isolator>& isolator, isolators) {
+    statistics.push_back(isolator->usage(containerId));
+  }
+
+  // await() (rather than collect()) is used so that partial usage
+  // statistics can still be returned.
+  // TODO(idownes): After recovery resources won't be known until after an
+  // update() because they aren't part of the SlaveState.
+  return await(statistics)
+    .then(lambda::bind(
+        _usage,
+        containerId,
+        resources.get(containerId),
+        lambda::_1));
+}
+
+
+// Starts destroying a container. Unknown containers and containers whose
+// destruction is already in progress are ignored. If the executor was
+// forked its processes are killed through the launcher first; otherwise
+// the destroy completes right away via __destroy().
+void MesosContainerizerProcess::destroy(const ContainerID& containerId)
+{
+  if (!promises.contains(containerId)) {
+    LOG(WARNING) << "Ignoring destroy of unknown container: " << containerId;
+    return;
+  }
+
+  // Destroy has already been initiated.
+  if (destroying.contains(containerId)) {
+    return;
+  }
+
+  destroying.insert(containerId);
+
+  LOG(INFO) << "Destroying container '" << containerId << "'";
+
+  if (!statuses.contains(containerId)) {
+    // The executor never forked so there are no processes to kill; go
+    // straight to __destroy() with status = None().
+    __destroy(containerId, None());
+    return;
+  }
+
+  // Kill all processes then continue destruction.
+  launcher->destroy(containerId)
+    .onAny(defer(self(), &Self::_destroy, containerId, lambda::_1));
+}
+
+
+// Continuation of destroy(), invoked with the result of the launcher's
+// attempt to kill the container's processes. On success the destroy
+// continues in __destroy() once the executor's exit status is available;
+// otherwise the container's termination promise is failed.
+void MesosContainerizerProcess::_destroy(
+    const ContainerID& containerId,
+    const Future<Nothing>& future)
+{
+  // Something has gone wrong and the launcher wasn't able to kill all the
+  // processes in the container. We cannot clean up the isolators because
+  // they may require that all processes have exited so just return the
+  // failure to the slave.
+  // TODO(idownes): This is a pretty bad state to be in but we should
+  // consider cleaning up here.
+  if (!future.isReady()) {
+    // The conditional must be parenthesized: '+' binds tighter than '?:',
+    // so without the parentheses the condition becomes the (always
+    // non-null) pointer '"..." + bool', failure() is called even on a
+    // discarded future, and the message prefix is dropped.
+    promises[containerId]->fail(
+        "Failed to destroy container: " +
+        (future.isFailed() ? future.failure() : "discarded future"));
+    return;
+  }
+
+  // We've successfully killed all processes in the container so get the
+  // exit status of the executor when it's ready (it may already be) and
+  // continue the destroy.
+  statuses.get(containerId).get()
+    .onAny(defer(self(), &Self::__destroy, containerId, lambda::_1));
+}
+
+
+// Last stage of destroying a container: cleans up every isolator, fulfils
+// the termination promise with the executor's exit status (if it is
+// ready), and forgets all state kept for the container.
+void MesosContainerizerProcess::__destroy(
+    const ContainerID& containerId,
+    const Future<Option<int > >& status)
+{
+  // A container is 'killed' if any isolator limited it.
+  // Note: We may not see a limitation in time for it to be registered.
+  // This could occur if the limitation (e.g., an OOM) killed the executor
+  // and we triggered destroy() off the executor exit.
+  const bool killed = limitations.contains(containerId);
+
+  string message;
+  if (killed) {
+    // Concatenate the messages of every limitation recorded.
+    foreach (const Limitation& limitation, limitations.get(containerId)) {
+      message += limitation.message;
+    }
+    message = strings::trim(message);
+  } else {
+    message = "Executor terminated";
+  }
+
+  // We can now clean up all isolators.
+  foreach (const Owned<Isolator>& isolator, isolators) {
+    isolator->cleanup(containerId);
+  }
+
+  promises[containerId]->set(Containerizer::Termination(
+      status.isReady() ? status.get() : None(),
+      killed,
+      message));
+
+  // Drop everything we know about the container.
+  promises.erase(containerId);
+  statuses.erase(containerId);
+  limitations.erase(containerId);
+  resources.erase(containerId);
+  destroying.erase(containerId);
+}
+
+
+// Callback for the reaped executor pid: destroys the container if it is
+// still known, otherwise does nothing.
+void MesosContainerizerProcess::exited(const ContainerID& containerId)
+{
+  if (promises.contains(containerId)) {
+    LOG(INFO) << "Executor for container '" << containerId << "' has exited";
+
+    // The executor has exited so destroy the container.
+    destroy(containerId);
+  }
+}
+
+
+// Callback for an isolator's watch() future. A ready limitation is
+// recorded for the container; a failed or discarded future is logged as an
+// error. In both cases a still-known container is then destroyed.
+void MesosContainerizerProcess::limited(
+    const ContainerID& containerId,
+    const Future<Limitation>& future)
+{
+  if (!promises.contains(containerId)) {
+    return;
+  }
+
+  if (!future.isReady()) {
+    // TODO(idownes): A discarded future will not be an error when
+    // isolators discard their promises after cleanup.
+    LOG(ERROR) << "Error in a resource limitation for container "
+               << containerId << ": " << (future.isFailed() ? future.failure()
+                                                             : "discarded");
+  } else {
+    LOG(INFO) << "Container " << containerId << " has reached its limit for"
+              << " resource " << future.get().resource
+              << " and will be terminated";
+    limitations.put(containerId, future.get());
+  }
+
+  // The container has been affected by the limitation so destroy it.
+  destroy(containerId);
+}
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/slave/containerizer/mesos_containerizer.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos_containerizer.hpp b/src/slave/containerizer/mesos_containerizer.hpp
new file mode 100644
index 0000000..f302c34
--- /dev/null
+++ b/src/slave/containerizer/mesos_containerizer.hpp
@@ -0,0 +1,208 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MESOS_CONTAINERIZER_HPP__
+#define __MESOS_CONTAINERIZER_HPP__
+
+#include <list>
+#include <vector>
+
+#include <stout/hashmap.hpp>
+#include <stout/lambda.hpp>
+#include <stout/multihashmap.hpp>
+
+#include "slave/containerizer/containerizer.hpp"
+#include "slave/containerizer/isolator.hpp"
+#include "slave/containerizer/launcher.hpp"
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+// Forward declaration.
+class MesosContainerizerProcess;
+
+// Containerizer implementation built from a Launcher and a list of Isolators.
+// Its public methods have the same names and parameters as the public methods
+// of MesosContainerizerProcess, and its only data member is a pointer to such
+// a process.
+// NOTE(review): presumably every call is forwarded to 'process'; the method
+// definitions are not in this header -- confirm in mesos_containerizer.cpp.
+class MesosContainerizer : public Containerizer
+{
+public:
+ // The arguments match, one for one, the constructor parameters of
+ // MesosContainerizerProcess.
+ MesosContainerizer(
+ const Flags& flags,
+ bool local,
+ const process::Owned<Launcher>& launcher,
+ const std::vector<process::Owned<Isolator> >& isolators);
+
+ virtual ~MesosContainerizer();
+
+ // The operations below all return futures, except destroy() which returns
+ // nothing.
+ virtual process::Future<Nothing> recover(
+ const Option<state::SlaveState>& state);
+
+ virtual process::Future<Nothing> launch(
+ const ContainerID& containerId,
+ const ExecutorInfo& executorInfo,
+ const std::string& directory,
+ const Option<std::string>& user,
+ const SlaveID& slaveId,
+ const process::PID<Slave>& slavePid,
+ bool checkpoint);
+
+ virtual process::Future<Nothing> update(
+ const ContainerID& containerId,
+ const Resources& resources);
+
+ virtual process::Future<ResourceStatistics> usage(
+ const ContainerID& containerId);
+
+ virtual process::Future<Containerizer::Termination> wait(
+ const ContainerID& containerId);
+
+ virtual void destroy(const ContainerID& containerId);
+
+private:
+ // NOTE(review): raw pointer; presumably created in the constructor and
+ // released in the destructor -- confirm in the .cpp.
+ MesosContainerizerProcess* process;
+};
+
+
+// Process that holds the per-container state (wait() promises, executor exit
+// statuses, limitations, resources and the set of containers being destroyed)
+// together with the launcher and isolators it was constructed with.
+class MesosContainerizerProcess : public process::Process<MesosContainerizerProcess>
+{
+public:
+ // Copies the given flags, 'local' setting, launcher and isolators into the
+ // const members of the same names.
+ MesosContainerizerProcess(
+ const Flags& _flags,
+ bool _local,
+ const process::Owned<Launcher>& _launcher,
+ const std::vector<process::Owned<Isolator> >& _isolators)
+ : flags(_flags),
+ local(_local),
+ launcher(_launcher),
+ isolators(_isolators) {}
+
+ virtual ~MesosContainerizerProcess() {}
+
+ process::Future<Nothing> recover(
+ const Option<state::SlaveState>& state);
+
+ process::Future<Nothing> launch(
+ const ContainerID& containerId,
+ const ExecutorInfo& executorInfo,
+ const std::string& directory,
+ const Option<std::string>& user,
+ const SlaveID& slaveId,
+ const process::PID<Slave>& slavePid,
+ bool checkpoint);
+
+ process::Future<Nothing> update(
+ const ContainerID& containerId,
+ const Resources& resources);
+
+ process::Future<ResourceStatistics> usage(
+ const ContainerID& containerId);
+
+ process::Future<Containerizer::Termination> wait(
+ const ContainerID& containerId);
+
+ void destroy(const ContainerID& containerId);
+
+private:
+ // NOTE(review): '_recover' is presumably the continuation of 'recover()',
+ // following the underscore naming used for '_destroy' below -- confirm in
+ // the .cpp.
+ process::Future<Nothing> _recover(
+ const std::list<state::RunState>& recovered);
+
+ // NOTE(review): the helpers from 'prepare' through 'exec' are presumably the
+ // successive stages of 'launch()'; their definitions are not in this header
+ // -- confirm the order and chaining in the .cpp.
+ process::Future<Nothing> prepare(
+ const ContainerID& containerId,
+ const ExecutorInfo& executorInfo,
+ const std::string& directory,
+ const Option<std::string>& user);
+
+ process::Future<Nothing> fetch(
+ const ContainerID& containerId,
+ const CommandInfo& commandInfo,
+ const std::string& directory,
+ const Option<std::string>& user);
+
+ process::Future<pid_t> fork(
+ const ContainerID& containerId,
+ const ExecutorInfo& executorInfo,
+ lambda::function<int()>& inChild,
+ const SlaveID& slaveId,
+ bool checkpoint,
+ int pipeRead);
+
+ process::Future<Nothing> isolate(
+ const ContainerID& containerId,
+ pid_t _pid);
+
+ process::Future<Nothing> _isolate(
+ const ContainerID& containerId,
+ const std::list<Option<CommandInfo> >& commands);
+
+ process::Future<Nothing> exec(
+ const ContainerID& containerId,
+ int pipeWrite);
+
+ // Continues 'destroy()' once all processes have been killed by the launcher.
+ void _destroy(
+ const ContainerID& containerId,
+ const process::Future<Nothing>& future);
+
+ // Continues (and completes) '_destroy()' once we get the exit status of the
+ // executor.
+ void __destroy(
+ const ContainerID& containerId,
+ const process::Future<Option<int > >& status);
+
+ // Callback for when an isolator limits a container and impacts the
+ // processes. This will trigger container destruction.
+ void limited(
+ const ContainerID& containerId,
+ const process::Future<Limitation>& future);
+
+ // Callback for when the executor exits. This will trigger container
+ // destruction.
+ void exited(const ContainerID& containerId);
+
+ // Set once by the constructor; all four are const.
+ const Flags flags;
+ const bool local;
+ const process::Owned<Launcher> launcher;
+ const std::vector<process::Owned<Isolator> > isolators;
+
+ // TODO(idownes): Consider putting these per-container variables into a
+ // struct.
+ // Promises for futures returned from wait().
+ hashmap<ContainerID,
+ process::Owned<process::Promise<Containerizer::Termination> > > promises;
+
+ // We need to keep track of the future exit status for each executor because
+ // we'll only get a single notification when the executor exits.
+ hashmap<ContainerID, process::Future<Option<int> > > statuses;
+
+ // We keep track of any limitations received from each isolator so we can
+ // determine the cause of an executor termination.
+ multihashmap<ContainerID, Limitation> limitations;
+
+ // We keep track of the resources for each container so we can set the
+ // ResourceStatistics limits in usage().
+ hashmap<ContainerID, Resources> resources;
+
+ // Set of containers that are in the process of being destroyed.
+ hashset<ContainerID> destroying;
+};
+
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
+
+#endif // __MESOS_CONTAINERIZER_HPP__
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/slave/flags.hpp
----------------------------------------------------------------------
diff --git a/src/slave/flags.hpp b/src/slave/flags.hpp
index 827b2d0..e4d98a5 100644
--- a/src/slave/flags.hpp
+++ b/src/slave/flags.hpp
@@ -49,6 +49,12 @@ public:
"Total consumable resources per slave, in\n"
"the form 'name(role):value;name(role):value...'.");
+ add(&Flags::isolation,
+ "isolation",
+ "Isolation mechanisms to use, e.g., 'posix/cpu,posix/mem'\n"
+ "or 'cgroups/cpu,cgroups/mem'.",
+ "posix/cpu,posix/mem");
+
add(&Flags::default_role,
"default_role",
"Any resources in the --resources flag that\n"
@@ -167,7 +173,7 @@ public:
add(&Flags::cgroups_hierarchy,
"cgroups_hierarchy",
"The path to the cgroups hierarchy root\n",
- "/cgroup");
+ "/sys/fs/cgroup");
add(&Flags::cgroups_root,
"cgroups_root",
@@ -176,8 +182,8 @@ public:
add(&Flags::cgroups_subsystems,
"cgroups_subsystems",
- "List of subsystems to enable (e.g., 'cpu,freezer')\n",
- "cpu,memory,freezer");
+ "This flag has been deprecated and is no longer used,\n"
+ "please update your flags");
add(&Flags::cgroups_enable_cfs,
"cgroups_enable_cfs",
@@ -189,6 +195,7 @@ public:
Option<std::string> hostname;
Option<std::string> resources;
+ std::string isolation;
std::string default_role;
Option<std::string> attributes;
std::string work_dir;
@@ -208,7 +215,7 @@ public:
#ifdef __linux__
std::string cgroups_hierarchy;
std::string cgroups_root;
- std::string cgroups_subsystems;
+ Option<std::string> cgroups_subsystems;
bool cgroups_enable_cfs;
#endif
};
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/slave/http.cpp
----------------------------------------------------------------------
diff --git a/src/slave/http.cpp b/src/slave/http.cpp
index c4f598f..7c4cfba 100644
--- a/src/slave/http.cpp
+++ b/src/slave/http.cpp
@@ -201,7 +201,7 @@ JSON::Object model(const Executor& executor)
object.values["id"] = executor.id.value();
object.values["name"] = executor.info.name();
object.values["source"] = executor.info.source();
- object.values["uuid"] = executor.uuid.toString();
+ object.values["container"] = executor.containerId.value();
object.values["directory"] = executor.directory;
object.values["resources"] = model(executor.resources);
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/slave/isolator.cpp
----------------------------------------------------------------------
diff --git a/src/slave/isolator.cpp b/src/slave/isolator.cpp
deleted file mode 100644
index c9643cf..0000000
--- a/src/slave/isolator.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "isolator.hpp"
-#include "process_isolator.hpp"
-#ifdef __linux__
-#include "cgroups_isolator.hpp"
-#endif
-
-
-namespace mesos {
-namespace internal {
-namespace slave {
-
-Isolator* Isolator::create(const std::string &type)
-{
- if (type == "process") {
- return new ProcessIsolator();
-#ifdef __linux__
- } else if (type == "cgroups") {
- return new CgroupsIsolator();
-#endif
- }
-
- return NULL;
-}
-
-
-void Isolator::destroy(Isolator* isolator)
-{
- if (isolator != NULL) {
- delete isolator;
- }
-}
-
-} // namespace slave {
-} // namespace internal {
-} // namespace mesos {
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/slave/isolator.hpp
----------------------------------------------------------------------
diff --git a/src/slave/isolator.hpp b/src/slave/isolator.hpp
deleted file mode 100644
index 9634535..0000000
--- a/src/slave/isolator.hpp
+++ /dev/null
@@ -1,110 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ISOLATOR_HPP__
-#define __ISOLATOR_HPP__
-
-#include <unistd.h>
-
-#include <string>
-
-#include <mesos/mesos.hpp>
-#include <mesos/resources.hpp>
-
-#include <process/future.hpp>
-#include <process/process.hpp>
-
-#include <stout/hashmap.hpp>
-#include <stout/nothing.hpp>
-#include <stout/option.hpp>
-#include <stout/uuid.hpp>
-
-#include "slave/flags.hpp"
-
-namespace mesos {
-namespace internal {
-namespace slave {
-namespace state {
-
-struct SlaveState; // Forward declaration.
-
-} // namespace state {
-
-// Forward declaration.
-class Slave;
-
-
-class Isolator : public process::Process<Isolator>
-{
-public:
- static Isolator* create(const std::string& type);
- static void destroy(Isolator* isolator);
-
- virtual ~Isolator() {}
-
- // Explicitely use 'initialize' since we're overloading below.
- using process::ProcessBase::initialize;
-
- // Called during slave initialization.
- virtual void initialize(
- const Flags& flags,
- const Resources& resources,
- bool local,
- const process::PID<Slave>& slave) = 0;
-
- // Called by the slave to launch an executor for a given framework.
- // If 'checkpoint' is true, the isolator is expected to checkpoint
- // the executor pid to the 'path'.
- virtual void launchExecutor(
- const SlaveID& slaveId,
- const FrameworkID& frameworkId,
- const FrameworkInfo& frameworkInfo,
- const ExecutorInfo& executorInfo,
- const UUID& uuid,
- const std::string& directory,
- const Resources& resources) = 0;
-
- // Terminate a framework's executor, if it is still running.
- // The executor is expected to be gone after this method exits.
- virtual void killExecutor(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId) = 0;
-
- // Update the resource limits for a given framework. This method will
- // be called only after an executor for the framework is started.
- virtual void resourcesChanged(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId,
- const Resources& resources) = 0;
-
- // Returns the resource usage for the isolator.
- virtual process::Future<ResourceStatistics> usage(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId) = 0;
-
- // Recover executors.
- virtual process::Future<Nothing> recover(
- const Option<state::SlaveState>& state) = 0;
-};
-
-
-} // namespace slave {
-} // namespace internal {
-} // namespace mesos {
-
-#endif // __ISOLATOR_HPP__
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/slave/main.cpp
----------------------------------------------------------------------
diff --git a/src/slave/main.cpp b/src/slave/main.cpp
index 44020af..8aba4ed 100644
--- a/src/slave/main.cpp
+++ b/src/slave/main.cpp
@@ -67,12 +67,6 @@ int main(int argc, char** argv)
uint16_t port;
flags.add(&port, "port", "Port to listen on", SlaveInfo().port());
- string isolation;
- flags.add(&isolation,
- "isolation",
- "Isolation mechanism, may be one of: process, cgroups",
- "process");
-
Option<string> master;
flags.add(&master,
"master",
@@ -127,11 +121,10 @@ int main(int argc, char** argv)
LOG(INFO) << "Git SHA: " << build::GIT_SHA.get();
}
- LOG(INFO) << "Creating \"" << isolation << "\" isolator";
-
- Isolator* isolator = Isolator::create(isolation);
- if (isolator == NULL) {
- EXIT(1) << "Unrecognized isolation type: " << isolation;
+ Try<Containerizer*> containerizer = Containerizer::create(flags, false);
+ if (containerizer.isError()) {
+ EXIT(1) << "Failed to create a containerizer: "
+ << containerizer.error();
}
Try<MasterDetector*> detector = MasterDetector::create(master.get());
@@ -142,14 +135,15 @@ int main(int argc, char** argv)
LOG(INFO) << "Starting Mesos slave";
Files files;
- Slave* slave = new Slave(flags, false, detector.get(), isolator, &files);
+ Slave* slave = new Slave(flags, detector.get(), containerizer.get(), &files);
process::spawn(slave);
process::wait(slave->self());
delete slave;
delete detector.get();
- Isolator::destroy(isolator);
+
+ delete containerizer.get();
return 0;
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/slave/monitor.cpp
----------------------------------------------------------------------
diff --git a/src/slave/monitor.cpp b/src/slave/monitor.cpp
index bb3723e..1c02986 100644
--- a/src/slave/monitor.cpp
+++ b/src/slave/monitor.cpp
@@ -36,7 +36,7 @@
#include <stout/lambda.hpp>
#include <stout/protobuf.hpp>
-#include "slave/isolator.hpp"
+#include "slave/containerizer/containerizer.hpp"
#include "slave/monitor.hpp"
using namespace process;
@@ -60,123 +60,116 @@ const size_t MONITORING_TIME_SERIES_CAPACITY = 1000;
const size_t MONITORING_ARCHIVED_TIME_SERIES = 25;
-Future<Nothing> ResourceMonitorProcess::watch(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId,
+Future<Nothing> ResourceMonitorProcess::start(
+ const ContainerID& containerId,
const ExecutorInfo& executorInfo,
const Duration& interval)
{
- if (executors.contains(frameworkId) &&
- executors[frameworkId].contains(executorId)) {
- return Failure("Already watched");
+ if (monitored.contains(containerId)) {
+ return Failure("Already monitored");
}
- executors[frameworkId][executorId] =
+ monitored[containerId] =
MonitoringInfo(executorInfo,
MONITORING_TIME_SERIES_WINDOW,
MONITORING_TIME_SERIES_CAPACITY);
// Schedule the resource collection.
- delay(interval, self(), &Self::collect, frameworkId, executorId, interval);
+ delay(interval, self(), &Self::collect, containerId, interval);
return Nothing();
}
-Future<Nothing> ResourceMonitorProcess::unwatch(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId)
+Future<Nothing> ResourceMonitorProcess::stop(
+ const ContainerID& containerId)
{
- if (!executors.contains(frameworkId) ||
- !executors[frameworkId].contains(executorId)) {
- return Failure("Not watched");
+ if (!monitored.contains(containerId)) {
+ return Failure("Not monitored");
}
// Add the monitoring information to the archive.
- archive.push_back(executors[frameworkId][executorId]);
- executors[frameworkId].erase(executorId);
-
- if (executors[frameworkId].empty()) {
- executors.erase(frameworkId);
- }
+ archive.push_back(monitored[containerId]);
+ monitored.erase(containerId);
return Nothing();
}
void ResourceMonitorProcess::collect(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId,
+ const ContainerID& containerId,
const Duration& interval)
{
- // Has the executor been unwatched?
- if (!executors.contains(frameworkId) ||
- !executors[frameworkId].contains(executorId)) {
+ // Has monitoring stopped?
+ if (!monitored.contains(containerId)) {
return;
}
- dispatch(isolator, &Isolator::usage, frameworkId, executorId)
+ containerizer->usage(containerId)
.onAny(defer(self(),
&Self::_collect,
lambda::_1,
- frameworkId,
- executorId,
+ containerId,
interval));
}
void ResourceMonitorProcess::_collect(
const Future<ResourceStatistics>& statistics,
- const FrameworkID& frameworkId,
- const ExecutorID& executorId,
+ const ContainerID& containerId,
const Duration& interval)
{
- // Has the executor been unwatched?
- if (!executors.contains(frameworkId) ||
- !executors[frameworkId].contains(executorId)) {
+ // Has monitoring been stopped?
+ if (!monitored.contains(containerId)) {
return;
}
+ const ExecutorID& executorId =
+ monitored[containerId].executorInfo.executor_id();
+ const FrameworkID& frameworkId =
+ monitored[containerId].executorInfo.framework_id();
+
if (statistics.isDiscarded()) {
- // Note that the isolator may have terminated, causing pending
- // dispatches to be deleted.
- VLOG(1) << "Ignoring discarded future collecting resource usage for "
- << "executor '" << executorId
+ VLOG(1) << "Ignoring discarded future collecting resource usage for"
+ << " container '" << containerId
+ << "' for executor '" << executorId
<< "' of framework '" << frameworkId << "'";
} else if (statistics.isFailed()) {
- // TODO(bmahler): Have the Isolators discard the result when the
+ // TODO(bmahler): Have the Containerizer discard the result when the
// executor was killed or completed.
VLOG(1)
- << "Failed to collect resource usage for executor '" << executorId
+ << "Failed to collect resource usage for"
+ << " container '" << containerId
+ << "' for executor '" << executorId
<< "' of framework '" << frameworkId << "': " << statistics.failure();
} else {
Try<Time> time = Time::create(statistics.get().timestamp());
if (time.isError()) {
LOG(ERROR) << "Invalid timestamp " << statistics.get().timestamp()
- << " for executor '" << executorId
+ << " for container '" << containerId
+ << "' for executor '" << executorId
<< "' of framework '" << frameworkId << ": " << time.error();
} else {
// Add the statistics to the time series.
- executors[frameworkId][executorId].statistics.set(
+ monitored[containerId].statistics.set(
statistics.get(), time.get());
}
}
// Schedule the next collection.
- delay(interval, self(), &Self::collect, frameworkId, executorId, interval);
+ delay(interval, self(), &Self::collect, containerId, interval);
}
ResourceMonitorProcess::Usage ResourceMonitorProcess::usage(
- const FrameworkID& frameworkId,
+ const ContainerID& containerId,
const ExecutorInfo& executorInfo)
{
Usage usage;
- usage.frameworkId = frameworkId;
+ usage.containerId = containerId;
usage.executorInfo = executorInfo;
- usage.statistics = dispatch(
- isolator, &Isolator::usage, frameworkId, executorInfo.executor_id());
+ usage.statistics = containerizer->usage(containerId);
return usage;
}
@@ -196,12 +189,12 @@ Future<http::Response> ResourceMonitorProcess::_statistics(
list<Usage> usages;
list<Future<ResourceStatistics> > futures;
- foreachkey (const FrameworkID& frameworkId, executors) {
- foreachvalue (const MonitoringInfo& info, executors[frameworkId]) {
- // TODO(bmahler): Consider a batch usage API on the Isolator.
- usages.push_back(usage(frameworkId, info.executorInfo));
- futures.push_back(usages.back().statistics);
- }
+ foreachpair (const ContainerID& containerId,
+ const MonitoringInfo& info,
+ monitored) {
+ // TODO(bmahler): Consider a batch usage API on the Containerizer.
+ usages.push_back(usage(containerId, info.executorInfo));
+ futures.push_back(usages.back().statistics);
}
return process::await(futures)
@@ -217,9 +210,10 @@ Future<http::Response> ResourceMonitorProcess::__statistics(
foreach (const Usage& usage, usages) {
if (usage.statistics.isFailed()) {
- LOG(WARNING) << "Failed to get resource usage for executor "
- << usage.executorInfo.executor_id()
- << " of framework " << usage.frameworkId
+ LOG(WARNING) << "Failed to get resource usage for "
+ << " container " << usage.containerId
+ << " for executor " << usage.executorInfo.executor_id()
+ << " of framework " << usage.executorInfo.framework_id()
<< ": " << usage.statistics.failure();
continue;
} else if (usage.statistics.isDiscarded()) {
@@ -227,7 +221,7 @@ Future<http::Response> ResourceMonitorProcess::__statistics(
}
JSON::Object entry;
- entry.values["framework_id"] = usage.frameworkId.value();
+ entry.values["framework_id"] = usage.executorInfo.framework_id().value();
entry.values["executor_id"] = usage.executorInfo.executor_id().value();
entry.values["executor_name"] = usage.executorInfo.name();
entry.values["source"] = usage.executorInfo.source();
@@ -246,7 +240,7 @@ const string ResourceMonitorProcess::STATISTICS_HELP = HELP(
USAGE(
"/statistics.json"),
DESCRIPTION(
- "Returns the current resource consumption data for executors",
+ "Returns the current resource consumption data for containers",
"running under this slave.",
"",
"Example:",
@@ -276,9 +270,9 @@ const string ResourceMonitorProcess::STATISTICS_HELP = HELP(
"```"));
-ResourceMonitor::ResourceMonitor(Isolator* isolator)
+ResourceMonitor::ResourceMonitor(Containerizer* containerizer)
{
- process = new ResourceMonitorProcess(isolator);
+ process = new ResourceMonitorProcess(containerizer);
spawn(process);
}
@@ -291,28 +285,24 @@ ResourceMonitor::~ResourceMonitor()
}
-Future<Nothing> ResourceMonitor::watch(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId,
+Future<Nothing> ResourceMonitor::start(
+ const ContainerID& containerId,
const ExecutorInfo& executorInfo,
const Duration& interval)
{
return dispatch(
process,
- &ResourceMonitorProcess::watch,
- frameworkId,
- executorId,
+ &ResourceMonitorProcess::start,
+ containerId,
executorInfo,
interval);
}
-Future<Nothing> ResourceMonitor::unwatch(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId)
+Future<Nothing> ResourceMonitor::stop(
+ const ContainerID& containerId)
{
- return dispatch(
- process, &ResourceMonitorProcess::unwatch, frameworkId, executorId);
+ return dispatch(process, &ResourceMonitorProcess::stop, containerId);
}
} // namespace slave {
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/slave/monitor.hpp
----------------------------------------------------------------------
diff --git a/src/slave/monitor.hpp b/src/slave/monitor.hpp
index b677410..c042bc1 100644
--- a/src/slave/monitor.hpp
+++ b/src/slave/monitor.hpp
@@ -19,6 +19,7 @@
#ifndef __SLAVE_MONITOR_HPP__
#define __SLAVE_MONITOR_HPP__
+#include <map>
#include <string>
#include <boost/circular_buffer.hpp>
@@ -43,7 +44,7 @@ namespace internal {
namespace slave {
// Forward declarations.
-class Isolator;
+class Containerizer;
class ResourceMonitorProcess;
@@ -54,7 +55,7 @@ const extern size_t MONITORING_TIME_SERIES_CAPACITY;
const extern size_t MONITORING_ARCHIVED_TIME_SERIES;
-// Provides resource monitoring for executors. Resource usage time
+// Provides resource monitoring for containers. Resource usage time
// series are stored using the Statistics module. Usage information
// is also exported via a JSON endpoint.
// TODO(bmahler): Forward usage information to the master.
@@ -68,22 +69,20 @@ const extern size_t MONITORING_ARCHIVED_TIME_SERIES;
class ResourceMonitor
{
public:
- ResourceMonitor(Isolator* isolator);
+ ResourceMonitor(Containerizer* containerizer);
~ResourceMonitor();
- // Starts monitoring resources for the given executor.
- // Returns a failure if the executor is already being watched.
- process::Future<Nothing> watch(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId,
+ // Starts monitoring resources for the given container.
+ // Returns a failure if the container is already being monitored.
+ process::Future<Nothing> start(
+ const ContainerID& containerId,
const ExecutorInfo& executorInfo,
const Duration& interval);
- // Stops monitoring resources for the given executor.
- // Returns a failure if the executor is unknown to the monitor.
- process::Future<Nothing> unwatch(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId);
+ // Stops monitoring resources for the given container.
+ // Returns a failure if the container is unknown to the monitor.
+ process::Future<Nothing> stop(
+ const ContainerID& containerId);
private:
ResourceMonitorProcess* process;
@@ -93,23 +92,21 @@ private:
class ResourceMonitorProcess : public process::Process<ResourceMonitorProcess>
{
public:
- ResourceMonitorProcess(Isolator* _isolator)
+ ResourceMonitorProcess(Containerizer* _containerizer)
: ProcessBase("monitor"),
- isolator(_isolator),
+ containerizer(_containerizer),
limiter(2, Seconds(1)), // 2 permits per second.
archive(MONITORING_ARCHIVED_TIME_SERIES) {}
virtual ~ResourceMonitorProcess() {}
- process::Future<Nothing> watch(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId,
+ process::Future<Nothing> start(
+ const ContainerID& containerId,
const ExecutorInfo& executorInfo,
const Duration& interval);
- process::Future<Nothing> unwatch(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId);
+ process::Future<Nothing> stop(
+ const ContainerID& containerId);
protected:
virtual void initialize()
@@ -125,26 +122,24 @@ protected:
private:
void collect(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId,
+ const ContainerID& containerId,
const Duration& interval);
void _collect(
const process::Future<ResourceStatistics>& statistics,
- const FrameworkID& frameworkId,
- const ExecutorID& executorId,
+ const ContainerID& containerId,
const Duration& interval);
// This is a convenience struct for bundling usage information.
struct Usage
{
- FrameworkID frameworkId;
+ ContainerID containerId;
ExecutorInfo executorInfo;
process::Future<ResourceStatistics> statistics;
};
// Helper for returning the usage for a particular executor.
Usage usage(
- const FrameworkID& frameworkId,
+ const ContainerID& containerId,
const ExecutorInfo& executorInfo);
// HTTP Endpoints.
@@ -159,7 +154,7 @@ private:
static const std::string STATISTICS_HELP;
- Isolator* isolator;
+ Containerizer* containerizer;
// Used to rate limit the statistics.json endpoint.
process::RateLimiter limiter;
@@ -178,7 +173,8 @@ private:
process::TimeSeries<ResourceStatistics> statistics;
};
- hashmap<FrameworkID, hashmap<ExecutorID, MonitoringInfo> > executors;
+ // The monitoring info is stored for each monitored container.
+ hashmap<ContainerID, MonitoringInfo> monitored;
// Fixed-size history of monitoring information.
boost::circular_buffer<MonitoringInfo> archive;
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/slave/paths.hpp
----------------------------------------------------------------------
diff --git a/src/slave/paths.hpp b/src/slave/paths.hpp
index 70ee0f3..41bb73d 100644
--- a/src/slave/paths.hpp
+++ b/src/slave/paths.hpp
@@ -192,7 +192,7 @@ inline std::string getExecutorRunPath(
const SlaveID& slaveId,
const FrameworkID& frameworkId,
const ExecutorID& executorId,
- const UUID& executorUUID)
+ const ContainerID& containerId)
{
return strings::format(
EXECUTOR_RUN_PATH,
@@ -200,7 +200,7 @@ inline std::string getExecutorRunPath(
slaveId,
frameworkId,
executorId,
- executorUUID.toString()).get();
+ containerId).get();
}
@@ -209,7 +209,7 @@ inline std::string getExecutorSentinelPath(
const SlaveID& slaveId,
const FrameworkID& frameworkId,
const ExecutorID& executorId,
- const UUID& executorUUID)
+ const ContainerID& containerId)
{
return strings::format(
EXECUTOR_SENTINEL_PATH,
@@ -217,7 +217,7 @@ inline std::string getExecutorSentinelPath(
slaveId,
frameworkId,
executorId,
- executorUUID.toString()).get();
+ containerId).get();
}
@@ -241,7 +241,7 @@ inline std::string getLibprocessPidPath(
const SlaveID& slaveId,
const FrameworkID& frameworkId,
const ExecutorID& executorId,
- const UUID& executorUUID)
+ const ContainerID& containerId)
{
return strings::format(
LIBPROCESS_PID_PATH,
@@ -249,7 +249,7 @@ inline std::string getLibprocessPidPath(
slaveId,
frameworkId,
executorId,
- executorUUID.toString()).get();
+ containerId).get();
}
@@ -258,7 +258,7 @@ inline std::string getForkedPidPath(
const SlaveID& slaveId,
const FrameworkID& frameworkId,
const ExecutorID& executorId,
- const UUID& executorUUID)
+ const ContainerID& containerId)
{
return strings::format(
FORKED_PID_PATH,
@@ -266,7 +266,7 @@ inline std::string getForkedPidPath(
slaveId,
frameworkId,
executorId,
- executorUUID.toString()).get();
+ containerId).get();
}
@@ -275,7 +275,7 @@ inline std::string getTaskPath(
const SlaveID& slaveId,
const FrameworkID& frameworkId,
const ExecutorID& executorId,
- const UUID& executorUUID,
+ const ContainerID& containerId,
const TaskID& taskId)
{
return strings::format(
@@ -284,7 +284,7 @@ inline std::string getTaskPath(
slaveId,
frameworkId,
executorId,
- executorUUID.toString(),
+ containerId,
taskId).get();
}
@@ -294,7 +294,7 @@ inline std::string getTaskInfoPath(
const SlaveID& slaveId,
const FrameworkID& frameworkId,
const ExecutorID& executorId,
- const UUID& executorUUID,
+ const ContainerID& containerId,
const TaskID& taskId)
{
return strings::format(
@@ -303,7 +303,7 @@ inline std::string getTaskInfoPath(
slaveId,
frameworkId,
executorId,
- executorUUID.toString(),
+ containerId,
taskId).get();
}
@@ -313,7 +313,7 @@ inline std::string getTaskUpdatesPath(
const SlaveID& slaveId,
const FrameworkID& frameworkId,
const ExecutorID& executorId,
- const UUID& executorUUID,
+ const ContainerID& containerId,
const TaskID& taskId)
{
return strings::format(
@@ -322,7 +322,7 @@ inline std::string getTaskUpdatesPath(
slaveId,
frameworkId,
executorId,
- executorUUID.toString(),
+ containerId,
taskId).get();
}
@@ -332,10 +332,10 @@ inline std::string createExecutorDirectory(
const SlaveID& slaveId,
const FrameworkID& frameworkId,
const ExecutorID& executorId,
- const UUID& executorUUID)
+ const ContainerID& containerId)
{
std::string directory =
- getExecutorRunPath(rootDir, slaveId, frameworkId, executorId, executorUUID);
+ getExecutorRunPath(rootDir, slaveId, frameworkId, executorId, containerId);
Try<Nothing> mkdir = os::mkdir(directory);
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/slave/process_isolator.cpp
----------------------------------------------------------------------
diff --git a/src/slave/process_isolator.cpp b/src/slave/process_isolator.cpp
deleted file mode 100644
index 09cb996..0000000
--- a/src/slave/process_isolator.cpp
+++ /dev/null
@@ -1,516 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <errno.h>
-#include <signal.h>
-#include <stdio.h> // For perror.
-#include <string.h>
-
-#include <list>
-#include <map>
-#include <set>
-
-#include <process/clock.hpp>
-#include <process/defer.hpp>
-#include <process/dispatch.hpp>
-#include <process/id.hpp>
-#include <process/reap.hpp>
-
-#include <stout/check.hpp>
-#include <stout/exit.hpp>
-#include <stout/foreach.hpp>
-#include <stout/lambda.hpp>
-#include <stout/nothing.hpp>
-#include <stout/option.hpp>
-#include <stout/os.hpp>
-#include <stout/uuid.hpp>
-
-#include "common/type_utils.hpp"
-
-#include "slave/flags.hpp"
-#include "slave/process_isolator.hpp"
-#include "slave/state.hpp"
-
-using namespace process;
-
-using std::map;
-using std::set;
-using std::string;
-
-using process::defer;
-using process::wait; // Necessary on some OS's to disambiguate.
-
-namespace mesos {
-namespace internal {
-namespace slave {
-
-using launcher::ExecutorLauncher;
-
-using state::SlaveState;
-using state::FrameworkState;
-using state::ExecutorState;
-using state::RunState;
-
-ProcessIsolator::ProcessIsolator()
- : ProcessBase(ID::generate("process-isolator")),
- local(false),
- initialized(false) {}
-
-
-void ProcessIsolator::initialize(
- const Flags& _flags,
- const Resources& _,
- bool _local,
- const PID<Slave>& _slave)
-{
- flags = _flags;
- local = _local;
- slave = _slave;
-
- initialized = true;
-}
-
-
-void ProcessIsolator::launchExecutor(
- const SlaveID& slaveId,
- const FrameworkID& frameworkId,
- const FrameworkInfo& frameworkInfo,
- const ExecutorInfo& executorInfo,
- const UUID& uuid,
- const string& directory,
- const Resources& resources)
-{
- CHECK(initialized) << "Cannot launch executors before initialization!";
-
- const ExecutorID& executorId = executorInfo.executor_id();
-
- LOG(INFO) << "Launching " << executorId
- << " (" << executorInfo.command().value() << ")"
- << " in " << directory
- << " with resources " << resources
- << "' for framework " << frameworkId;
-
- ProcessInfo* info = new ProcessInfo(frameworkId, executorId);
-
- infos[frameworkId][executorId] = info;
-
- // Use pipes to determine which child has successfully changed session.
- int pipes[2];
- if (pipe(pipes) < 0) {
- PLOG(FATAL) << "Failed to create a pipe";
- }
-
- // Set the FD_CLOEXEC flags on these pipes
- Try<Nothing> cloexec = os::cloexec(pipes[0]);
- CHECK_SOME(cloexec) << "Error setting FD_CLOEXEC on pipe[0]";
-
- cloexec = os::cloexec(pipes[1]);
- CHECK_SOME(cloexec) << "Error setting FD_CLOEXEC on pipe[1]";
-
- // Create the ExecutorLauncher instance before the fork for the
- // child process to use.
- ExecutorLauncher launcher(
- slaveId,
- frameworkId,
- executorInfo.executor_id(),
- uuid,
- executorInfo.command(),
- frameworkInfo.user(),
- directory,
- flags.work_dir,
- slave,
- flags.frameworks_home,
- flags.hadoop_home,
- !local,
- flags.switch_user,
- frameworkInfo.checkpoint(),
- flags.recovery_timeout);
-
- // We get the environment map for launching mesos-launcher before
- // the fork, because we have seen deadlock issues with ostringstream
- // in the forked process before it calls exec.
- map<string, string> env = launcher.getLauncherEnvironment();
-
- pid_t pid;
- if ((pid = fork()) == -1) {
- PLOG(FATAL) << "Failed to fork to launch new executor";
- }
-
- if (pid > 0) {
- os::close(pipes[1]);
-
- // Get the child's pid via the pipe.
- if (read(pipes[0], &pid, sizeof(pid)) == -1) {
- PLOG(FATAL) << "Failed to get child PID from pipe";
- }
-
- os::close(pipes[0]);
-
- // In parent process.
- LOG(INFO) << "Forked executor at " << pid;
-
- // Record the pid (should also be the pgid since we setsid below).
- infos[frameworkId][executorId]->pid = pid;
-
- process::reap(pid)
- .onAny(defer(PID<ProcessIsolator>(this),
- &ProcessIsolator::reaped,
- pid,
- lambda::_1));
-
- // Tell the slave this executor has started.
- dispatch(slave, &Slave::executorStarted, frameworkId, executorId, pid);
- } else {
- // In child process, we make cleanup easier by putting process
- // into it's own session. DO NOT USE GLOG!
- os::close(pipes[0]);
-
- // NOTE: We setsid() in a loop because setsid() might fail if another
- // process has the same process group id as the calling process.
- while ((pid = setsid()) == -1) {
- perror("Could not put executor in its own session");
-
- std::cout << "Forking another process and retrying ..." << std::endl;
-
- if ((pid = fork()) == -1) {
- perror("Failed to fork to launch executor");
- abort();
- }
-
- if (pid > 0) {
- // In parent process.
- exit(0);
- }
- }
-
- if (write(pipes[1], &pid, sizeof(pid)) != sizeof(pid)) {
- perror("Failed to write PID on pipe");
- abort();
- }
-
- os::close(pipes[1]);
-
- // Setup the environment for launcher.
- foreachpair (const string& key, const string& value, env) {
- os::setenv(key, value);
- }
-
- const char** args = (const char**) new char*[2];
-
- // Determine path for mesos-launcher.
- Result<string> realpath = os::realpath(
- path::join(flags.launcher_dir, "mesos-launcher"));
-
- if (!realpath.isSome()) {
- EXIT(1) << "Failed to determine the canonical path "
- << "for the mesos-launcher '"
- << path::join(flags.launcher_dir, "mesos-launcher")
- << "': "
- << (realpath.isError()
- ? realpath.error()
- : "No such file or directory");
- }
-
- // Grab a copy of the path so that we can reliably use 'c_str()'.
- const string& path = realpath.get();
-
- args[0] = path.c_str();
- args[1] = NULL;
-
- // Execute the mesos-launcher!
- execvp(args[0], (char* const*) args);
-
- // If we get here, the execvp call failed.
- perror("Failed to execvp the mesos-launcher");
- abort();
- }
-}
-
-// NOTE: This function can be called by the isolator itself or by the
-// slave if it doesn't hear about an executor exit after it sends a
-// shutdown message.
-void ProcessIsolator::killExecutor(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId)
-{
- CHECK(initialized) << "Cannot kill executors before initialization!";
-
- if (!infos.contains(frameworkId) ||
- !infos[frameworkId].contains(executorId) ||
- infos[frameworkId][executorId]->killed) {
- LOG(ERROR) << "Asked to kill an unknown/killed executor! " << executorId;
- return;
- }
-
- const Option<pid_t>& pid = infos[frameworkId][executorId]->pid;
-
- if (pid.isSome()) {
- // TODO(vinod): Call killtree on the pid of the actual executor process
- // that is running the tasks (stored in the local storage by the
- // executor module).
- Try<std::list<os::ProcessTree> > trees =
- os::killtree(pid.get(), SIGKILL, true, true);
-
- if (trees.isError()) {
- LOG(WARNING) << "Failed to kill the process tree rooted at pid "
- << pid.get() << ": " << trees.error();
- } else {
- LOG(INFO) << "Killed the following process trees:\n"
- << stringify(trees.get());
- }
-
- // Also kill all processes that belong to the process group of the executor.
- // This is valuable in situations where the top level executor process
- // exited and hence killtree is unable to kill any spawned orphans.
- // NOTE: This assumes that the process group id of the executor process is
- // same as its pid (which is expected to be the case with setsid()).
- // TODO(vinod): Also (recursively) kill processes belonging to the
- // same session, but have a different process group id.
- if (killpg(pid.get(), SIGKILL) == -1 && errno != ESRCH) {
- PLOG(WARNING) << "Failed to kill process group " << pid.get();
- }
-
- infos[frameworkId][executorId]->killed = true;
- }
-}
-
-
-void ProcessIsolator::resourcesChanged(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId,
- const Resources& resources)
-{
- CHECK(initialized) << "Cannot do resourcesChanged before initialization!";
-
- if (!infos.contains(frameworkId) ||
- !infos[frameworkId].contains(executorId) ||
- infos[frameworkId][executorId]->killed) {
- LOG(INFO) << "Asked to update resources for an unknown/killed executor '"
- << executorId << "' of framework " << frameworkId;
- return;
- }
-
- ProcessInfo* info = CHECK_NOTNULL(infos[frameworkId][executorId]);
-
- info->resources = resources;
-
- // Do nothing; subclasses may override this.
-}
-
-
-Future<Nothing> ProcessIsolator::recover(
- const Option<SlaveState>& state)
-{
- LOG(INFO) << "Recovering isolator";
-
- if (state.isNone()) {
- return Nothing();
- }
-
- foreachvalue (const FrameworkState& framework, state.get().frameworks) {
- foreachvalue (const ExecutorState& executor, framework.executors) {
- LOG(INFO) << "Recovering executor '" << executor.id
- << "' of framework " << framework.id;
-
- if (executor.info.isNone()) {
- LOG(WARNING) << "Skipping recovery of executor '" << executor.id
- << "' of framework " << framework.id
- << " because its info cannot be recovered";
- continue;
- }
-
- if (executor.latest.isNone()) {
- LOG(WARNING) << "Skipping recovery of executor '" << executor.id
- << "' of framework " << framework.id
- << " because its latest run cannot be recovered";
- continue;
- }
-
- // We are only interested in the latest run of the executor!
- const UUID& uuid = executor.latest.get();
- CHECK(executor.runs.contains(uuid));
- const RunState& run = executor.runs.get(uuid).get();
-
- if (run.completed) {
- VLOG(1) << "Skipping recovery of executor '" << executor.id
- << "' of framework " << framework.id
- << " because its latest run " << uuid << " is completed";
- continue;
- }
-
- ProcessInfo* info =
- new ProcessInfo(framework.id, executor.id, run.forkedPid);
-
- infos[framework.id][executor.id] = info;
-
- // Add the pid to the reaper to monitor exit status.
- if (run.forkedPid.isSome()) {
- process::reap(run.forkedPid.get())
- .onAny(defer(PID<ProcessIsolator>(this),
- &ProcessIsolator::reaped,
- run.forkedPid.get(),
- lambda::_1));
- }
- }
- }
-
- return Nothing();
-}
-
-
-Future<ResourceStatistics> ProcessIsolator::usage(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId)
-{
- if (!infos.contains(frameworkId) ||
- !infos[frameworkId].contains(executorId) ||
- infos[frameworkId][executorId]->killed) {
- return Failure("Unknown/killed executor");
- }
-
- ProcessInfo* info = infos[frameworkId][executorId];
- CHECK_NOTNULL(info);
-
- ResourceStatistics result;
-
- result.set_timestamp(Clock::now().secs());
-
- // Set the resource allocations.
- const Option<Bytes>& mem = info->resources.mem();
- if (mem.isSome()) {
- result.set_mem_limit_bytes(mem.get().bytes());
- }
-
- const Option<double>& cpus = info->resources.cpus();
- if (cpus.isSome()) {
- result.set_cpus_limit(cpus.get());
- }
-
- CHECK_SOME(info->pid);
-
- Result<os::Process> process = os::process(info->pid.get());
-
- if (!process.isSome()) {
- return Failure(
- process.isError() ? process.error() : "Process does not exist");
- }
-
- result.set_timestamp(Clock::now().secs());
-
- if (process.get().rss.isSome()) {
- result.set_mem_rss_bytes(process.get().rss.get().bytes());
- }
-
- // We only show utime and stime when both are available, otherwise
- // we're exposing a partial view of the CPU times.
- if (process.get().utime.isSome() && process.get().stime.isSome()) {
- result.set_cpus_user_time_secs(process.get().utime.get().secs());
- result.set_cpus_system_time_secs(process.get().stime.get().secs());
- }
-
- // Now aggregate all descendant process usage statistics.
- const Try<set<pid_t> >& children = os::children(info->pid.get(), true);
-
- if (children.isError()) {
- return Failure(
- "Failed to get children of " + stringify(info->pid.get()) + ": " +
- children.error());
- }
-
- // Aggregate the usage of all child processes.
- foreach (pid_t child, children.get()) {
- process = os::process(child);
-
- // Skip processes that disappear.
- if (process.isNone()) {
- continue;
- }
-
- if (process.isError()) {
- LOG(WARNING) << "Failed to get status of descendant process " << child
- << " of parent " << info->pid.get() << ": "
- << process.error();
- continue;
- }
-
- if (process.get().rss.isSome()) {
- result.set_mem_rss_bytes(
- result.mem_rss_bytes() + process.get().rss.get().bytes());
- }
-
- // We only show utime and stime when both are available, otherwise
- // we're exposing a partial view of the CPU times.
- if (process.get().utime.isSome() && process.get().stime.isSome()) {
- result.set_cpus_user_time_secs(
- result.cpus_user_time_secs() + process.get().utime.get().secs());
- result.set_cpus_system_time_secs(
- result.cpus_system_time_secs() + process.get().stime.get().secs());
- }
- }
-
- return result;
-}
-
-
-void ProcessIsolator::reaped(pid_t pid, const Future<Option<int> >& status)
-{
- foreachkey (const FrameworkID& frameworkId, infos) {
- foreachkey (const ExecutorID& executorId, infos[frameworkId]) {
- ProcessInfo* info = infos[frameworkId][executorId];
-
- if (info->pid.isSome() && info->pid.get() == pid) {
- if (!status.isReady()) {
- LOG(ERROR) << "Failed to get the status for executor '" << executorId
- << "' of framework " << frameworkId << ": "
- << (status.isFailed() ? status.failure() : "discarded");
- return;
- }
-
- LOG(INFO) << "Telling slave of terminated executor '" << executorId
- << "' of framework " << frameworkId;
-
- dispatch(slave,
- &Slave::executorTerminated,
- frameworkId,
- executorId,
- status.get(),
- false,
- "Executor terminated");
-
- if (!info->killed) {
- // Try and cleanup after the executor.
- killExecutor(frameworkId, executorId);
- }
-
- if (infos[frameworkId].size() == 1) {
- infos.erase(frameworkId);
- } else {
- infos[frameworkId].erase(executorId);
- }
- delete info;
-
- return;
- }
- }
- }
-}
-
-
-} // namespace slave {
-} // namespace internal {
-} // namespace mesos {
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/slave/process_isolator.hpp
----------------------------------------------------------------------
diff --git a/src/slave/process_isolator.hpp b/src/slave/process_isolator.hpp
deleted file mode 100644
index bc52f33..0000000
--- a/src/slave/process_isolator.hpp
+++ /dev/null
@@ -1,117 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __PROCESS_ISOLATOR_HPP__
-#define __PROCESS_ISOLATOR_HPP__
-
-#include <string>
-
-#include <sys/types.h>
-
-#include <process/future.hpp>
-
-#include <stout/hashmap.hpp>
-#include <stout/nothing.hpp>
-#include <stout/option.hpp>
-#include <stout/uuid.hpp>
-
-#include "launcher/launcher.hpp"
-
-#include "slave/flags.hpp"
-#include "slave/isolator.hpp"
-#include "slave/slave.hpp"
-
-namespace mesos {
-namespace internal {
-namespace slave {
-
-class ProcessIsolator : public Isolator
-{
-public:
- ProcessIsolator();
-
- virtual void initialize(
- const Flags& flags,
- const Resources& resources,
- bool local,
- const process::PID<Slave>& slave);
-
- virtual void launchExecutor(
- const SlaveID& slaveId,
- const FrameworkID& frameworkId,
- const FrameworkInfo& frameworkInfo,
- const ExecutorInfo& executorInfo,
- const UUID& uuid,
- const std::string& directory,
- const Resources& resources);
-
- virtual void killExecutor(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId);
-
- virtual void resourcesChanged(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId,
- const Resources& resources);
-
- virtual process::Future<ResourceStatistics> usage(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId);
-
- virtual process::Future<Nothing> recover(
- const Option<state::SlaveState>& state);
-
-
-private:
- // No copying, no assigning.
- ProcessIsolator(const ProcessIsolator&);
- ProcessIsolator& operator = (const ProcessIsolator&);
-
- struct ProcessInfo
- {
- ProcessInfo(const FrameworkID& _frameworkId,
- const ExecutorID& _executorId,
- const Option<pid_t>& _pid = None(),
- bool _killed = false)
- : frameworkId(_frameworkId),
- executorId(_executorId),
- pid(_pid),
- killed(_killed) {}
-
- FrameworkID frameworkId;
- ExecutorID executorId;
- Option<pid_t> pid; // PID of the forked executor process.
- bool killed; // True if "killing" has been initiated via 'killExecutor'.
- Resources resources; // Resources allocated to the process tree.
- };
-
- // TODO(benh): Make variables const by passing them via constructor.
- Flags flags;
- bool local;
- process::PID<Slave> slave;
- bool initialized;
- hashmap<FrameworkID, hashmap<ExecutorID, ProcessInfo*> > infos;
-
- void reaped(pid_t pid, const Future<Option<int> >& status);
-};
-
-} // namespace slave {
-} // namespace internal {
-} // namespace mesos {
-
-#endif // __PROCESS_ISOLATOR_HPP__
[05/11] Containerizer (part 1)
Posted by vi...@apache.org.
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 213df86..8ad955a 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -22,6 +22,7 @@
#include <algorithm>
#include <iomanip>
#include <list>
+#include <map>
#include <sstream>
#include <string>
#include <vector>
@@ -61,6 +62,7 @@
#include "slave/status_update_manager.hpp"
using std::list;
+using std::map;
using std::string;
using std::vector;
@@ -73,20 +75,18 @@ namespace slave {
using namespace state;
Slave::Slave(const slave::Flags& _flags,
- bool _local,
MasterDetector* _detector,
- Isolator* _isolator,
+ Containerizer* _containerizer,
Files* _files)
: ProcessBase(ID::generate("slave")),
state(RECOVERING),
http(*this),
flags(_flags),
- local(_local),
completedFrameworks(MAX_COMPLETED_FRAMEWORKS),
detector(_detector),
- isolator(_isolator),
+ containerizer(_containerizer),
files(_files),
- monitor(_isolator),
+ monitor(containerizer),
statusUpdateManager(new StatusUpdateManager()),
metaDir(paths::getMetaRootDir(flags.work_dir)),
recoveryErrors(0) {}
@@ -115,101 +115,11 @@ void Slave::initialize()
CHECK_SOME(os::mkdir(flags.work_dir))
<< "Failed to create slave work directory '" << flags.work_dir << "'";
- // Properly set up resources.
- // TODO(benh): Move this computation into Flags as the "default".
- // TODO(vinod): Move some of this computation into Resources.
- Try<Resources> parse = Resources::parse(
- flags.resources.isSome() ? flags.resources.get() : "",
- flags.default_role);
- CHECK_SOME(parse);
- resources = parse.get();
-
- if (!resources.cpus().isSome()) {
- double cpus;
-
- Try<long> cpus_ = os::cpus();
- if (!cpus_.isSome()) {
- LOG(WARNING) << "Failed to auto-detect the number of cpus to use: '"
- << cpus_.error()
- << "' ; defaulting to " << DEFAULT_CPUS;
- cpus = DEFAULT_CPUS;
- } else {
- cpus = cpus_.get();
- }
-
- Resource r = Resources::parse(
- "cpus",
- stringify(cpus),
- flags.default_role).get();
- resources += r;
- }
-
-
- if (!resources.mem().isSome()) {
- Bytes mem;
-
- Try<os::Memory> mem_ = os::memory();
- if (!mem_.isSome()) {
- LOG(WARNING) << "Failed to auto-detect the size of main memory: '"
- << mem_.error()
- << "' ; defaulting to " << DEFAULT_MEM;
- mem = DEFAULT_MEM;
- } else {
- mem = mem_.get().total;
-
- // Leave 1 GB free if we have more than 1 GB, otherwise, use all!
- // TODO(benh): Have better default scheme (e.g., % of mem not
- // greater than 1 GB?)
- if (mem > Gigabytes(1)) {
- mem = mem - Gigabytes(1);
- }
- }
-
- Resource r = Resources::parse(
- "mem",
- stringify(mem.megabytes()),
- flags.default_role).get();
- resources += r;
+ Try<Resources> _resources = Containerizer::resources(flags);
+ if (_resources.isError()) {
+ EXIT(1) << "Failed to determine slave resources: " << _resources.error();
}
-
- if (!resources.disk().isSome()) {
- Bytes disk;
-
- // NOTE: We calculate disk size of the file system on
- // which the slave work directory is mounted.
- Try<Bytes> disk_ = fs::size(flags.work_dir);
- if (!disk_.isSome()) {
- LOG(WARNING) << "Failed to auto-detect the disk space: '"
- << disk_.error()
- << "' ; defaulting to " << DEFAULT_DISK;
- disk = DEFAULT_DISK;
- } else {
- disk = disk_.get();
-
- // Leave 5 GB free if we have more than 10 GB, otherwise, use all!
- // TODO(benh): Have better default scheme (e.g., % of disk not
- // greater than 10 GB?)
- if (disk > Gigabytes(10)) {
- disk = disk - Gigabytes(5);
- }
- }
-
- Resource r = Resources::parse(
- "disk",
- stringify(disk.megabytes()),
- flags.default_role).get();
- resources += r;
- }
-
- if (!resources.ports().isSome()) {
- Resource r = Resources::parse(
- "ports",
- stringify(DEFAULT_PORTS),
- flags.default_role).get();
- resources += r;
- }
-
- LOG(INFO) << "Slave resources: " << resources;
+ LOG(INFO) << "Slave resources: " << _resources.get();
if (flags.attributes.isSome()) {
attributes = Attributes::parse(flags.attributes.get());
@@ -233,8 +143,8 @@ void Slave::initialize()
// Initialize slave info.
info.set_hostname(hostname);
info.set_port(self().port);
- info.mutable_resources()->MergeFrom(resources);
- info.mutable_attributes()->MergeFrom(attributes);
+ info.mutable_resources()->CopyFrom(_resources.get());
+ info.mutable_attributes()->CopyFrom(attributes);
info.set_checkpoint(flags.checkpoint);
LOG(INFO) << "Slave hostname: " << info.hostname();
@@ -246,23 +156,6 @@ void Slave::initialize()
// executor on an older release).
// TODO(benh): Remove this after the deprecation cycle.
info.set_webui_hostname(hostname);
-
- // Spawn and initialize the isolator.
- // TODO(benh): Seems like the isolator should really be
- // spawned before being passed to the slave.
- spawn(isolator);
-
- // TODO(vinod): Also pass SlaveID here. Currently it is tricky
- // because SlaveID is only known either after recovery (if previous
- // state exists) or after the slave registers with the master. We
- // cannot delay initialize until after (re-)registration because
- // during recovery (but before re-registration), the isolator needs
- // to be initialized before accepting any messages
- // (e.g., killExecutor) from the slave.
- dispatch(isolator, &Isolator::initialize, flags, resources, local, self());
-
- // TODO(vinod): Also pass SlaveID here. The reason that this is
- // tricky is due to similar reasons described in the above comment.
statusUpdateManager->initialize(flags, self());
// Start disk monitoring.
@@ -420,10 +313,6 @@ void Slave::finalize()
CHECK_SOME(os::rm(paths::getLatestSlavePath(metaDir)));
}
}
-
- // Stop the isolator.
- terminate(isolator);
- wait(isolator);
}
@@ -971,11 +860,8 @@ void Slave::_runTask(
// Update the resources.
// TODO(Charles Reiss): The isolator is not guaranteed to update
// the resources before the executor acts on its RunTaskMessage.
- dispatch(isolator,
- &Isolator::resourcesChanged,
- framework->id,
- executor->id,
- executor->resources);
+ // TODO(idownes): Wait until this completes.
+ containerizer->update(executor->containerId, executor->resources);
LOG(INFO) << "Sending task '" << task.task_id()
<< "' to executor '" << executorId
@@ -1089,8 +975,7 @@ void Slave::killTask(
LOG(WARNING) << "Killing the unregistered executor '" << executor->id
<< "' of framework " << framework->id
<< " because it has no tasks";
- dispatch(
- isolator, &Isolator::killExecutor, framework->id, executor->id);
+ containerizer->destroy(executor->containerId);
}
break;
}
@@ -1511,7 +1396,7 @@ void Slave::registerExecutor(
info.id(),
executor->frameworkId,
executor->id,
- executor->uuid);
+ executor->containerId);
LOG(INFO) << "Checkpointing executor pid '"
<< executor->pid << "' to '" << path << "'";
@@ -1531,11 +1416,8 @@ void Slave::registerExecutor(
// TODO(Charles Reiss): We don't actually have a guarantee
// that this will be delivered or (where necessary) acted on
// before the executor gets its RunTaskMessages.
- dispatch(isolator,
- &Isolator::resourcesChanged,
- framework->id,
- executor->id,
- executor->resources);
+ // TODO(idownes): Wait until this completes.
+ containerizer->update(executor->containerId, executor->resources);
// Tell executor it's registered and give it any queued tasks.
ExecutorRegisteredMessage message;
@@ -1651,12 +1533,9 @@ void Slave::reregisterExecutor(
statusUpdate(update, executor->pid);
}
- // Tell the isolator to update the resources.
- dispatch(isolator,
- &Isolator::resourcesChanged,
- frameworkId,
- executorId,
- executor->resources);
+ // Tell the containerizer to update the resources.
+ // TODO(idownes): Wait until this completes.
+ containerizer->update(executor->containerId, executor->resources);
hashmap<TaskID, TaskInfo> unackedTasks;
foreach (const TaskInfo& task, tasks) {
@@ -1733,8 +1612,7 @@ void Slave::reregisterExecutorTimeout()
executor->state = Executor::TERMINATING;
- dispatch(
- isolator, &Isolator::killExecutor, framework->id, executor->id);
+ containerizer->destroy(executor->containerId);
break;
default:
LOG(FATAL) << "Executor '" << executor->id
@@ -1843,16 +1721,17 @@ void Slave::statusUpdate(const StatusUpdate& update, const UPID& pid)
executor->terminateTask(status.task_id(), status.state());
// Tell the isolator to update the resources.
- dispatch(isolator,
- &Isolator::resourcesChanged,
- framework->id,
- executor->id,
- executor->resources);
+ // TODO(idownes): Wait until this completes.
+ containerizer->update(executor->containerId, executor->resources);
}
if (executor->checkpoint) {
// Ask the status update manager to checkpoint and reliably send the update.
- statusUpdateManager->update(update, info.id(), executor->id, executor->uuid)
+ statusUpdateManager->update(
+ update,
+ info.id(),
+ executor->id,
+ executor->containerId)
.onAny(defer(self(),
&Slave::_statusUpdate,
lambda::_1,
@@ -2032,12 +1911,6 @@ ExecutorInfo Slave::getExecutorInfo(
"'; exit 1");
}
- // TODO(benh): Set some resources for the executor so that a task
- // doesn't end up getting killed because the amount of resources
- // of the executor went over those allocated. Note that this might
- // mean that the number of resources on the machine will actually
- // be slightly oversubscribed, so we'll need to reevaluate with
- // respect to resources that can't be oversubscribed.
return executor;
}
@@ -2045,23 +1918,41 @@ ExecutorInfo Slave::getExecutorInfo(
}
-void _watch(
- const Future<Nothing>& watch,
+void _monitor(
+ const Future<Nothing>& monitor,
const FrameworkID& frameworkId,
- const ExecutorID& executorId);
-
+ const ExecutorID& executorId,
+ const ContainerID& containerId)
+{
+ if (!monitor.isReady()) {
+ LOG(ERROR) << "Failed to monitor container '" << containerId
+ << "' for executor '" << executorId
+ << "' of framework '" << frameworkId
+ << ":" << (monitor.isFailed() ? monitor.failure() : "discarded");
+ }
+}
-// N.B. When the slave is running in "local" mode then the pid is
-// uninteresting (and possibly could cause bugs).
void Slave::executorStarted(
const FrameworkID& frameworkId,
const ExecutorID& executorId,
- pid_t pid)
+ const ContainerID& containerId,
+ const Future<Nothing>& future)
{
+ if (!future.isReady()) {
+ // The containerizer will clean up if the launch fails; we'll just log this
+ // and leave the executor registration to time out.
+ LOG(ERROR) << "Container '" << containerId
+ << "' for executor '" << executorId
+ << "' of framework '" << frameworkId
+ << "' failed to start: "
+ << (future.isFailed() ? future.failure() : " future discarded");
+ return;
+ }
+
Framework* framework = getFramework(frameworkId);
if (framework == NULL) {
- LOG(WARNING) << "Framework " << frameworkId
- << " for executor '" << executorId
+ LOG(WARNING) << "Framework '" << frameworkId
+ << "' for executor '" << executorId
<< "' is no longer valid";
return;
}
@@ -2074,61 +1965,52 @@ void Slave::executorStarted(
LOG(WARNING) << "Killing executor '" << executorId
<< "' of framework '" << frameworkId
<< "' because the framework is terminating";
- dispatch(isolator, &Isolator::killExecutor, frameworkId, executorId);
+ containerizer->destroy(containerId);
return;
}
Executor* executor = framework->getExecutor(executorId);
if (executor == NULL) {
LOG(WARNING) << "Killing unknown executor '" << executorId
- << "' of framework " << frameworkId;
- dispatch(isolator, &Isolator::killExecutor, frameworkId, executorId);
+ << "' of framework '" << frameworkId << "'";
+ containerizer->destroy(containerId);
return;
}
switch (executor->state) {
case Executor::TERMINATING:
LOG(WARNING) << "Killing executor '" << executorId
- << "' of framework " << frameworkId
- << " because the executor is terminating";
- dispatch(isolator, &Isolator::killExecutor, frameworkId, executorId);
+ << "' of framework '" << frameworkId
+ << "' because the executor is terminating";
+ containerizer->destroy(containerId);
break;
case Executor::REGISTERING:
case Executor::RUNNING:
- LOG(INFO) << "Monitoring executor " << executorId
- << " of framework " << frameworkId
- << " forked at pid " << pid;
- monitor.watch(
- frameworkId,
- executorId,
+ LOG(INFO) << "Monitoring executor '" << executorId
+ << "' of framework '" << frameworkId
+ << "' in container '" << containerId << "'";
+ // Start monitoring the container's resources.
+ monitor.start(
+ containerId,
executor->info,
flags.resource_monitoring_interval)
- .onAny(lambda::bind(_watch, lambda::_1, frameworkId, executorId));
+ .onAny(lambda::bind(_monitor,
+ lambda::_1,
+ frameworkId,
+ executorId,
+ containerId));
break;
case Executor::TERMINATED:
default:
LOG(FATAL) << " Executor '" << executorId
- << "' of framework " << frameworkId
- << "is in unexpected state " << executor->state;
+ << "' of framework '" << frameworkId
+ << "' is in an unexpected state " << executor->state;
break;
}
}
-void _watch(
- const Future<Nothing>& watch,
- const FrameworkID& frameworkId,
- const ExecutorID& executorId)
-{
- if (!watch.isReady()) {
- LOG(ERROR) << "Failed to watch executor " << executorId
- << " of framework " << frameworkId
- << ": " << (watch.isFailed() ? watch.failure() : "discarded");
- }
-}
-
-
-void _unwatch(
+void _unmonitor(
const Future<Nothing>& watch,
const FrameworkID& frameworkId,
const ExecutorID& executorId);
@@ -2138,19 +2020,30 @@ void _unwatch(
void Slave::executorTerminated(
const FrameworkID& frameworkId,
const ExecutorID& executorId,
- const Option<int>& status_,
- bool destroyed,
- const string& message)
+ const Future<Containerizer::Termination>& termination)
{
int status;
- if (status_.isNone()) {
+ // A termination failure indicates the containerizer could not destroy a
+ // container.
+ // TODO(idownes): This is a serious error so consider aborting the slave if
+ // this occurs.
+ if (!termination.isReady()) {
+ LOG(ERROR) << "Termination of executor '" << executorId
+ << "' of framework '" << frameworkId
+ << "' failed: "
+ << (termination.isFailed()
+ ? termination.failure()
+ : "discarded");
+ // Set a special status for failure.
+ status = -1;
+ } else if (termination.get().status.isNone()) {
LOG(INFO) << "Executor '" << executorId
<< "' of framework " << frameworkId
<< " has terminated with unknown status";
// Set a special status for None.
status = -1;
} else {
- status = status_.get();
+ status = termination.get().status.get();
LOG(INFO) << "Executor '" << executorId
<< "' of framework " << frameworkId
<< (WIFEXITED(status)
@@ -2187,12 +2080,12 @@ void Slave::executorTerminated(
case Executor::TERMINATING: {
executor->state = Executor::TERMINATED;
- // Stop monitoring this executor.
- monitor.unwatch(frameworkId, executorId)
- .onAny(lambda::bind(_unwatch, lambda::_1, frameworkId, executorId));
+ // Stop monitoring the executor's container.
+ monitor.stop(executor->containerId)
+ .onAny(lambda::bind(_unmonitor, lambda::_1, frameworkId, executorId));
// Transition all live tasks to TASK_LOST/TASK_FAILED.
- // If the isolator destroyed the executor (e.g., due to OOM event)
+ // If the containerizer killed the executor (e.g., due to OOM event)
// or if this is a command executor, we send TASK_FAILED status updates
// instead of TASK_LOST.
// NOTE: We don't send updates if the framework is terminating
@@ -2209,7 +2102,8 @@ void Slave::executorTerminated(
foreach (Task* task, executor->launchedTasks.values()) {
if (!protobuf::isTerminalState(task->state())) {
mesos::TaskState taskState;
- if (destroyed || executor->commandExecutor) {
+ if ((termination.isReady() && termination.get().killed) ||
+ executor->commandExecutor) {
taskState = TASK_FAILED;
} else {
taskState = TASK_LOST;
@@ -2219,7 +2113,8 @@ void Slave::executorTerminated(
info.id(),
task->task_id(),
taskState,
- message,
+ termination.isReady() ? termination.get().message :
+ "Abnormal executor termination",
executorId),
UPID());
}
@@ -2230,7 +2125,8 @@ void Slave::executorTerminated(
// supports it.
foreach (const TaskInfo& task, executor->queuedTasks.values()) {
mesos::TaskState taskState;
- if (destroyed || executor->commandExecutor) {
+ if ((termination.isReady() && termination.get().killed) ||
+ executor->commandExecutor) {
taskState = TASK_FAILED;
} else {
taskState = TASK_LOST;
@@ -2240,7 +2136,8 @@ void Slave::executorTerminated(
info.id(),
task.task_id(),
taskState,
- message,
+ termination.isReady() ? termination.get().message :
+ "Abnormal executor termination",
executorId),
UPID());
}
@@ -2305,7 +2202,7 @@ void Slave::removeExecutor(Framework* framework, Executor* executor)
// is completed.
if (executor->checkpoint) {
const string& path = paths::getExecutorSentinelPath(
- metaDir, info.id(), framework->id, executor->id, executor->uuid);
+ metaDir, info.id(), framework->id, executor->id, executor->containerId);
CHECK_SOME(os::touch(path));
}
@@ -2314,7 +2211,11 @@ void Slave::removeExecutor(Framework* framework, Executor* executor)
// Schedule the executor run work directory to get garbage collected.
const string& path = paths::getExecutorRunPath(
- flags.work_dir, info.id(), framework->id, executor->id, executor->uuid);
+ flags.work_dir,
+ info.id(),
+ framework->id,
+ executor->id,
+ executor->containerId);
os::utime(path); // Update the modification time.
garbageCollect(path)
@@ -2333,7 +2234,7 @@ void Slave::removeExecutor(Framework* framework, Executor* executor)
if (executor->checkpoint) {
// Schedule the executor run meta directory to get garbage collected.
const string& path = paths::getExecutorRunPath(
- metaDir, info.id(), framework->id, executor->id, executor->uuid);
+ metaDir, info.id(), framework->id, executor->id, executor->containerId);
os::utime(path); // Update the modification time.
garbageCollect(path);
@@ -2410,15 +2311,15 @@ void Slave::removeFramework(Framework* framework)
}
-void _unwatch(
- const Future<Nothing>& unwatch,
+void _unmonitor(
+ const Future<Nothing>& unmonitor,
const FrameworkID& frameworkId,
const ExecutorID& executorId)
{
- if (!unwatch.isReady()) {
- LOG(ERROR) << "Failed to unwatch executor " << executorId
+ if (!unmonitor.isReady()) {
+ LOG(ERROR) << "Failed to unmonitor container for executor " << executorId
<< " of framework " << frameworkId << ": "
- << (unwatch.isFailed() ? unwatch.failure() : "discarded");
+ << (unmonitor.isFailed() ? unmonitor.failure() : "discarded");
}
}
@@ -2452,14 +2353,14 @@ void Slave::shutdownExecutor(Framework* framework, Executor* executor)
&Slave::shutdownExecutorTimeout,
framework->id,
executor->id,
- executor->uuid);
+ executor->containerId);
}
void Slave::shutdownExecutorTimeout(
const FrameworkID& frameworkId,
const ExecutorID& executorId,
- const UUID& uuid)
+ const ContainerID& containerId)
{
Framework* framework = getFramework(frameworkId);
if (framework == NULL) {
@@ -2481,12 +2382,12 @@ void Slave::shutdownExecutorTimeout(
return;
}
- if (executor->uuid != uuid ) { // Make sure this timeout is valid.
+ if (executor->containerId != containerId) { // Make sure this timeout is valid.
LOG(INFO) << "A new executor '" << executorId
<< "' of framework " << frameworkId
- << " with run " << executor->uuid
+ << " with run " << executor->containerId
<< " seems to be active. Ignoring the shutdown timeout"
- << " for the old executor run " << uuid;
+ << " for the old executor run " << containerId;
return;
}
@@ -2500,7 +2401,7 @@ void Slave::shutdownExecutorTimeout(
LOG(INFO) << "Killing executor '" << executor->id
<< "' of framework " << framework->id;
- dispatch(isolator, &Isolator::killExecutor, framework->id, executor->id);
+ containerizer->destroy(executor->containerId);
break;
default:
LOG(FATAL) << "Executor '" << executor->id
@@ -2514,7 +2415,7 @@ void Slave::shutdownExecutorTimeout(
void Slave::registerExecutorTimeout(
const FrameworkID& frameworkId,
const ExecutorID& executorId,
- const UUID& uuid)
+ const ContainerID& containerId)
{
Framework* framework = getFramework(frameworkId);
if (framework == NULL) {
@@ -2543,12 +2444,12 @@ void Slave::registerExecutorTimeout(
return;
}
- if (executor->uuid != uuid ) {
+ if (executor->containerId != containerId ) {
LOG(INFO) << "A new executor '" << executorId
<< "' of framework " << frameworkId
- << " with run " << executor->uuid
+ << " with run " << executor->containerId
<< " seems to be active. Ignoring the registration timeout"
- << " for the old executor run " << uuid;
+ << " for the old executor run " << containerId;
return;
}
@@ -2567,7 +2468,7 @@ void Slave::registerExecutorTimeout(
executor->state = Executor::TERMINATING;
// Immediately kill the executor.
- dispatch(isolator, &Isolator::killExecutor, framework->id, executor->id);
+ containerizer->destroy(executor->containerId);
break;
default:
LOG(FATAL) << "Executor '" << executor->id
@@ -2670,7 +2571,14 @@ Future<Nothing> Slave::recover(const Result<SlaveState>& _state)
}
return statusUpdateManager->recover(metaDir, state)
- .then(defer(isolator, &Isolator::recover, state));
+ .then(defer(self(), &Slave::_recoverContainerizer, state));
+}
+
+
+Future<Nothing> Slave::_recoverContainerizer(
+ const Option<state::SlaveState>& state)
+{
+ return containerizer->recover(state);
}
@@ -2678,22 +2586,25 @@ Future<Nothing> Slave::_recover()
{
foreachvalue (Framework* framework, frameworks) {
foreachvalue (Executor* executor, framework->executors) {
- // If the executor is already terminating/terminated don't
- // bother reconnecting or killing it. This could happen if
- // the recovered isolator sent a 'ExecutorTerminated' message
- // before the slave is here.
- if (executor->state == Executor::TERMINATING ||
- executor->state == Executor::TERMINATED) {
- continue;
- }
-
// Monitor the executor.
- monitor.watch(
- framework->id,
- executor->id,
+ monitor.start(
+ executor->containerId,
executor->info,
flags.resource_monitoring_interval)
- .onAny(lambda::bind(_watch, lambda::_1, framework->id, executor->id));
+ .onAny(lambda::bind(_monitor,
+ lambda::_1,
+ framework->id,
+ executor->id,
+ executor->containerId));
+
+ // Set up callback for executor termination.
+ containerizer->wait(executor->containerId)
+ .onAny(defer(self(),
+ &Self::executorTerminated,
+ framework->id,
+ executor->id,
+ lambda::_1));
+
if (flags.recover == "reconnect") {
if (executor->pid) {
@@ -2722,8 +2633,7 @@ Future<Nothing> Slave::_recover()
<< "' of framework " << framework->id
<< " because no libprocess PID was found";
- dispatch(
- isolator, &Isolator::killExecutor, framework->id, executor->id);
+ containerizer->destroy(executor->containerId);
}
}
}
@@ -2924,12 +2834,12 @@ Executor* Framework::launchExecutor(
const ExecutorInfo& executorInfo,
const TaskInfo& taskInfo)
{
- // We create a UUID for the new executor. The UUID uniquely
- // identifies this new instance of the executor across executors
- // sharing the same executorID that may have previously run. It
- // also provides a means for the executor to have a unique
- // directory.
- UUID uuid = UUID::random();
+ // Generate an ID for the executor's container.
+ // TODO(idownes): This should be done by the containerizer but we need the
+ // ContainerID to create the executor's directory and to set up monitoring.
+ // Fix this when 'launchExecutor()' is handled asynchronously.
+ ContainerID containerId;
+ containerId.set_value(UUID::random().toString());
// Create a directory for the executor.
const string& directory = paths::createExecutorDirectory(
@@ -2937,10 +2847,10 @@ Executor* Framework::launchExecutor(
slave->info.id(),
id,
executorInfo.executor_id(),
- uuid);
+ containerId);
Executor* executor = new Executor(
- slave, id, executorInfo, uuid, directory, info.checkpoint());
+ slave, id, executorInfo, containerId, directory, info.checkpoint());
CHECK(!executors.contains(executorInfo.executor_id()))
<< "Unknown executor " << executorInfo.executor_id();
@@ -2953,20 +2863,37 @@ Executor* Framework::launchExecutor(
lambda::_1,
executor->directory));
- // Tell the isolator to launch the executor.
- // NOTE: We include the task's resources when launching the
- // executor so that the isolator has non-zero resources to
- // work with when the executor has no resources. This should
- // be revisited after MESOS-600.
- dispatch(slave->isolator,
- &Isolator::launchExecutor,
- slave->info.id(),
- id,
- info,
- executor->info,
- executor->uuid,
- executor->directory,
- executor->resources + taskInfo.resources());
+ // Tell the containerizer to launch the executor.
+ // NOTE: We modify the ExecutorInfo to include the task's
+ // resources when launching the executor so that the containerizer
+ // has non-zero resources to work with when the executor has
+ // no resources. This should be revisited after MESOS-600.
+ ExecutorInfo executorInfo_ = executor->info;
+ executorInfo_.mutable_resources()->MergeFrom(taskInfo.resources());
+
+ // Launch the container.
+ slave->containerizer->launch(
+ containerId,
+ executorInfo_, // modified to include the task's resources
+ executor->directory,
+ slave->flags.switch_user ? Option<string>(info.user()) : None(),
+ slave->info.id(),
+ slave->self(),
+ info.checkpoint())
+ .onAny(defer(slave,
+ &Slave::executorStarted,
+ id,
+ executor->id,
+ containerId,
+ lambda::_1));
+
+ // Set up callback for executor termination.
+ slave->containerizer->wait(containerId)
+ .onAny(defer(slave,
+ &Slave::executorTerminated,
+ id,
+ executor->id,
+ lambda::_1));
// Make sure the executor registers within the given timeout.
delay(slave->flags.executor_registration_timeout,
@@ -2974,7 +2901,7 @@ Executor* Framework::launchExecutor(
&Slave::registerExecutorTimeout,
id,
executor->id,
- executor->uuid);
+ containerId);
return executor;
}
@@ -3043,11 +2970,11 @@ void Framework::recoverExecutor(const ExecutorState& state)
// NOTE: We don't schedule the top level executor work and meta
// directories for GC here, because they will be scheduled when
// the latest executor run terminates.
- const UUID& uuid = state.latest.get();
+ const ContainerID& latest = state.latest.get();
foreachvalue (const RunState& run, state.runs) {
CHECK_SOME(run.id);
- const UUID& runId = run.id.get();
- if (uuid != runId) {
+ const ContainerID& runId = run.id.get();
+ if (latest != runId) {
// GC the executor run's work directory.
// TODO(vinod): Expose this directory to webui by recovering the
// tasks and doing a 'files->attach()'.
@@ -3060,18 +2987,18 @@ void Framework::recoverExecutor(const ExecutorState& state)
}
}
- CHECK(state.runs.contains(uuid))
- << "Cannot find latest run " << uuid << " for executor " << state.id
+ CHECK(state.runs.contains(latest))
+ << "Cannot find latest run " << latest << " for executor " << state.id
<< " of framework " << id;
- const RunState& run = state.runs.get(uuid).get();
+ const RunState& run = state.runs.get(latest).get();
// Create executor.
const string& directory = paths::getExecutorRunPath(
- slave->flags.work_dir, slave->info.id(), id, state.id, uuid);
+ slave->flags.work_dir, slave->info.id(), id, state.id, latest);
Executor* executor = new Executor(
- slave, id, state.info.get(), uuid, directory, info.checkpoint());
+ slave, id, state.info.get(), latest, directory, info.checkpoint());
// Recover the libprocess PID if possible.
if (run.libprocessPid.isSome()) {
@@ -3109,7 +3036,7 @@ void Framework::recoverExecutor(const ExecutorState& state)
executor->state = Executor::TERMINATED;
CHECK_SOME(run.id);
- const UUID& runId = run.id.get();
+ const ContainerID& runId = run.id.get();
// GC the executor run's work directory.
const string& path = paths::getExecutorRunPath(
@@ -3142,7 +3069,7 @@ Executor::Executor(
Slave* _slave,
const FrameworkID& _frameworkId,
const ExecutorInfo& _info,
- const UUID& _uuid,
+ const ContainerID& _containerId,
const string& _directory,
bool _checkpoint)
: state(REGISTERING),
@@ -3150,7 +3077,7 @@ Executor::Executor(
id(_info.executor_id()),
info(_info),
frameworkId(_frameworkId),
- uuid(_uuid),
+ containerId(_containerId),
directory(_directory),
checkpoint(_checkpoint),
commandExecutor(strings::contains(
@@ -3161,6 +3088,7 @@ Executor::Executor(
completedTasks(MAX_COMPLETED_TASKS_PER_EXECUTOR)
{
CHECK_NOTNULL(slave);
+
if (checkpoint && slave->state != slave->RECOVERING) {
// Checkpoint the executor info.
const string& path = paths::getExecutorInfoPath(
@@ -3172,7 +3100,7 @@ Executor::Executor(
// Create the meta executor directory.
// NOTE: This creates the 'latest' symlink in the meta directory.
paths::createExecutorDirectory(
- slave->metaDir, slave->info.id(), frameworkId, id, uuid);
+ slave->metaDir, slave->info.id(), frameworkId, id, containerId);
}
}
@@ -3252,7 +3180,12 @@ void Executor::checkpointTask(const TaskInfo& task)
const Task& t = protobuf::createTask(task, TASK_STAGING, id, frameworkId);
const string& path = paths::getTaskInfoPath(
- slave->metaDir, slave->info.id(), frameworkId, id, uuid, t.task_id());
+ slave->metaDir,
+ slave->info.id(),
+ frameworkId,
+ id,
+ containerId,
+ t.task_id());
LOG(INFO) << "Checkpointing TaskInfo to '" << path << "'";
CHECK_SOME(state::checkpoint(path, t));
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/slave/slave.hpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.hpp b/src/slave/slave.hpp
index 2ddadb4..d82d4e9 100644
--- a/src/slave/slave.hpp
+++ b/src/slave/slave.hpp
@@ -46,9 +46,9 @@
#include "master/detector.hpp"
#include "slave/constants.hpp"
+#include "slave/containerizer/containerizer.hpp"
#include "slave/flags.hpp"
#include "slave/gc.hpp"
-#include "slave/isolator.hpp"
#include "slave/monitor.hpp"
#include "slave/paths.hpp"
#include "slave/state.hpp"
@@ -79,9 +79,8 @@ class Slave : public ProtobufProcess<Slave>
{
public:
Slave(const Flags& flags,
- bool local,
MasterDetector* detector,
- Isolator* isolator,
+ Containerizer* containerizer,
Files* files);
virtual ~Slave();
@@ -179,14 +178,13 @@ public:
void executorStarted(
const FrameworkID& frameworkId,
const ExecutorID& executorId,
- pid_t pid);
+ const ContainerID& containerId,
+ const Future<Nothing>& future);
void executorTerminated(
const FrameworkID& frameworkId,
const ExecutorID& executorId,
- const Option<int>& status,
- bool destroyed,
- const std::string& message);
+ const Future<Containerizer::Termination>& termination);
// NOTE: Pulled these to public to make it visible for testing.
// TODO(vinod): Make tests friends to this class instead.
@@ -241,13 +239,13 @@ public:
void shutdownExecutorTimeout(
const FrameworkID& frameworkId,
const ExecutorID& executorId,
- const UUID& uuid);
+ const ContainerID& containerId);
// Shuts down the executor if it did not register yet.
void registerExecutorTimeout(
const FrameworkID& frameworkId,
const ExecutorID& executorId,
- const UUID& uuid);
+ const ContainerID& containerId);
// Cleans up all un-reregistered executors during recovery.
void reregisterExecutorTimeout();
@@ -262,11 +260,16 @@ public:
// Recovers the slave, status update manager and isolator.
Future<Nothing> recover(const Result<state::SlaveState>& state);
- // This is called after 'recoveR()'. If 'flags.reconnect' is
+ // This is called after 'recover()'. If 'flags.reconnect' is
// 'reconnect', the slave attempts to reconnect to any old live
// executors. Otherwise, the slave attempts to shutdown/kill them.
Future<Nothing> _recover();
+ // This is a helper to call recover() on the containerizer at the end of
+ // recover() and before __recover().
+ // TODO(idownes): Remove this when we support defers to objects.
+ Future<Nothing> _recoverContainerizer(const Option<state::SlaveState>& state);
+
// This is called when recovery finishes.
void __recover(const Future<Nothing>& future);
@@ -316,8 +319,6 @@ private:
const Flags flags;
- bool local;
-
SlaveInfo info;
Option<UPID> master;
@@ -331,7 +332,8 @@ private:
MasterDetector* detector;
- Isolator* isolator;
+ Containerizer* containerizer;
+
Files* files;
// Statistics (initialized in Slave::initialize).
@@ -369,7 +371,7 @@ struct Executor
Slave* slave,
const FrameworkID& frameworkId,
const ExecutorInfo& info,
- const UUID& uuid,
+ const ContainerID& containerId,
const std::string& directory,
bool checkpoint);
@@ -402,7 +404,7 @@ struct Executor
const FrameworkID frameworkId;
- const UUID uuid; // Distinguishes executor instances with same ExecutorID.
+ const ContainerID containerId;
const std::string directory;
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/slave/state.cpp
----------------------------------------------------------------------
diff --git a/src/slave/state.cpp b/src/slave/state.cpp
index 6c382cd..9af6c5b 100644
--- a/src/slave/state.cpp
+++ b/src/slave/state.cpp
@@ -336,21 +336,25 @@ Try<ExecutorState> ExecutorState::recover(
: "No such file or directory"));
}
- // Store the UUID of the latest executor run.
- state.latest = UUID::fromString(os::basename(latest.get()).get());
+ // Store the ContainerID of the latest executor run.
+ ContainerID containerId;
+ containerId.set_value(os::basename(latest.get()).get());
+ state.latest = containerId;
} else {
- const UUID& uuid = UUID::fromString(os::basename(path).get());
+ ContainerID containerId;
+ containerId.set_value(os::basename(path).get());
const Try<RunState>& run = RunState::recover(
- rootDir, slaveId, frameworkId, executorId, uuid, strict);
+ rootDir, slaveId, frameworkId, executorId, containerId, strict);
if (run.isError()) {
- return Error("Failed to recover run " + uuid.toString() +
- " of executor '" + executorId.value() +
- "': " + run.error());
+ return Error(
+ "Failed to recover run " + containerId.value() +
+ " of executor '" + executorId.value() +
+ "': " + run.error());
}
- state.runs[uuid] = run.get();
+ state.runs[containerId] = run.get();
state.errors += run.get().errors;
}
}
@@ -373,11 +377,11 @@ Try<RunState> RunState::recover(
const SlaveID& slaveId,
const FrameworkID& frameworkId,
const ExecutorID& executorId,
- const UUID& uuid,
+ const ContainerID& containerId,
bool strict)
{
RunState state;
- state.id = uuid;
+ state.id = containerId;
string message;
// Find the tasks.
@@ -387,12 +391,13 @@ Try<RunState> RunState::recover(
slaveId,
frameworkId,
executorId,
- uuid.toString(),
+ containerId,
"*").get());
if (tasks.isError()) {
- return Error("Failed to find tasks for executor run " + uuid.toString() +
- ": " + tasks.error());
+ return Error(
+ "Failed to find tasks for executor run " + containerId.value() +
+ ": " + tasks.error());
}
// Recover tasks.
@@ -401,7 +406,7 @@ Try<RunState> RunState::recover(
taskId.set_value(os::basename(path).get());
const Try<TaskState>& task = TaskState::recover(
- rootDir, slaveId, frameworkId, executorId, uuid, taskId, strict);
+ rootDir, slaveId, frameworkId, executorId, containerId, taskId, strict);
if (task.isError()) {
return Error(
@@ -414,7 +419,7 @@ Try<RunState> RunState::recover(
// Read the forked pid.
string path = paths::getForkedPidPath(
- rootDir, slaveId, frameworkId, executorId, uuid);
+ rootDir, slaveId, frameworkId, executorId, containerId);
if (!os::exists(path)) {
// This could happen if the slave died before the isolator
// checkpointed the forked pid.
@@ -454,7 +459,7 @@ Try<RunState> RunState::recover(
// Read the libprocess pid.
path = paths::getLibprocessPidPath(
- rootDir, slaveId, frameworkId, executorId, uuid);
+ rootDir, slaveId, frameworkId, executorId, containerId);
if (!os::exists(path)) {
// This could happen if the slave died before the executor
@@ -490,7 +495,7 @@ Try<RunState> RunState::recover(
// See if the sentinel file exists.
path = paths::getExecutorSentinelPath(
- rootDir, slaveId, frameworkId, executorId, uuid);
+ rootDir, slaveId, frameworkId, executorId, containerId);
state.completed = os::exists(path);
@@ -503,7 +508,7 @@ Try<TaskState> TaskState::recover(
const SlaveID& slaveId,
const FrameworkID& frameworkId,
const ExecutorID& executorId,
- const UUID& uuid,
+ const ContainerID& containerId,
const TaskID& taskId,
bool strict)
{
@@ -513,7 +518,7 @@ Try<TaskState> TaskState::recover(
// Read the task info.
string path = paths::getTaskInfoPath(
- rootDir, slaveId, frameworkId, executorId, uuid, taskId);
+ rootDir, slaveId, frameworkId, executorId, containerId, taskId);
if (!os::exists(path)) {
// This could happen if the slave died after creating the task
// directory but before it checkpointed the task info.
@@ -546,7 +551,7 @@ Try<TaskState> TaskState::recover(
// Read the status updates.
path = paths::getTaskUpdatesPath(
- rootDir, slaveId, frameworkId, executorId, uuid, taskId);
+ rootDir, slaveId, frameworkId, executorId, containerId, taskId);
if (!os::exists(path)) {
// This could happen if the slave died before it checkpointed
// any status updates for this task.
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/slave/state.hpp
----------------------------------------------------------------------
diff --git a/src/slave/state.hpp b/src/slave/state.hpp
index 78b20ff..22f569d 100644
--- a/src/slave/state.hpp
+++ b/src/slave/state.hpp
@@ -126,8 +126,8 @@ struct ExecutorState
ExecutorID id;
Option<ExecutorInfo> info;
- Option<UUID> latest;
- hashmap<UUID, RunState> runs;
+ Option<ContainerID> latest;
+ hashmap<ContainerID, RunState> runs;
unsigned int errors;
};
@@ -141,10 +141,10 @@ struct RunState
const SlaveID& slaveId,
const FrameworkID& frameworkId,
const ExecutorID& executorId,
- const UUID& uuid,
+ const ContainerID& containerId,
bool strict);
- Option<UUID> id;
+ Option<ContainerID> id;
hashmap<TaskID, TaskState> tasks;
Option<pid_t> forkedPid;
Option<process::UPID> libprocessPid;
@@ -162,7 +162,7 @@ struct TaskState
const SlaveID& slaveId,
const FrameworkID& frameworkId,
const ExecutorID& executorId,
- const UUID& uuid,
+ const ContainerID& containerId,
const TaskID& taskId,
bool strict);
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/slave/status_update_manager.cpp
----------------------------------------------------------------------
diff --git a/src/slave/status_update_manager.cpp b/src/slave/status_update_manager.cpp
index 03f5eaf..a88bb18 100644
--- a/src/slave/status_update_manager.cpp
+++ b/src/slave/status_update_manager.cpp
@@ -71,7 +71,7 @@ public:
const StatusUpdate& update,
const SlaveID& slaveId,
const ExecutorID& executorId,
- const UUID& uuid);
+ const ContainerID& containerId);
Future<Nothing> update(
const StatusUpdate& update,
@@ -99,7 +99,7 @@ private:
const SlaveID& slaveId,
bool checkpoint,
const Option<ExecutorID>& executorId,
- const Option<UUID>& uuid);
+ const Option<ContainerID>& containerId);
// Status update timeout.
void timeout(const Duration& duration);
@@ -120,7 +120,7 @@ private:
const SlaveID& slaveId,
bool checkpoint,
const Option<ExecutorID>& executorId,
- const Option<UUID>& uuid);
+ const Option<ContainerID>& containerId);
StatusUpdateStream* getStatusUpdateStream(
const TaskID& taskId,
@@ -213,15 +213,16 @@ Future<Nothing> StatusUpdateManagerProcess::recover(
}
// We are only interested in the latest run of the executor!
- const UUID& uuid = executor.latest.get();
- CHECK(executor.runs.contains(uuid));
- const RunState& run = executor.runs.get(uuid).get();
+ const ContainerID& latest = executor.latest.get();
+ CHECK(executor.runs.contains(latest));
+ const RunState& run = executor.runs.get(latest).get();
if (run.completed) {
VLOG(1) << "Skipping recovering updates of"
<< " executor '" << executor.id
<< "' of framework " << framework.id
- << " because its latest run " << uuid << " is completed";
+ << " because its latest run " << latest.value()
+ << " is completed";
continue;
}
@@ -238,7 +239,7 @@ Future<Nothing> StatusUpdateManagerProcess::recover(
// Create a new status update stream.
StatusUpdateStream* stream = createStatusUpdateStream(
- task.id, framework.id, state.get().id, true, executor.id, uuid);
+ task.id, framework.id, state.get().id, true, executor.id, latest);
// Replay the stream.
Try<Nothing> replay = stream->replay(task.updates, task.acks);
@@ -287,9 +288,9 @@ Future<Nothing> StatusUpdateManagerProcess::update(
const StatusUpdate& update,
const SlaveID& slaveId,
const ExecutorID& executorId,
- const UUID& uuid)
+ const ContainerID& containerId)
{
- return _update(update, slaveId, true, executorId, uuid);
+ return _update(update, slaveId, true, executorId, containerId);
}
@@ -306,7 +307,7 @@ Future<Nothing> StatusUpdateManagerProcess::_update(
const SlaveID& slaveId,
bool checkpoint,
const Option<ExecutorID>& executorId,
- const Option<UUID>& uuid)
+ const Option<ContainerID>& containerId)
{
const TaskID& taskId = update.status().task_id();
const FrameworkID& frameworkId = update.framework_id();
@@ -318,7 +319,7 @@ Future<Nothing> StatusUpdateManagerProcess::_update(
StatusUpdateStream* stream = getStatusUpdateStream(taskId, frameworkId);
if (stream == NULL) {
stream = createStatusUpdateStream(
- taskId, frameworkId, slaveId, checkpoint, executorId, uuid);
+ taskId, frameworkId, slaveId, checkpoint, executorId, containerId);
}
// Verify that we didn't get a non-checkpointable update for a
@@ -488,13 +489,13 @@ StatusUpdateStream* StatusUpdateManagerProcess::createStatusUpdateStream(
const SlaveID& slaveId,
bool checkpoint,
const Option<ExecutorID>& executorId,
- const Option<UUID>& uuid)
+ const Option<ContainerID>& containerId)
{
VLOG(1) << "Creating StatusUpdate stream for task " << taskId
<< " of framework " << frameworkId;
StatusUpdateStream* stream = new StatusUpdateStream(
- taskId, frameworkId, slaveId, flags, checkpoint, executorId, uuid);
+ taskId, frameworkId, slaveId, flags, checkpoint, executorId, containerId);
streams[frameworkId][taskId] = stream;
return stream;
@@ -569,7 +570,7 @@ Future<Nothing> StatusUpdateManager::update(
const StatusUpdate& update,
const SlaveID& slaveId,
const ExecutorID& executorId,
- const UUID& uuid)
+ const ContainerID& containerId)
{
return dispatch(
process,
@@ -577,7 +578,7 @@ Future<Nothing> StatusUpdateManager::update(
update,
slaveId,
executorId,
- uuid);
+ containerId);
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/f90fe764/src/slave/status_update_manager.hpp
----------------------------------------------------------------------
diff --git a/src/slave/status_update_manager.hpp b/src/slave/status_update_manager.hpp
index 06ea465..d29e269 100644
--- a/src/slave/status_update_manager.hpp
+++ b/src/slave/status_update_manager.hpp
@@ -92,7 +92,7 @@ public:
const StatusUpdate& update,
const SlaveID& slaveId,
const ExecutorID& executorId,
- const UUID& uuid);
+ const ContainerID& containerId);
// Retries the update to the master (as long as the slave is
// alive), but does not checkpoint the update.
@@ -148,7 +148,7 @@ struct StatusUpdateStream
const Flags& _flags,
bool _checkpoint,
const Option<ExecutorID>& executorId,
- const Option<UUID>& uuid)
+ const Option<ContainerID>& containerId)
: checkpoint(_checkpoint),
terminated(false),
taskId(_taskId),
@@ -159,14 +159,14 @@ struct StatusUpdateStream
{
if (checkpoint) {
CHECK_SOME(executorId);
- CHECK_SOME(uuid);
+ CHECK_SOME(containerId);
path = paths::getTaskUpdatesPath(
paths::getMetaRootDir(flags.work_dir),
slaveId,
frameworkId,
executorId.get(),
- uuid.get(),
+ containerId.get(),
taskId);
// Create the base updates directory, if it doesn't exist.
[10/11] Containerizer - updated tests (part 5).
Posted by vi...@apache.org.
http://git-wip-us.apache.org/repos/asf/mesos/blob/0f3f8f35/src/tests/mesos.cpp
----------------------------------------------------------------------
diff --git a/src/tests/mesos.cpp b/src/tests/mesos.cpp
index 1b1b4cc..8c0b006 100644
--- a/src/tests/mesos.cpp
+++ b/src/tests/mesos.cpp
@@ -3,14 +3,18 @@
#include <stout/os.hpp>
#include <stout/path.hpp>
#include <stout/result.hpp>
+#include <stout/uuid.hpp>
#ifdef __linux__
#include "linux/cgroups.hpp"
#endif
+#include "slave/containerizer/containerizer.hpp"
+#include "slave/containerizer/mesos_containerizer.hpp"
+
+#include "tests/containerizer.hpp"
#include "tests/environment.hpp"
#include "tests/flags.hpp"
-#include "tests/isolator.hpp"
#include "tests/mesos.hpp"
using namespace process;
@@ -120,16 +124,16 @@ Try<process::PID<master::Master> > MesosTest::StartMaster(
Try<process::PID<slave::Slave> > MesosTest::StartSlave(
const Option<slave::Flags>& flags)
{
- TestingIsolator* isolator = new TestingIsolator();
+ slave::Containerizer* containerizer = new TestContainerizer();
- Try<process::PID<slave::Slave> > pid = StartSlave(isolator, flags);
+ Try<process::PID<slave::Slave> > pid = StartSlave(containerizer, flags);
if (pid.isError()) {
- delete isolator;
+ delete containerizer;
return pid;
}
- isolators[pid.get()] = isolator;
+ containerizers[pid.get()] = containerizer;
return pid;
}
@@ -139,37 +143,39 @@ Try<process::PID<slave::Slave> > MesosTest::StartSlave(
MockExecutor* executor,
const Option<slave::Flags>& flags)
{
- TestingIsolator* isolator = new TestingIsolator(executor);
-
- Try<process::PID<slave::Slave> > pid = StartSlave(isolator, flags);
+ slave::Containerizer* containerizer = new TestContainerizer(executor);
+
+ Try<process::PID<slave::Slave> > pid = StartSlave(containerizer, flags);
if (pid.isError()) {
- delete isolator;
+ delete containerizer;
return pid;
}
- isolators[pid.get()] = isolator;
+ containerizers[pid.get()] = containerizer;
return pid;
}
Try<process::PID<slave::Slave> > MesosTest::StartSlave(
- slave::Isolator* isolator,
+ slave::Containerizer* containerizer,
const Option<slave::Flags>& flags)
{
return cluster.slaves.start(
- isolator, flags.isNone() ? CreateSlaveFlags() : flags.get());
+ containerizer, flags.isNone() ? CreateSlaveFlags() : flags.get());
}
Try<process::PID<slave::Slave> > MesosTest::StartSlave(
- slave::Isolator* isolator,
+ slave::Containerizer* containerizer,
Owned<MasterDetector> detector,
const Option<slave::Flags>& flags)
{
return cluster.slaves.start(
- isolator, detector, flags.isNone() ? CreateSlaveFlags() : flags.get());
+ containerizer,
+ detector,
+ flags.isNone() ? CreateSlaveFlags() : flags.get());
}
@@ -187,17 +193,19 @@ Try<PID<slave::Slave> > MesosTest::StartSlave(
Owned<MasterDetector> detector,
const Option<slave::Flags>& flags)
{
- TestingIsolator* isolator = new TestingIsolator(executor);
+ slave::Containerizer* containerizer = new TestContainerizer(executor);
Try<process::PID<slave::Slave> > pid = cluster.slaves.start(
- isolator, detector, flags.isNone() ? CreateSlaveFlags() : flags.get());
+ containerizer,
+ detector,
+ flags.isNone() ? CreateSlaveFlags() : flags.get());
if (pid.isError()) {
- delete isolator;
+ delete containerizer;
return pid;
}
- isolators[pid.get()] = isolator;
+ containerizers[pid.get()] = containerizer;
return pid;
}
@@ -212,10 +220,10 @@ void MesosTest::Stop(const process::PID<master::Master>& pid)
void MesosTest::Stop(const process::PID<slave::Slave>& pid, bool shutdown)
{
cluster.slaves.stop(pid, shutdown);
- if (isolators.count(pid) > 0) {
- TestingIsolator* isolator = isolators[pid];
- isolators.erase(pid);
- delete isolator;
+ if (containerizers.count(pid) > 0) {
+ slave::Containerizer* containerizer = containerizers[pid];
+ containerizers.erase(pid);
+ delete containerizer;
}
}
@@ -237,10 +245,10 @@ void MesosTest::ShutdownSlaves()
{
cluster.slaves.shutdown();
- foreachvalue (TestingIsolator* isolator, isolators) {
- delete isolator;
+ foreachvalue (slave::Containerizer* containerizer, containerizers) {
+ delete containerizer;
}
- isolators.clear();
+ containerizers.clear();
}
@@ -251,71 +259,114 @@ void MesosTest::TearDown()
}
-#ifdef __linux__
-void IsolatorTest<slave::CgroupsIsolator>::SetUpTestCase()
+slave::Flags ContainerizerTest<slave::MesosContainerizer>::CreateSlaveFlags()
{
- // Clean up the testing hierarchy, in case it wasn't cleaned up
- // properly from previous tests.
- AWAIT_READY(cgroups::cleanup(TEST_CGROUPS_HIERARCHY));
-}
+ slave::Flags flags = MesosTest::CreateSlaveFlags();
+#ifdef __linux__
+ flags.isolation = "cgroups/cpu,cgroups/mem";
+ flags.cgroups_hierarchy = baseHierarchy;
+ flags.cgroups_root = TEST_CGROUPS_ROOT + "_" + UUID::random().toString();
+#else
+ flags.isolation = "posix/cpu,posix/mem";
+#endif
-void IsolatorTest<slave::CgroupsIsolator>::TearDownTestCase()
-{
- AWAIT_READY(cgroups::cleanup(TEST_CGROUPS_HIERARCHY));
+ return flags;
}
-slave::Flags IsolatorTest<slave::CgroupsIsolator>::CreateSlaveFlags()
+#ifdef __linux__
+void ContainerizerTest<slave::MesosContainerizer>::SetUpTestCase()
{
- slave::Flags flags = MesosTest::CreateSlaveFlags();
-
- flags.cgroups_hierarchy = hierarchy;
+ // Clean up any testing hierarchies.
+ Try<std::set<std::string> > hierarchies = cgroups::hierarchies();
+ ASSERT_SOME(hierarchies);
+ foreach (const std::string& hierarchy, hierarchies.get()) {
+ if (strings::startsWith(hierarchy, TEST_CGROUPS_HIERARCHY)) {
+ AWAIT_READY(cgroups::cleanup(hierarchy));
+ }
+ }
+}
- // TODO(benh): Create a different cgroups root for each slave.
- flags.cgroups_root = TEST_CGROUPS_ROOT;
- return flags;
+void ContainerizerTest<slave::MesosContainerizer>::TearDownTestCase()
+{
+ // Clean up any testing hierarchies.
+ Try<std::set<std::string> > hierarchies = cgroups::hierarchies();
+ ASSERT_SOME(hierarchies);
+ foreach (const std::string& hierarchy, hierarchies.get()) {
+ if (strings::startsWith(hierarchy, TEST_CGROUPS_HIERARCHY)) {
+ AWAIT_READY(cgroups::cleanup(hierarchy));
+ }
+ }
}
-void IsolatorTest<slave::CgroupsIsolator>::SetUp()
+void ContainerizerTest<slave::MesosContainerizer>::SetUp()
{
MesosTest::SetUp();
- const std::string subsystems = "cpu,cpuacct,memory,freezer";
- Result<std::string> hierarchy_ = cgroups::hierarchy(subsystems);
- ASSERT_FALSE(hierarchy_.isError());
- if (hierarchy_.isNone()) {
- // Try to mount a hierarchy for testing.
- ASSERT_SOME(cgroups::mount(TEST_CGROUPS_HIERARCHY, subsystems))
- << "-------------------------------------------------------------\n"
- << "We cannot run any cgroups tests that require\n"
- << "a hierarchy with subsystems '" << subsystems << "'\n"
- << "because we failed to find an existing hierarchy\n"
- << "or create a new one. You can either remove all existing\n"
- << "hierarchies, or disable this test case\n"
- << "(i.e., --gtest_filter=-"
- << ::testing::UnitTest::GetInstance()
- ->current_test_info()
- ->test_case_name() << ".*).\n"
- << "-------------------------------------------------------------";
-
- hierarchy = TEST_CGROUPS_HIERARCHY;
- } else {
- hierarchy = hierarchy_.get();
+ subsystems.insert("cpu");
+ subsystems.insert("cpuacct");
+ subsystems.insert("memory");
+ subsystems.insert("freezer");
+
+ foreach (const std::string& subsystem, subsystems) {
+ // Establish the base hierarchy if this is the first subsystem checked.
+ if (baseHierarchy.empty()) {
+ Result<std::string> hierarchy = cgroups::hierarchy(subsystem);
+ ASSERT_FALSE(hierarchy.isError());
+
+ if (hierarchy.isNone()) {
+ baseHierarchy = TEST_CGROUPS_HIERARCHY;
+ } else {
+ // Strip the subsystem to get the base hierarchy.
+ baseHierarchy = strings::remove(
+ hierarchy.get(),
+ subsystem,
+ strings::SUFFIX);
+ }
+ }
+
+ // Mount the subsystem if necessary.
+ std::string hierarchy = path::join(baseHierarchy, subsystem);
+ Try<bool> mounted = cgroups::mounted(hierarchy, subsystem);
+ ASSERT_SOME(mounted);
+ if (!mounted.get()) {
+ ASSERT_SOME(cgroups::mount(hierarchy, subsystem))
+ << "-------------------------------------------------------------\n"
+ << "We cannot run any cgroups tests that require\n"
+ << "a hierarchy with subsystem '" << subsystem << "'\n"
+ << "because we failed to find an existing hierarchy\n"
+ << "or create a new one (tried '" << hierarchy << "').\n"
+ << "You can either remove all existing\n"
+ << "hierarchies, or disable this test case\n"
+ << "(i.e., --gtest_filter=-"
+ << ::testing::UnitTest::GetInstance()
+ ->current_test_info()
+ ->test_case_name() << ".*).\n"
+ << "-------------------------------------------------------------";
+ }
}
}
-void IsolatorTest<slave::CgroupsIsolator>::TearDown()
+void ContainerizerTest<slave::MesosContainerizer>::TearDown()
{
MesosTest::TearDown();
- Try<bool> exists = cgroups::exists(hierarchy, TEST_CGROUPS_ROOT);
- ASSERT_SOME(exists);
- if (exists.get()) {
- AWAIT_READY(cgroups::destroy(hierarchy, TEST_CGROUPS_ROOT));
+ foreach (const std::string& subsystem, subsystems) {
+ std::string hierarchy = path::join(baseHierarchy, subsystem);
+
+ Try<std::vector<std::string> > cgroups = cgroups::get(hierarchy);
+ CHECK_SOME(cgroups);
+
+ foreach (const std::string& cgroup, cgroups.get()) {
+ // Remove any cgroups that start with TEST_CGROUPS_ROOT
+ if (strings::startsWith(cgroup, TEST_CGROUPS_ROOT)) {
+ AWAIT_READY(cgroups::destroy(hierarchy, cgroup));
+ }
+ }
}
}
#endif // __linux__
http://git-wip-us.apache.org/repos/asf/mesos/blob/0f3f8f35/src/tests/mesos.hpp
----------------------------------------------------------------------
diff --git a/src/tests/mesos.hpp b/src/tests/mesos.hpp
index d7bdaee..018d4ff 100644
--- a/src/tests/mesos.hpp
+++ b/src/tests/mesos.hpp
@@ -48,10 +48,8 @@
#include "master/hierarchical_allocator_process.hpp"
#include "master/master.hpp"
-#ifdef __linux__
-#include "slave/cgroups_isolator.hpp"
-#endif
-#include "slave/isolator.hpp"
+#include "slave/containerizer/containerizer.hpp"
+#include "slave/containerizer/mesos_containerizer.hpp"
#include "slave/slave.hpp"
#include "tests/cluster.hpp"
@@ -62,7 +60,6 @@ namespace tests {
// Forward declarations.
class MockExecutor;
-class TestingIsolator;
class MesosTest : public ::testing::Test
@@ -108,14 +105,14 @@ protected:
MockExecutor* executor,
const Option<slave::Flags>& flags = None());
- // Starts a slave with the specified isolator and flags.
+ // Starts a slave with the specified containerizer and flags.
virtual Try<process::PID<slave::Slave> > StartSlave(
- slave::Isolator* isolator,
+ slave::Containerizer* containerizer,
const Option<slave::Flags>& flags = None());
- // Starts a slave with the specified isolator, detector and flags.
+ // Starts a slave with the specified containerizer, detector and flags.
virtual Try<process::PID<slave::Slave> > StartSlave(
- slave::Isolator* isolator,
+ slave::Containerizer* containerizer,
process::Owned<MasterDetector> detector,
const Option<slave::Flags>& flags = None());
@@ -151,15 +148,13 @@ protected:
Cluster cluster;
- // TestingIsolator(s) created during test that we need to cleanup.
- std::map<process::PID<slave::Slave>, TestingIsolator*> isolators;
+ // Containerizer(s) created during test that we need to cleanup.
+ std::map<process::PID<slave::Slave>, slave::Containerizer*> containerizers;
};
-
template <typename T>
-class IsolatorTest : public MesosTest {};
-
+class ContainerizerTest : public MesosTest {};
#ifdef __linux__
// Cgroups hierarchy used by the cgroups related tests.
@@ -170,7 +165,7 @@ const static std::string TEST_CGROUPS_ROOT = "mesos_test";
template <>
-class IsolatorTest<slave::CgroupsIsolator> : public MesosTest
+class ContainerizerTest<slave::MesosContainerizer> : public MesosTest
{
public:
static void SetUpTestCase();
@@ -182,7 +177,21 @@ protected:
virtual void TearDown();
private:
- std::string hierarchy;
+ // Base hierarchy for separately mounted cgroup controllers, e.g., if the
+ // base hierarchy is /sys/fs/cgroup then each controller will be mounted to
+ // /sys/fs/cgroup/{controller}/.
+ std::string baseHierarchy;
+
+ // Set of cgroup subsystems used by the cgroups related tests.
+ hashset<std::string> subsystems;
+
+};
+#else
+template<>
+class ContainerizerTest<slave::MesosContainerizer> : public MesosTest
+{
+protected:
+ virtual slave::Flags CreateSlaveFlags();
};
#endif // __linux__
http://git-wip-us.apache.org/repos/asf/mesos/blob/0f3f8f35/src/tests/monitor_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/monitor_tests.cpp b/src/tests/monitor_tests.cpp
index 7988c90..4b950e1 100644
--- a/src/tests/monitor_tests.cpp
+++ b/src/tests/monitor_tests.cpp
@@ -25,6 +25,7 @@
#include <process/clock.hpp>
#include <process/future.hpp>
+#include <process/gmock.hpp>
#include <process/gtest.hpp>
#include <process/http.hpp>
#include <process/pid.hpp>
@@ -35,7 +36,7 @@
#include "slave/constants.hpp"
#include "slave/monitor.hpp"
-#include "tests/isolator.hpp"
+#include "tests/containerizer.hpp"
using namespace mesos;
using namespace mesos::internal;
@@ -65,6 +66,9 @@ TEST(MonitorTest, Collection)
ExecutorID executorId;
executorId.set_value("executor");
+ ContainerID containerId;
+ containerId.set_value("container");
+
ExecutorInfo executorInfo;
executorInfo.mutable_executor_id()->CopyFrom(executorId);
executorInfo.mutable_framework_id()->CopyFrom(frameworkId);
@@ -95,12 +99,10 @@ TEST(MonitorTest, Collection)
statistics3.set_timestamp(
statistics3.timestamp() + slave::RESOURCE_MONITORING_INTERVAL.secs());
- TestingIsolator isolator;
-
- process::spawn(isolator);
+ TestContainerizer containerizer;
Future<Nothing> usage1, usage2, usage3;
- EXPECT_CALL(isolator, usage(frameworkId, executorId))
+ EXPECT_CALL(containerizer, usage(containerId))
.WillOnce(DoAll(FutureSatisfy(&usage1),
Return(statistics1)))
.WillOnce(DoAll(FutureSatisfy(&usage2),
@@ -108,20 +110,19 @@ TEST(MonitorTest, Collection)
.WillOnce(DoAll(FutureSatisfy(&usage3),
Return(statistics3)));
- slave::ResourceMonitor monitor(&isolator);
+ slave::ResourceMonitor monitor(&containerizer);
// We pause the clock first in order to make sure that we can
// advance time below to force the 'delay' in
- // ResourceMonitorProcess::watch to execute.
+ // ResourceMonitorProcess::start to execute.
process::Clock::pause();
- monitor.watch(
- frameworkId,
- executorId,
+ monitor.start(
+ containerId,
executorInfo,
slave::RESOURCE_MONITORING_INTERVAL);
- // Now wait for ResouorceMonitorProcess::watch to finish so we can
+ // Now wait for ResourceMonitorProcess::start to finish so we can
// advance time to cause collection to begin.
process::Clock::settle();
@@ -130,7 +131,7 @@ TEST(MonitorTest, Collection)
AWAIT_READY(usage1);
- // Wait until the isolator has finished returning the statistics.
+ // Wait until the containerizer has finished returning the statistics.
process::Clock::settle();
// Expect a second collection to occur after the interval.
@@ -139,7 +140,7 @@ TEST(MonitorTest, Collection)
AWAIT_READY(usage2);
- // Wait until the isolator has finished returning the statistics.
+ // Wait until the containerizer has finished returning the statistics.
process::Clock::settle();
// Expect a third collection to occur after the interval.
@@ -148,17 +149,17 @@ TEST(MonitorTest, Collection)
AWAIT_READY(usage3);
- // Wait until the isolator has finished returning the statistics.
+ // Wait until the containerizer has finished returning the statistics.
process::Clock::settle();
// Ensure the monitor stops polling the isolator.
- monitor.unwatch(frameworkId, executorId);
+ monitor.stop(containerId);
- // Wait until ResourceMonitorProcess::unwatch has completed.
+ // Wait until ResourceMonitorProcess::stop has completed.
process::Clock::settle();
- // This time, Isolator::usage should not get called.
- EXPECT_CALL(isolator, usage(frameworkId, executorId))
+ // This time, Containerizer::usage should not get called.
+ EXPECT_CALL(containerizer, usage(containerId))
.Times(0);
process::Clock::advance(slave::RESOURCE_MONITORING_INTERVAL);
@@ -174,6 +175,9 @@ TEST(MonitorTest, Statistics)
ExecutorID executorId;
executorId.set_value("executor");
+ ContainerID containerId;
+ containerId.set_value("container");
+
ExecutorInfo executorInfo;
executorInfo.mutable_executor_id()->CopyFrom(executorId);
executorInfo.mutable_framework_id()->CopyFrom(frameworkId);
@@ -194,24 +198,21 @@ TEST(MonitorTest, Statistics)
statistics.set_mem_limit_bytes(2048);
statistics.set_timestamp(0);
- TestingIsolator isolator;
-
- process::spawn(isolator);
+ TestContainerizer containerizer;
Future<Nothing> usage;
- EXPECT_CALL(isolator, usage(frameworkId, executorId))
+ EXPECT_CALL(containerizer, usage(containerId))
.WillOnce(DoAll(FutureSatisfy(&usage),
Return(statistics)));
- slave::ResourceMonitor monitor(&isolator);
+ slave::ResourceMonitor monitor(&containerizer);
// We pause the clock first to ensure unexpected collections
// are avoided.
process::Clock::pause();
- monitor.watch(
- frameworkId,
- executorId,
+ monitor.start(
+ containerId,
executorInfo,
slave::RESOURCE_MONITORING_INTERVAL);
@@ -274,13 +275,13 @@ TEST(MonitorTest, Statistics)
response);
// Ensure the monitor stops polling the isolator.
- monitor.unwatch(frameworkId, executorId);
+ monitor.stop(containerId);
- // Wait until ResourceMonitorProcess::unwatch has completed.
+ // Wait until ResourceMonitorProcess::stop has completed.
process::Clock::settle();
- // This time, Isolator::usage should not get called.
- EXPECT_CALL(isolator, usage(frameworkId, executorId))
+ // This time, Containerizer::usage should not get called.
+ EXPECT_CALL(containerizer, usage(containerId))
.Times(0);
response = process::http::get(upid, "statistics.json");
http://git-wip-us.apache.org/repos/asf/mesos/blob/0f3f8f35/src/tests/paths_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/paths_tests.cpp b/src/tests/paths_tests.cpp
index 40c644c..8e11264 100644
--- a/src/tests/paths_tests.cpp
+++ b/src/tests/paths_tests.cpp
@@ -42,12 +42,12 @@ class PathsTest : public ::testing::Test
{
public:
PathsTest()
- : uuid(UUID::random())
{
slaveId.set_value("slave1");
frameworkId.set_value("framework1");
executorId.set_value("executor1");
taskId.set_value("task1");
+ containerId.set_value(UUID::random().toString());
Try<string> path = os::mkdtemp();
CHECK_SOME(path) << "Failed to mkdtemp";
@@ -64,7 +64,7 @@ protected:
FrameworkID frameworkId;
ExecutorID executorId;
TaskID taskId;
- UUID uuid;
+ ContainerID containerId;
string rootDir;
};
@@ -72,12 +72,12 @@ protected:
TEST_F(PathsTest, CreateExecutorDirectory)
{
const string& result = paths::createExecutorDirectory(
- rootDir, slaveId, frameworkId, executorId, uuid);
+ rootDir, slaveId, frameworkId, executorId, containerId);
// Expected directory layout.
string dir = rootDir + "/slaves/" + slaveId.value() + "/frameworks/"
+ frameworkId.value() + "/executors/" + executorId.value()
- + "/runs/" + uuid.toString();
+ + "/runs/" + containerId.value();
ASSERT_EQ(dir, result);
}
@@ -97,13 +97,13 @@ TEST_F(PathsTest, format)
ASSERT_EQ(dir, paths::getExecutorPath(
rootDir, slaveId, frameworkId, executorId));
- dir += "/runs/" + uuid.toString();
+ dir += "/runs/" + containerId.value();
ASSERT_EQ(dir, paths::getExecutorRunPath(
- rootDir, slaveId, frameworkId, executorId, uuid));
+ rootDir, slaveId, frameworkId, executorId, containerId));
dir += "/tasks/" + taskId.value();
ASSERT_EQ(dir, paths::getTaskPath(
- rootDir, slaveId, frameworkId, executorId, uuid, taskId));
+ rootDir, slaveId, frameworkId, executorId, containerId, taskId));
}
} // namespace paths {
http://git-wip-us.apache.org/repos/asf/mesos/blob/0f3f8f35/src/tests/slave_recovery_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_recovery_tests.cpp b/src/tests/slave_recovery_tests.cpp
index 4779509..0fc256b 100644
--- a/src/tests/slave_recovery_tests.cpp
+++ b/src/tests/slave_recovery_tests.cpp
@@ -39,22 +39,16 @@
#include "common/protobuf_utils.hpp"
-#ifdef __linux__
-#include "linux/cgroups.hpp"
-#endif
-
#include "master/detector.hpp"
#include "master/master.hpp"
#include "slave/gc.hpp"
-#ifdef __linux__
-#include "slave/cgroups_isolator.hpp"
-#endif
#include "slave/paths.hpp"
-#include "slave/process_isolator.hpp"
#include "slave/slave.hpp"
#include "slave/state.hpp"
+#include "slave/containerizer/containerizer.hpp"
+
#include "messages/messages.hpp"
#include "tests/mesos.hpp"
@@ -70,10 +64,7 @@ using namespace process;
using mesos::internal::master::Master;
using mesos::internal::slave::GarbageCollectorProcess;
-#ifdef __linux__
-using mesos::internal::slave::CgroupsIsolator;
-#endif
-using mesos::internal::slave::ProcessIsolator;
+using mesos::internal::slave::Containerizer;
using std::map;
using std::string;
@@ -115,14 +106,13 @@ TEST_F(SlaveStateTest, CheckpointString)
ASSERT_SOME_EQ(expected, os::read(file));
}
-
template <typename T>
-class SlaveRecoveryTest : public IsolatorTest<T>
+class SlaveRecoveryTest : public ContainerizerTest<T>
{
public:
virtual slave::Flags CreateSlaveFlags()
{
- slave::Flags flags = IsolatorTest<T>::CreateSlaveFlags();
+ slave::Flags flags = ContainerizerTest<T>::CreateSlaveFlags();
// Setup recovery slave flags.
flags.checkpoint = true;
@@ -133,15 +123,12 @@ public:
}
};
+// Note: Although these tests are typed it is Containerizer::create() that
+// decides which Containerizer to create based on the flags - see
+// SlaveRecoveryTest.
+typedef ::testing::Types<slave::MesosContainerizer> ContainerizerTypes;
-#ifdef __linux__
-typedef ::testing::Types<ProcessIsolator, CgroupsIsolator> IsolatorTypes;
-#else
-typedef ::testing::Types<ProcessIsolator> IsolatorTypes;
-#endif
-
-TYPED_TEST_CASE(SlaveRecoveryTest, IsolatorTypes);
-
+TYPED_TEST_CASE(SlaveRecoveryTest, ContainerizerTypes);
// Enable checkpointing on the slave and ensure recovery works.
TYPED_TEST(SlaveRecoveryTest, RecoverSlaveState)
@@ -149,11 +136,12 @@ TYPED_TEST(SlaveRecoveryTest, RecoverSlaveState)
Try<PID<Master> > master = this->StartMaster();
ASSERT_SOME(master);
- TypeParam isolator;
-
slave::Flags flags = this->CreateSlaveFlags();
- Try<PID<Slave> > slave = this->StartSlave(&isolator, flags);
+ Try<Containerizer*> containerizer = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer);
+
+ Try<PID<Slave> > slave = this->StartSlave(containerizer.get(), flags);
ASSERT_SOME(slave);
MockScheduler sched;
@@ -244,21 +232,21 @@ TYPED_TEST(SlaveRecoveryTest, RecoverSlaveState)
ASSERT_TRUE(state.frameworks[frameworkId].executors.contains(executorId));
// Check executor id and pids.
- const Option<UUID>& uuid=
+ const Option<ContainerID>& containerId =
state.frameworks[frameworkId].executors[executorId].latest;
- ASSERT_SOME(uuid);
+ ASSERT_SOME(containerId);
ASSERT_TRUE(state
.frameworks[frameworkId]
.executors[executorId]
- .runs.contains(uuid.get()));
+ .runs.contains(containerId.get()));
ASSERT_SOME_EQ(
libprocessPid,
state
.frameworks[frameworkId]
.executors[executorId]
- .runs[uuid.get()]
+ .runs[containerId.get()]
.libprocessPid);
@@ -266,7 +254,7 @@ TYPED_TEST(SlaveRecoveryTest, RecoverSlaveState)
ASSERT_TRUE(state
.frameworks[frameworkId]
.executors[executorId]
- .runs[uuid.get()]
+ .runs[containerId.get()]
.tasks.contains(task.task_id()));
const Task& t = mesos::internal::protobuf::createTask(
@@ -277,7 +265,7 @@ TYPED_TEST(SlaveRecoveryTest, RecoverSlaveState)
state
.frameworks[frameworkId]
.executors[executorId]
- .runs[uuid.get()]
+ .runs[containerId.get()]
.tasks[task.task_id()]
.info);
@@ -287,7 +275,7 @@ TYPED_TEST(SlaveRecoveryTest, RecoverSlaveState)
state
.frameworks[frameworkId]
.executors[executorId]
- .runs[uuid.get()]
+ .runs[containerId.get()]
.tasks[task.task_id()]
.updates.size());
@@ -296,14 +284,14 @@ TYPED_TEST(SlaveRecoveryTest, RecoverSlaveState)
state
.frameworks[frameworkId]
.executors[executorId]
- .runs[uuid.get()]
+ .runs[containerId.get()]
.tasks[task.task_id()]
.updates.front().uuid());
ASSERT_TRUE(state
.frameworks[frameworkId]
.executors[executorId]
- .runs[uuid.get()]
+ .runs[containerId.get()]
.tasks[task.task_id()]
.acks.contains(UUID::fromBytes(ack.get().uuid())));
@@ -314,7 +302,9 @@ TYPED_TEST(SlaveRecoveryTest, RecoverSlaveState)
driver.stop();
driver.join();
- this->Shutdown(); // Shutdown before isolator(s) get deallocated.
+ this->Shutdown();
+
+ delete containerizer.get();
}
@@ -325,11 +315,12 @@ TYPED_TEST(SlaveRecoveryTest, RecoverStatusUpdateManager)
Try<PID<Master> > master = this->StartMaster();
ASSERT_SOME(master);
- TypeParam isolator1;
-
slave::Flags flags = this->CreateSlaveFlags();
- Try<PID<Slave> > slave = this->StartSlave(&isolator1, flags);
+ Try<Containerizer*> containerizer1 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer1);
+
+ Try<PID<Slave> > slave = this->StartSlave(containerizer1.get(), flags);
ASSERT_SOME(slave);
MockScheduler sched;
@@ -376,16 +367,18 @@ TYPED_TEST(SlaveRecoveryTest, RecoverStatusUpdateManager)
AWAIT_READY(update);
this->Stop(slave.get());
+ delete containerizer1.get();
Future<TaskStatus> status;
EXPECT_CALL(sched, statusUpdate(_, _))
.WillOnce(FutureArg<1>(&status))
.WillRepeatedly(Return()); // Ignore subsequent updates.
- // Restart the slave (use same flags) with a new isolator.
- TypeParam isolator2;
+ // Restart the slave (use same flags) with a new containerizer.
+ Try<Containerizer*> containerizer2 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer2);
- slave = this->StartSlave(&isolator2, flags);
+ slave = this->StartSlave(containerizer2.get(), flags);
ASSERT_SOME(slave);
AWAIT_READY(status);
@@ -400,7 +393,8 @@ TYPED_TEST(SlaveRecoveryTest, RecoverStatusUpdateManager)
driver.stop();
driver.join();
- this->Shutdown(); // Shutdown before isolator(s) get deallocated.
+ this->Shutdown();
+ delete containerizer2.get();
}
@@ -412,11 +406,12 @@ TYPED_TEST(SlaveRecoveryTest, ReconnectExecutor)
Try<PID<Master> > master = this->StartMaster();
ASSERT_SOME(master);
- TypeParam isolator1;
-
slave::Flags flags = this->CreateSlaveFlags();
- Try<PID<Slave> > slave = this->StartSlave(&isolator1, flags);
+ Try<Containerizer*> containerizer1 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer1);
+
+ Try<PID<Slave> > slave = this->StartSlave(containerizer1.get(), flags);
ASSERT_SOME(slave);
MockScheduler sched;
@@ -455,6 +450,7 @@ TYPED_TEST(SlaveRecoveryTest, ReconnectExecutor)
AWAIT_READY(statusUpdate);
this->Stop(slave.get());
+ delete containerizer1.get();
Future<Message> reregisterExecutorMessage =
FUTURE_MESSAGE(Eq(ReregisterExecutorMessage().GetTypeName()), _, _);
@@ -464,10 +460,11 @@ TYPED_TEST(SlaveRecoveryTest, ReconnectExecutor)
.WillOnce(FutureArg<1>(&status))
.WillRepeatedly(Return()); // Ignore subsequent updates.
- // Restart the slave (use same flags) with a new isolator.
- TypeParam isolator2;
+ // Restart the slave (use same flags) with a new containerizer.
+ Try<Containerizer*> containerizer2 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer2);
- slave = this->StartSlave(&isolator2, flags);
+ slave = this->StartSlave(containerizer2.get(), flags);
ASSERT_SOME(slave);
// Ensure the executor re-registers.
@@ -496,7 +493,8 @@ TYPED_TEST(SlaveRecoveryTest, ReconnectExecutor)
driver.stop();
driver.join();
- this->Shutdown(); // Shutdown before isolator(s) get deallocated.
+ this->Shutdown();
+ delete containerizer2.get();
}
@@ -508,11 +506,12 @@ TYPED_TEST(SlaveRecoveryTest, RecoverUnregisteredExecutor)
Try<PID<Master> > master = this->StartMaster();
ASSERT_SOME(master);
- TypeParam isolator1;
-
slave::Flags flags = this->CreateSlaveFlags();
- Try<PID<Slave> > slave = this->StartSlave(&isolator1, flags);
+ Try<Containerizer*> containerizer1 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer1);
+
+ Try<PID<Slave> > slave = this->StartSlave(containerizer1.get(), flags);
ASSERT_SOME(slave);
MockScheduler sched;
@@ -551,6 +550,7 @@ TYPED_TEST(SlaveRecoveryTest, RecoverUnregisteredExecutor)
UPID executorPid = registerExecutor.get().from;
this->Stop(slave.get());
+ delete containerizer1.get();
Future<TaskStatus> status;
EXPECT_CALL(sched, statusUpdate(_, _))
@@ -559,15 +559,16 @@ TYPED_TEST(SlaveRecoveryTest, RecoverUnregisteredExecutor)
Future<Nothing> _recover = FUTURE_DISPATCH(_, &Slave::_recover);
- // Restart the slave (use same flags) with a new isolator.
- TypeParam isolator2;
+ // Restart the slave (use same flags) with a new containerizer.
+ Try<Containerizer*> containerizer2 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer2);
Future<vector<Offer> > offers2;
EXPECT_CALL(sched, resourceOffers(_, _))
.WillOnce(FutureArg<1>(&offers2))
.WillRepeatedly(Return()); // Ignore subsequent offers.
- slave = this->StartSlave(&isolator2, flags);
+ slave = this->StartSlave(containerizer2.get(), flags);
ASSERT_SOME(slave);
Clock::pause();
@@ -601,7 +602,8 @@ TYPED_TEST(SlaveRecoveryTest, RecoverUnregisteredExecutor)
driver.stop();
driver.join();
- this->Shutdown(); // Shutdown before isolator(s) get deallocated.
+ this->Shutdown();
+ delete containerizer2.get();
}
@@ -614,11 +616,12 @@ TYPED_TEST(SlaveRecoveryTest, RecoverTerminatedExecutor)
Try<PID<Master> > master = this->StartMaster();
ASSERT_SOME(master);
- TypeParam isolator1;
-
slave::Flags flags = this->CreateSlaveFlags();
- Try<PID<Slave> > slave = this->StartSlave(&isolator1, flags);
+ Try<Containerizer*> containerizer1 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer1);
+
+ Try<PID<Slave> > slave = this->StartSlave(containerizer1.get(), flags);
ASSERT_SOME(slave);
MockScheduler sched;
@@ -664,6 +667,7 @@ TYPED_TEST(SlaveRecoveryTest, RecoverTerminatedExecutor)
AWAIT_READY(ack);
this->Stop(slave.get());
+ delete containerizer1.get();
Future<TaskStatus> status;
EXPECT_CALL(sched, statusUpdate(_, _))
@@ -674,15 +678,16 @@ TYPED_TEST(SlaveRecoveryTest, RecoverTerminatedExecutor)
Future<Nothing> _recover = FUTURE_DISPATCH(_, &Slave::_recover);
- // Restart the slave (use same flags) with a new isolator.
- TypeParam isolator2;
+ // Restart the slave (use same flags) with a new containerizer.
+ Try<Containerizer*> containerizer2 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer2);
Future<vector<Offer> > offers2;
EXPECT_CALL(sched, resourceOffers(_, _))
.WillOnce(FutureArg<1>(&offers2))
.WillRepeatedly(Return()); // Ignore subsequent offers.
- slave = this->StartSlave(&isolator2, flags);
+ slave = this->StartSlave(containerizer2.get(), flags);
ASSERT_SOME(slave);
Clock::pause();
@@ -716,7 +721,8 @@ TYPED_TEST(SlaveRecoveryTest, RecoverTerminatedExecutor)
driver.stop();
driver.join();
- this->Shutdown(); // Shutdown before isolator(s) get deallocated.
+ this->Shutdown();
+ delete containerizer2.get();
}
@@ -732,14 +738,15 @@ TYPED_TEST(SlaveRecoveryTest, DISABLED_RecoveryTimeout)
Try<PID<Master> > master = this->StartMaster();
ASSERT_SOME(master);
- TypeParam isolator1;
-
// Set a short recovery timeout, as we can't control the executor
// driver time when using the process / cgroups isolators.
slave::Flags flags = this->CreateSlaveFlags();
flags.recovery_timeout = Milliseconds(1);
- Try<PID<Slave> > slave = this->StartSlave(&isolator1, flags);
+ Try<Containerizer*> containerizer1 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer1);
+
+ Try<PID<Slave> > slave = this->StartSlave(containerizer1.get(), flags);
ASSERT_SOME(slave);
MockScheduler sched;
@@ -779,6 +786,7 @@ TYPED_TEST(SlaveRecoveryTest, DISABLED_RecoveryTimeout)
AWAIT_READY(_statusUpdateAcknowledgement);
this->Stop(slave.get());
+ delete containerizer1.get();
Future<TaskStatus> status;
EXPECT_CALL(sched, statusUpdate(_, _))
@@ -790,10 +798,11 @@ TYPED_TEST(SlaveRecoveryTest, DISABLED_RecoveryTimeout)
Future<Nothing> _recover = FUTURE_DISPATCH(_, &Slave::_recover);
- // Restart the slave (use same flags) with a new isolator.
- TypeParam isolator2;
+ // Restart the slave (use same flags) with a new containerizer.
+ Try<Containerizer*> containerizer2 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer2);
- slave = this->StartSlave(&isolator2, flags);
+ slave = this->StartSlave(containerizer2.get(), flags);
ASSERT_SOME(slave);
Clock::pause();
@@ -812,7 +821,8 @@ TYPED_TEST(SlaveRecoveryTest, DISABLED_RecoveryTimeout)
driver.stop();
driver.join();
- this->Shutdown(); // Shutdown before isolator(s) get deallocated.
+ this->Shutdown();
+ delete containerizer2.get();
}
@@ -825,11 +835,12 @@ TYPED_TEST(SlaveRecoveryTest, RecoverCompletedExecutor)
Try<PID<Master> > master = this->StartMaster();
ASSERT_SOME(master);
- TypeParam isolator1;
-
slave::Flags flags = this->CreateSlaveFlags();
- Try<PID<Slave> > slave = this->StartSlave(&isolator1, flags);
+ Try<Containerizer*> containerizer1 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer1);
+
+ Try<PID<Slave> > slave = this->StartSlave(containerizer1.get(), flags);
ASSERT_SOME(slave);
MockScheduler sched;
@@ -873,19 +884,21 @@ TYPED_TEST(SlaveRecoveryTest, RecoverCompletedExecutor)
AWAIT_READY(schedule);
this->Stop(slave.get());
+ delete containerizer1.get();
Future<Nothing> schedule2 = FUTURE_DISPATCH(
_, &GarbageCollectorProcess::schedule);
- // Restart the slave (use same flags) with a new isolator.
- TypeParam isolator2;
+ // Restart the slave (use same flags) with a new containerizer.
+ Try<Containerizer*> containerizer2 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer2);
Future<vector<Offer> > offers2;
EXPECT_CALL(sched, resourceOffers(_, _))
.WillOnce(FutureArg<1>(&offers2))
.WillRepeatedly(Return()); // Ignore subsequent offers.
- slave = this->StartSlave(&isolator2, flags);
+ slave = this->StartSlave(containerizer2.get(), flags);
ASSERT_SOME(slave);
// We use 'gc.schedule' as a proxy for the cleanup of the executor.
@@ -899,7 +912,8 @@ TYPED_TEST(SlaveRecoveryTest, RecoverCompletedExecutor)
driver.stop();
driver.join();
- this->Shutdown(); // Shutdown before isolator(s) get deallocated.
+ this->Shutdown();
+ delete containerizer2.get();
}
@@ -911,11 +925,12 @@ TYPED_TEST(SlaveRecoveryTest, CleanupExecutor)
Try<PID<Master> > master = this->StartMaster();
ASSERT_SOME(master);
- TypeParam isolator1;
-
slave::Flags flags = this->CreateSlaveFlags();
- Try<PID<Slave> > slave = this->StartSlave(&isolator1, flags);
+ Try<Containerizer*> containerizer1 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer1);
+
+ Try<PID<Slave> > slave = this->StartSlave(containerizer1.get(), flags);
ASSERT_SOME(slave);
MockScheduler sched;
@@ -955,6 +970,7 @@ TYPED_TEST(SlaveRecoveryTest, CleanupExecutor)
AWAIT_READY(ack);
this->Stop(slave.get());
+ delete containerizer1.get();
// Slave in cleanup mode shouldn't reregister with slave and hence
// no offers should be made by the master.
@@ -968,11 +984,12 @@ TYPED_TEST(SlaveRecoveryTest, CleanupExecutor)
Future<Nothing> __recover = FUTURE_DISPATCH(_, &Slave::__recover);
// Restart the slave in 'cleanup' recovery mode with a new isolator.
- TypeParam isolator2;
+ Try<Containerizer*> containerizer2 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer2);
flags.recover = "cleanup";
- slave = this->StartSlave(&isolator2, flags);
+ slave = this->StartSlave(containerizer2.get(), flags);
ASSERT_SOME(slave);
Clock::pause();
@@ -996,7 +1013,8 @@ TYPED_TEST(SlaveRecoveryTest, CleanupExecutor)
driver.stop();
driver.join();
- this->Shutdown(); // Shutdown before isolator(s) get deallocated.
+ this->Shutdown();
+ delete containerizer2.get();
}
@@ -1007,9 +1025,12 @@ TYPED_TEST(SlaveRecoveryTest, RemoveNonCheckpointingFramework)
Try<PID<Master> > master = this->StartMaster();
ASSERT_SOME(master);
- TypeParam isolator;
+ slave::Flags flags = this->CreateSlaveFlags();
+
+ Try<Containerizer*> containerizer = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer);
- Try<PID<Slave> > slave = this->StartSlave(&isolator);
+ Try<PID<Slave> > slave = this->StartSlave(containerizer.get(), flags);
ASSERT_SOME(slave);
MockScheduler sched;
@@ -1069,6 +1090,7 @@ TYPED_TEST(SlaveRecoveryTest, RemoveNonCheckpointingFramework)
.WillOnce(FutureArg<1>(&status2));
this->Stop(slave.get());
+ delete containerizer.get();
// Scheduler should receive the TASK_LOST updates.
AWAIT_READY(status1);
@@ -1080,7 +1102,7 @@ TYPED_TEST(SlaveRecoveryTest, RemoveNonCheckpointingFramework)
driver.stop();
driver.join();
- this->Shutdown(); // Shutdown before isolator(s) get deallocated.
+ this->Shutdown();
}
@@ -1091,11 +1113,12 @@ TYPED_TEST(SlaveRecoveryTest, NonCheckpointingFramework)
Try<PID<Master> > master = this->StartMaster();
ASSERT_SOME(master);
- TypeParam isolator;
-
slave::Flags flags = this->CreateSlaveFlags();
- Try<PID<Slave> > slave = this->StartSlave(&isolator, flags);
+ Try<Containerizer*> containerizer = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer);
+
+ Try<PID<Slave> > slave = this->StartSlave(containerizer.get(), flags);
ASSERT_SOME(slave);
MockScheduler sched;
@@ -1161,7 +1184,8 @@ TYPED_TEST(SlaveRecoveryTest, NonCheckpointingFramework)
driver.stop();
driver.join();
- this->Shutdown(); // Shutdown before isolator(s) get deallocated.
+ this->Shutdown();
+ delete containerizer.get();
}
@@ -1172,8 +1196,6 @@ TYPED_TEST(SlaveRecoveryTest, NonCheckpointingSlave)
Try<PID<Master> > master = this->StartMaster();
ASSERT_SOME(master);
- TypeParam isolator;
-
// Disable checkpointing for the slave.
slave::Flags flags = this->CreateSlaveFlags();
flags.checkpoint = false;
@@ -1183,7 +1205,10 @@ TYPED_TEST(SlaveRecoveryTest, NonCheckpointingSlave)
Future<RegisterSlaveMessage> registerSlaveMessage =
FUTURE_PROTOBUF(RegisterSlaveMessage(), _, _);
- Try<PID<Slave> > slave = this->StartSlave(&isolator, flags);
+ Try<Containerizer*> containerizer = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer);
+
+ Try<PID<Slave> > slave = this->StartSlave(containerizer.get(), flags);
ASSERT_SOME(slave);
AWAIT_READY(registerSlaveMessage);
@@ -1215,10 +1240,10 @@ TYPED_TEST(SlaveRecoveryTest, NonCheckpointingSlave)
driver.stop();
driver.join();
- this->Shutdown(); // Shutdown before isolator(s) get deallocated.
+ this->Shutdown();
+ delete containerizer.get();
}
-
// Scheduler asks a restarted slave to kill a task that has been
// running before the slave restarted. This test ensures that a
// restarted slave is able to communicate with all components
@@ -1228,11 +1253,12 @@ TYPED_TEST(SlaveRecoveryTest, KillTask)
Try<PID<Master> > master = this->StartMaster();
ASSERT_SOME(master);
- TypeParam isolator1;
-
slave::Flags flags = this->CreateSlaveFlags();
- Try<PID<Slave> > slave = this->StartSlave(&isolator1, flags);
+ Try<Containerizer*> containerizer1 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer1);
+
+ Try<PID<Slave> > slave = this->StartSlave(containerizer1.get(), flags);
ASSERT_SOME(slave);
MockScheduler sched;
@@ -1272,6 +1298,7 @@ TYPED_TEST(SlaveRecoveryTest, KillTask)
AWAIT_READY(ack);
this->Stop(slave.get());
+ delete containerizer1.get();
Future<Nothing> _recover = FUTURE_DISPATCH(_, &Slave::_recover);
@@ -1279,9 +1306,10 @@ TYPED_TEST(SlaveRecoveryTest, KillTask)
FUTURE_PROTOBUF(ReregisterSlaveMessage(), _, _);
// Restart the slave (use same flags) with a new isolator.
- TypeParam isolator2;
+ Try<Containerizer*> containerizer2 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer2);
- slave = this->StartSlave(&isolator2, flags);
+ slave = this->StartSlave(containerizer2.get(), flags);
ASSERT_SOME(slave);
Clock::pause();
@@ -1329,7 +1357,8 @@ TYPED_TEST(SlaveRecoveryTest, KillTask)
driver.stop();
driver.join();
- this->Shutdown(); // Shutdown before isolator(s) get deallocated.
+ this->Shutdown();
+ delete containerizer2.get();
}
@@ -1340,12 +1369,13 @@ TYPED_TEST(SlaveRecoveryTest, Reboot)
Try<PID<Master> > master = this->StartMaster();
ASSERT_SOME(master);
- TypeParam isolator1;
-
slave::Flags flags = this->CreateSlaveFlags();
flags.strict = false;
- Try<PID<Slave> > slave = this->StartSlave(&isolator1, flags);
+ Try<Containerizer*> containerizer1 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer1);
+
+ Try<PID<Slave> > slave = this->StartSlave(containerizer1.get(), flags);
ASSERT_SOME(slave);
MockScheduler sched;
@@ -1398,6 +1428,7 @@ TYPED_TEST(SlaveRecoveryTest, Reboot)
AWAIT_READY(status);
this->Stop(slave.get());
+ delete containerizer1.get();
// Shut down the executor manually so that it doesn't hang around
// after the test finishes.
@@ -1411,15 +1442,16 @@ TYPED_TEST(SlaveRecoveryTest, Reboot)
Future<RegisterSlaveMessage> registerSlave =
FUTURE_PROTOBUF(RegisterSlaveMessage(), _, _);
- // Restart the slave (use same flags) with a new isolator.
- TypeParam isolator2;
+ // Restart the slave (use same flags) with a new containerizer.
+ Try<Containerizer*> containerizer2 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer2);
Future<vector<Offer> > offers2;
EXPECT_CALL(sched, resourceOffers(_, _))
.WillOnce(FutureArg<1>(&offers2))
.WillRepeatedly(Return()); // Ignore subsequent offers.
- slave = this->StartSlave(&isolator2, flags);
+ slave = this->StartSlave(containerizer2.get(), flags);
ASSERT_SOME(slave);
AWAIT_READY(registerSlave);
@@ -1432,7 +1464,8 @@ TYPED_TEST(SlaveRecoveryTest, Reboot)
driver.stop();
driver.join();
- this->Shutdown(); // Shutdown before isolator(s) get deallocated.
+ this->Shutdown();
+ delete containerizer2.get();
}
@@ -1445,12 +1478,13 @@ TYPED_TEST(SlaveRecoveryTest, GCExecutor)
Try<PID<Master> > master = this->StartMaster();
ASSERT_SOME(master);
- TypeParam isolator1;
-
slave::Flags flags = this->CreateSlaveFlags();
flags.strict = false;
- Try<PID<Slave> > slave = this->StartSlave(&isolator1, flags);
+ Try<Containerizer*> containerizer1 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer1);
+
+ Try<PID<Slave> > slave = this->StartSlave(containerizer1.get(), flags);
ASSERT_SOME(slave);
MockScheduler sched;
@@ -1503,6 +1537,7 @@ TYPED_TEST(SlaveRecoveryTest, GCExecutor)
AWAIT_READY(status);
this->Stop(slave.get());
+ delete containerizer1.get();
// Shut down the executor manually so that it doesn't hang around
// after the test finishes.
@@ -1522,14 +1557,15 @@ TYPED_TEST(SlaveRecoveryTest, GCExecutor)
FUTURE_PROTOBUF(ReregisterSlaveMessage(), _, _);
// Restart the slave (use same flags) with a new isolator.
- TypeParam isolator2;
+ Try<Containerizer*> containerizer2 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer2);
Future<vector<Offer> > offers2;
EXPECT_CALL(sched, resourceOffers(_, _))
.WillOnce(FutureArg<1>(&offers2))
.WillRepeatedly(Return()); // Ignore subsequent offers.
- slave = this->StartSlave(&isolator2, flags);
+ slave = this->StartSlave(containerizer2.get(), flags);
ASSERT_SOME(slave);
Clock::pause();
@@ -1573,7 +1609,8 @@ TYPED_TEST(SlaveRecoveryTest, GCExecutor)
driver.stop();
driver.join();
- this->Shutdown(); // Shutdown before isolator(s) get deallocated.
+ this->Shutdown();
+ delete containerizer2.get();
}
@@ -1584,11 +1621,12 @@ TYPED_TEST(SlaveRecoveryTest, ShutdownSlave)
Try<PID<Master> > master = this->StartMaster();
ASSERT_SOME(master);
- TypeParam isolator1;
-
slave::Flags flags = this->CreateSlaveFlags();
- Try<PID<Slave> > slave = this->StartSlave(&isolator1, flags);
+ Try<Containerizer*> containerizer1 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer1);
+
+ Try<PID<Slave> > slave = this->StartSlave(containerizer1.get(), flags);
ASSERT_SOME(slave);
MockScheduler sched;
@@ -1659,16 +1697,18 @@ TYPED_TEST(SlaveRecoveryTest, ShutdownSlave)
Clock::resume();
this->Stop(slave.get(), true); // Send a "shut down".
+ delete containerizer1.get();
Future<vector<Offer> > offers3;
EXPECT_CALL(sched, resourceOffers(_, _))
.WillOnce(FutureArg<1>(&offers3))
.WillRepeatedly(Return()); // Ignore subsequent offers.
- // Now restart the slave (use same flags) with a new isolator.
- TypeParam isolator2;
+ // Now restart the slave (use same flags) with a new containerizer.
+ Try<Containerizer*> containerizer2 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer2);
- slave = this->StartSlave(&isolator2, flags);
+ slave = this->StartSlave(containerizer2.get(), flags);
ASSERT_SOME(slave);
// Ensure that the slave registered with a new id.
@@ -1686,7 +1726,8 @@ TYPED_TEST(SlaveRecoveryTest, ShutdownSlave)
driver.stop();
driver.join();
- this->Shutdown(); // Shutdown before isolator(s) get deallocated.
+ this->Shutdown();
+ delete containerizer2.get();
}
@@ -1701,11 +1742,12 @@ TYPED_TEST(SlaveRecoveryTest, RegisterDisconnectedSlave)
Future<RegisterSlaveMessage> registerSlaveMessage =
FUTURE_PROTOBUF(RegisterSlaveMessage(), _, _);
- TypeParam isolator;
-
slave::Flags flags = this->CreateSlaveFlags();
- Try<PID<Slave> > slave = this->StartSlave(&isolator, flags);
+ Try<Containerizer*> containerizer = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer);
+
+ Try<PID<Slave> > slave = this->StartSlave(containerizer.get(), flags);
ASSERT_SOME(slave);
AWAIT_READY(registerSlaveMessage);
@@ -1784,7 +1826,8 @@ TYPED_TEST(SlaveRecoveryTest, RegisterDisconnectedSlave)
driver.stop();
driver.join();
- this->Shutdown(); // Shutdown before isolator(s) get deallocated.
+ this->Shutdown();
+ delete containerizer.get();
}
@@ -1799,11 +1842,12 @@ TYPED_TEST(SlaveRecoveryTest, ReconcileKillTask)
Future<RegisterSlaveMessage> registerSlaveMessage =
FUTURE_PROTOBUF(RegisterSlaveMessage(), _, _);
- TypeParam isolator1;
-
slave::Flags flags = this->CreateSlaveFlags();
- Try<PID<Slave> > slave = this->StartSlave(&isolator1, flags);
+ Try<Containerizer*> containerizer1 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer1);
+
+ Try<PID<Slave> > slave = this->StartSlave(containerizer1.get(), flags);
ASSERT_SOME(slave);
AWAIT_READY(registerSlaveMessage);
@@ -1848,6 +1892,7 @@ TYPED_TEST(SlaveRecoveryTest, ReconcileKillTask)
AWAIT_READY(_statusUpdateAcknowledgement);
this->Stop(slave.get());
+ delete containerizer1.get();
// Now send a KillTask message to the master. This will not be
// received by the slave because it is down.
@@ -1857,15 +1902,16 @@ TYPED_TEST(SlaveRecoveryTest, ReconcileKillTask)
EXPECT_CALL(sched, statusUpdate(_, _))
.WillOnce(FutureArg<1>(&status));
- // Now restart the slave (use same flags) with a new isolator.
- TypeParam isolator2;
+ // Now restart the slave (use same flags) with a new containerizer.
+ Try<Containerizer*> containerizer2 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer2);
Future<vector<Offer> > offers2;
EXPECT_CALL(sched, resourceOffers(_, _))
.WillOnce(FutureArg<1>(&offers2))
.WillRepeatedly(Return()); // Ignore subsequent offers.
- slave = this->StartSlave(&isolator2, flags);
+ slave = this->StartSlave(containerizer2.get(), flags);
ASSERT_SOME(slave);
// Scheduler should get a TASK_KILLED message.
@@ -1880,7 +1926,8 @@ TYPED_TEST(SlaveRecoveryTest, ReconcileKillTask)
driver.stop();
driver.join();
- this->Shutdown(); // Shutdown before isolator(s) get deallocated.
+ this->Shutdown();
+ delete containerizer2.get();
}
@@ -1895,11 +1942,12 @@ TYPED_TEST(SlaveRecoveryTest, ReconcileShutdownFramework)
Future<RegisterSlaveMessage> registerSlaveMessage =
FUTURE_PROTOBUF(RegisterSlaveMessage(), _, _);
- TypeParam isolator1;
-
slave::Flags flags = this->CreateSlaveFlags();
- Try<PID<Slave> > slave = this->StartSlave(&isolator1, flags);
+ Try<Containerizer*> containerizer1 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer1);
+
+ Try<PID<Slave> > slave = this->StartSlave(containerizer1.get(), flags);
ASSERT_SOME(slave);
AWAIT_READY(registerSlaveMessage);
@@ -1945,6 +1993,7 @@ TYPED_TEST(SlaveRecoveryTest, ReconcileShutdownFramework)
AWAIT_READY(_statusUpdateAcknowledgement);
this->Stop(slave.get());
+ delete containerizer1.get();
Future<UnregisterFrameworkMessage> unregisterFrameworkMessage =
FUTURE_PROTOBUF(UnregisterFrameworkMessage(), _, _);
@@ -1962,10 +2011,11 @@ TYPED_TEST(SlaveRecoveryTest, ReconcileShutdownFramework)
Future<Nothing> executorTerminated =
FUTURE_DISPATCH(_, &Slave::executorTerminated);
- // Now restart the slave (use same flags) with a new isolator.
- TypeParam isolator2;
+ // Now restart the slave (use same flags) with a new containerizer.
+ Try<Containerizer*> containerizer2 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer2);
- slave = this->StartSlave(&isolator2, flags);
+ slave = this->StartSlave(containerizer2.get(), flags);
ASSERT_SOME(slave);
// Slave should get a ShutdownFrameworkMessage.
@@ -1974,7 +2024,8 @@ TYPED_TEST(SlaveRecoveryTest, ReconcileShutdownFramework)
// Ensure that the executor is terminated.
AWAIT_READY(executorTerminated);
- this->Shutdown(); // Shutdown before isolator(s) get deallocated.
+ this->Shutdown();
+ delete containerizer2.get();
}
@@ -1994,13 +2045,14 @@ TYPED_TEST(SlaveRecoveryTest, ReconcileTasksMissingFromSlave)
Try<PID<Master> > master = this->StartMaster(&allocator);
ASSERT_SOME(master);
- TypeParam isolator1;
-
slave::Flags flags = this->CreateSlaveFlags();
EXPECT_CALL(allocator, slaveAdded(_, _, _));
- Try<PID<Slave> > slave = this->StartSlave(&isolator1, flags);
+ Try<Containerizer*> containerizer1 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer1);
+
+ Try<PID<Slave> > slave = this->StartSlave(containerizer1.get(), flags);
ASSERT_SOME(slave);
MockScheduler sched;
@@ -2049,6 +2101,7 @@ TYPED_TEST(SlaveRecoveryTest, ReconcileTasksMissingFromSlave)
EXPECT_CALL(allocator, slaveDisconnected(_));
this->Stop(slave.get());
+ delete containerizer1.get();
// Construct the framework meta directory that needs wiping.
string frameworkPath = paths::getFrameworkPath(
@@ -2099,10 +2152,11 @@ TYPED_TEST(SlaveRecoveryTest, ReconcileTasksMissingFromSlave)
.WillOnce(FutureArg<1>(&offers2))
.WillRepeatedly(Return()); // Ignore subsequent offers.
- // Restart the slave (use same flags) with a new isolator.
- TypeParam isolator2;
+ // Restart the slave (use same flags) with a new containerizer.
+ Try<Containerizer*> containerizer2 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer2);
- slave = this->StartSlave(&isolator2, flags);
+ slave = this->StartSlave(containerizer2.get(), flags);
ASSERT_SOME(slave);
Clock::pause();
@@ -2143,7 +2197,8 @@ TYPED_TEST(SlaveRecoveryTest, ReconcileTasksMissingFromSlave)
driver.stop();
driver.join();
- this->Shutdown(); // Shutdown before isolator(s) get deallocated.
+ this->Shutdown();
+ delete containerizer2.get();
}
@@ -2156,10 +2211,12 @@ TYPED_TEST(SlaveRecoveryTest, SchedulerFailover)
Try<PID<Master> > master = this->StartMaster();
ASSERT_SOME(master);
- TypeParam isolator1;
slave::Flags flags = this->CreateSlaveFlags();
- Try<PID<Slave> > slave = this->StartSlave(&isolator1, flags);
+ Try<Containerizer*> containerizer1 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer1);
+
+ Try<PID<Slave> > slave = this->StartSlave(containerizer1.get(), flags);
ASSERT_SOME(slave);
// Launch the first (i.e., failing) scheduler.
@@ -2202,6 +2259,7 @@ TYPED_TEST(SlaveRecoveryTest, SchedulerFailover)
AWAIT_READY(_statusUpdateAcknowledgement);
this->Stop(slave.get());
+ delete containerizer1.get();
// Now launch the second (i.e., failover) scheduler using the
// framework id recorded from the first scheduler.
@@ -2233,10 +2291,11 @@ TYPED_TEST(SlaveRecoveryTest, SchedulerFailover)
Future<ReregisterSlaveMessage> reregisterSlaveMessage =
FUTURE_PROTOBUF(ReregisterSlaveMessage(), _, _);
- // Restart the slave (use same flags) with a new isolator.
- TypeParam isolator2;
+ // Restart the slave (use same flags) with a new containerizer.
+ Try<Containerizer*> containerizer2 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer2);
- slave = this->StartSlave(&isolator2, flags);
+ slave = this->StartSlave(containerizer2.get(), flags);
ASSERT_SOME(slave);
Clock::pause();
@@ -2287,7 +2346,8 @@ TYPED_TEST(SlaveRecoveryTest, SchedulerFailover)
driver1.stop();
driver1.join();
- this->Shutdown(); // Shutdown before isolator(s) get deallocated.
+ this->Shutdown();
+ delete containerizer2.get();
}
@@ -2309,10 +2369,12 @@ TYPED_TEST(SlaveRecoveryTest, PartitionedSlave)
// Drop all the PONGs to simulate slave partition.
DROP_MESSAGES(Eq("PONG"), _, _);
- TypeParam isolator1;
slave::Flags flags = this->CreateSlaveFlags();
- Try<PID<Slave> > slave = this->StartSlave(&isolator1, flags);
+ Try<Containerizer*> containerizer1 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer1);
+
+ Try<PID<Slave> > slave = this->StartSlave(containerizer1.get(), flags);
ASSERT_SOME(slave);
// Enable checkpointing for the framework.
@@ -2404,14 +2466,16 @@ TYPED_TEST(SlaveRecoveryTest, PartitionedSlave)
Clock::settle();
this->Stop(slave.get());
+ delete containerizer1.get();
Future<RegisterSlaveMessage> registerSlaveMessage =
FUTURE_PROTOBUF(RegisterSlaveMessage(), _, _);
// Restart the slave (use same flags) with a new isolator.
- TypeParam isolator2;
+ Try<Containerizer*> containerizer2 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer2);
- slave = this->StartSlave(&isolator2, flags);
+ slave = this->StartSlave(containerizer2.get(), flags);
ASSERT_SOME(slave);
AWAIT_READY(registerSlaveMessage);
@@ -2421,7 +2485,8 @@ TYPED_TEST(SlaveRecoveryTest, PartitionedSlave)
driver.stop();
driver.join();
- this->Shutdown(); // Shutdown before isolator(s) get deallocated.
+ this->Shutdown();
+ delete containerizer2.get();
}
@@ -2434,11 +2499,12 @@ TYPED_TEST(SlaveRecoveryTest, MasterFailover)
Try<PID<Master> > master = this->StartMaster();
ASSERT_SOME(master);
- TypeParam isolator1;
-
slave::Flags flags = this->CreateSlaveFlags();
- Try<PID<Slave> > slave = this->StartSlave(&isolator1, flags);
+ Try<Containerizer*> containerizer1 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer1);
+
+ Try<PID<Slave> > slave = this->StartSlave(containerizer1.get(), flags);
ASSERT_SOME(slave);
MockScheduler sched;
@@ -2483,6 +2549,7 @@ TYPED_TEST(SlaveRecoveryTest, MasterFailover)
AWAIT_READY(_statusUpdateAcknowledgement);
this->Stop(slave.get());
+ delete containerizer1.get();
// Step 2. Simulate failed over master by restarting the master.
this->Stop(master.get());
@@ -2508,9 +2575,10 @@ TYPED_TEST(SlaveRecoveryTest, MasterFailover)
FUTURE_PROTOBUF(ReregisterSlaveMessage(), _, _);
// Restart the slave (use same flags) with a new isolator.
- TypeParam isolator2;
+ Try<Containerizer*> containerizer2 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer2);
- slave = this->StartSlave(&isolator2, flags);
+ slave = this->StartSlave(containerizer2.get(), flags);
ASSERT_SOME(slave);
Clock::pause();
@@ -2558,7 +2626,8 @@ TYPED_TEST(SlaveRecoveryTest, MasterFailover)
driver.stop();
driver.join();
- this->Shutdown(); // Shutdown before isolator(s) get deallocated.
+ this->Shutdown();
+ delete containerizer2.get();
}
@@ -2571,11 +2640,12 @@ TYPED_TEST(SlaveRecoveryTest, MultipleFrameworks)
Try<PID<Master> > master = this->StartMaster();
ASSERT_SOME(master);
- TypeParam isolator1;
-
slave::Flags flags = this->CreateSlaveFlags();
- Try<PID<Slave> > slave = this->StartSlave(&isolator1, flags);
+ Try<Containerizer*> containerizer1 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer1);
+
+ Try<PID<Slave> > slave = this->StartSlave(containerizer1.get(), flags);
ASSERT_SOME(slave);
// Framework 1.
@@ -2661,16 +2731,18 @@ TYPED_TEST(SlaveRecoveryTest, MultipleFrameworks)
AWAIT_READY(_statusUpdateAcknowledgement2);
this->Stop(slave.get());
+ delete containerizer1.get();
Future<Nothing> _recover = FUTURE_DISPATCH(_, &Slave::_recover);
Future<ReregisterSlaveMessage> reregisterSlaveMessage =
FUTURE_PROTOBUF(ReregisterSlaveMessage(), _, _);
- // Restart the slave (use same flags) with a new isolator.
- TypeParam isolator2;
+ // Restart the slave (use same flags) with a new containerizer.
+ Try<Containerizer*> containerizer2 = Containerizer::create(flags, true);
+ ASSERT_SOME(containerizer2);
- slave = this->StartSlave(&isolator2, flags);
+ slave = this->StartSlave(containerizer2.get(), flags);
ASSERT_SOME(slave);
Clock::pause();
@@ -2727,39 +2799,14 @@ TYPED_TEST(SlaveRecoveryTest, MultipleFrameworks)
driver2.stop();
driver2.join();
- this->Shutdown(); // Shutdown before isolator(s) get deallocated.
+ this->Shutdown();
+ delete containerizer2.get();
}
-// Create a test fixture for those slave recovery tests that only work
-// when ProcessIsolator is used.
-// TODO(jieyu): We use typed test here because it magically allows us
-// to access protected members in Slave (e.g. Slave::reconnect).
-template <typename T>
-class SlaveRecoveryProcessIsolatorTest : public IsolatorTest<T>
-{
-public:
- virtual slave::Flags CreateSlaveFlags()
- {
- slave::Flags flags = IsolatorTest<T>::CreateSlaveFlags();
-
- // Setup recovery slave flags.
- flags.checkpoint = true;
- flags.recover = "reconnect";
- flags.strict = true;
-
- return flags;
- }
-};
-
-
-TYPED_TEST_CASE(SlaveRecoveryProcessIsolatorTest,
- ::testing::Types<ProcessIsolator>);
-
-
// This test verifies that slave recovery works properly even if
// multiple slaves are co-located on the same host.
-TYPED_TEST(SlaveRecoveryProcessIsolatorTest, MultipleSlaves)
+TYPED_TEST(SlaveRecoveryTest, MultipleSlaves)
{
Try<PID<Master> > master = this->StartMaster();
ASSERT_SOME(master);
@@ -2783,9 +2830,10 @@ TYPED_TEST(SlaveRecoveryProcessIsolatorTest, MultipleSlaves)
// Start the first slave.
slave::Flags flags1 = this->CreateSlaveFlags();
- slave::ProcessIsolator isolator1;
+ Try<Containerizer*> containerizer1 = Containerizer::create(flags1, true);
+ ASSERT_SOME(containerizer1);
- Try<PID<Slave> > slave1 = this->StartSlave(&isolator1, flags1);
+ Try<PID<Slave> > slave1 = this->StartSlave(containerizer1.get(), flags1);
ASSERT_SOME(slave1);
AWAIT_READY(offers1);
@@ -2813,9 +2861,10 @@ TYPED_TEST(SlaveRecoveryProcessIsolatorTest, MultipleSlaves)
// Start the second slave.
slave::Flags flags2 = this->CreateSlaveFlags();
- slave::ProcessIsolator isolator2;
+ Try<Containerizer*> containerizer2 = Containerizer::create(flags2, true);
+ ASSERT_SOME(containerizer2);
- Try<PID<Slave> > slave2 = this->StartSlave(&isolator2, flags2);
+ Try<PID<Slave> > slave2 = this->StartSlave(containerizer2.get(), flags2);
ASSERT_SOME(slave2);
AWAIT_READY(offers2);
@@ -2838,7 +2887,9 @@ TYPED_TEST(SlaveRecoveryProcessIsolatorTest, MultipleSlaves)
AWAIT_READY(_statusUpdateAcknowledgement2);
this->Stop(slave1.get());
+ delete containerizer1.get();
this->Stop(slave2.get());
+ delete containerizer2.get();
Future<Nothing> _recover1 = FUTURE_DISPATCH(_, &Slave::_recover);
Future<Nothing> _recover2 = FUTURE_DISPATCH(_, &Slave::_recover);
@@ -2848,15 +2899,17 @@ TYPED_TEST(SlaveRecoveryProcessIsolatorTest, MultipleSlaves)
Future<ReregisterSlaveMessage> reregisterSlave2 =
FUTURE_PROTOBUF(ReregisterSlaveMessage(), _, _);
- // Restart both slaves using the same flags with new isolators.
- slave::ProcessIsolator isolator3;
+ // Restart both slaves using the same flags with new containerizers.
+ Try<Containerizer*> containerizer3 = Containerizer::create(flags1, true);
+ ASSERT_SOME(containerizer3);
- slave1 = this->StartSlave(&isolator3, flags1);
+ slave1 = this->StartSlave(containerizer3.get(), flags1);
ASSERT_SOME(slave1);
- slave::ProcessIsolator isolator4;
+ Try<Containerizer*> containerizer4 = Containerizer::create(flags2, true);
+ ASSERT_SOME(containerizer4);
- slave2 = this->StartSlave(&isolator4, flags2);
+ slave2 = this->StartSlave(containerizer4.get(), flags2);
ASSERT_SOME(slave2);
Clock::pause();
@@ -2910,5 +2963,7 @@ TYPED_TEST(SlaveRecoveryProcessIsolatorTest, MultipleSlaves)
driver.stop();
driver.join();
- this->Shutdown(); // Shutdown before isolator(s) get deallocated.
+ this->Shutdown();
+ delete containerizer3.get();
+ delete containerizer4.get();
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/0f3f8f35/src/tests/test_framework_test.sh
----------------------------------------------------------------------
diff --git a/src/tests/test_framework_test.sh b/src/tests/test_framework_test.sh
index 277245d..a521b47 100755
--- a/src/tests/test_framework_test.sh
+++ b/src/tests/test_framework_test.sh
@@ -1,5 +1,7 @@
#!/usr/bin/env bash
+set -x
+
# Expecting MESOS_SOURCE_DIR and MESOS_BUILD_DIR to be in environment.
env | grep MESOS_SOURCE_DIR >/dev/null
[02/11] git commit: Added missing return value for
hashmap::containsValue and test.
Posted by vi...@apache.org.
Added missing return value for hashmap::containsValue and test.
Review: https://reviews.apache.org/r/17834
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/c578fe54
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/c578fe54
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/c578fe54
Branch: refs/heads/master
Commit: c578fe5480c4d357db2c3f1019722b7fbe5e33f8
Parents: 413947c
Author: Ian Downes <ia...@gmail.com>
Authored: Tue Feb 11 16:58:18 2014 -0800
Committer: Vinod Kone <vi...@twitter.com>
Committed: Tue Feb 11 16:59:49 2014 -0800
----------------------------------------------------------------------
.../3rdparty/stout/include/stout/hashmap.hpp | 1 +
.../libprocess/3rdparty/stout/tests/hashmap_tests.cpp | 13 +++++++++++++
2 files changed, 14 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/c578fe54/3rdparty/libprocess/3rdparty/stout/include/stout/hashmap.hpp
----------------------------------------------------------------------
diff --git a/3rdparty/libprocess/3rdparty/stout/include/stout/hashmap.hpp b/3rdparty/libprocess/3rdparty/stout/include/stout/hashmap.hpp
index d14b4aa..aa4d9ba 100644
--- a/3rdparty/libprocess/3rdparty/stout/include/stout/hashmap.hpp
+++ b/3rdparty/libprocess/3rdparty/stout/include/stout/hashmap.hpp
@@ -53,6 +53,7 @@ public:
return true;
}
}
+ return false;
}
// Inserts a key, value pair into the map replacing an old value
http://git-wip-us.apache.org/repos/asf/mesos/blob/c578fe54/3rdparty/libprocess/3rdparty/stout/tests/hashmap_tests.cpp
----------------------------------------------------------------------
diff --git a/3rdparty/libprocess/3rdparty/stout/tests/hashmap_tests.cpp b/3rdparty/libprocess/3rdparty/stout/tests/hashmap_tests.cpp
index ff8bafb..eb3abfc 100644
--- a/3rdparty/libprocess/3rdparty/stout/tests/hashmap_tests.cpp
+++ b/3rdparty/libprocess/3rdparty/stout/tests/hashmap_tests.cpp
@@ -23,3 +23,16 @@ TEST(HashMapTest, Insert)
ASSERT_SOME_EQ(4, map.get("def"));
ASSERT_EQ(2, map.size());
}
+
+
+TEST(HashMapTest, Contains)
+{
+ hashmap<string, int> map;
+ map["abc"] = 1;
+
+ ASSERT_TRUE(map.contains("abc"));
+ ASSERT_TRUE(map.containsValue(1));
+
+ ASSERT_FALSE(map.contains("def"));
+ ASSERT_FALSE(map.containsValue(2));
+}
[03/11] git commit: Containerizer - isolators (part 3).
Posted by vi...@apache.org.
Containerizer - isolators (part 3).
Isolators perform isolation for the MesosContainerizer.
Isolator interface and implementations of Posix CPU and Mem isolators
(no isolation, just usage())
Review: https://reviews.apache.org/r/16150
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/d5266b8c
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/d5266b8c
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/d5266b8c
Branch: refs/heads/master
Commit: d5266b8c9c76cccea6b7a70c78ec4c81b4a9a6b4
Parents: 71c6b5c
Author: Ian Downes <ia...@gmail.com>
Authored: Tue Feb 11 16:28:20 2014 -0800
Committer: Vinod Kone <vi...@twitter.com>
Committed: Tue Feb 11 16:59:50 2014 -0800
----------------------------------------------------------------------
src/Makefile.am | 3 +
src/slave/containerizer/isolator.cpp | 104 ++++++++++
src/slave/containerizer/isolator.hpp | 144 +++++++++++++
src/slave/containerizer/isolators/posix.hpp | 252 +++++++++++++++++++++++
4 files changed, 503 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/d5266b8c/src/Makefile.am
----------------------------------------------------------------------
diff --git a/src/Makefile.am b/src/Makefile.am
index a4b5a52..d9cb9e9 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -177,6 +177,7 @@ libmesos_no_3rdparty_la_SOURCES = \
slave/slave.cpp \
slave/http.cpp \
slave/containerizer/containerizer.cpp \
+ slave/containerizer/isolator.cpp \
slave/containerizer/launcher.cpp \
slave/containerizer/mesos_containerizer.cpp \
slave/status_update_manager.cpp \
@@ -233,6 +234,8 @@ libmesos_no_3rdparty_la_SOURCES += common/attributes.hpp \
messages/messages.hpp slave/constants.hpp \
slave/containerizer/cgroups_launcher.hpp \
slave/containerizer/containerizer.hpp \
+ slave/containerizer/isolator.hpp \
+ slave/containerizer/isolators/posix.hpp \
slave/containerizer/launcher.hpp \
slave/containerizer/mesos_containerizer.hpp \
slave/flags.hpp slave/gc.hpp slave/monitor.hpp \
http://git-wip-us.apache.org/repos/asf/mesos/blob/d5266b8c/src/slave/containerizer/isolator.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolator.cpp b/src/slave/containerizer/isolator.cpp
new file mode 100644
index 0000000..f7935b3
--- /dev/null
+++ b/src/slave/containerizer/isolator.cpp
@@ -0,0 +1,104 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <process/dispatch.hpp>
+
+#include "slave/containerizer/isolator.hpp"
+
+using namespace process;
+
+using std::string;
+using std::list;
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+
+Isolator::Isolator(Owned<IsolatorProcess> _process)
+ : process(_process)
+{
+ process::spawn(CHECK_NOTNULL(process.get()));
+}
+
+
+Isolator::~Isolator()
+{
+ process::terminate(process.get());
+ process::wait(process.get());
+}
+
+
+Future<Nothing> Isolator::recover(const list<state::RunState>& state)
+{
+ return dispatch(process.get(), &IsolatorProcess::recover, state);
+}
+
+
+Future<Nothing> Isolator::prepare(
+ const ContainerID& containerId,
+ const ExecutorInfo& executorInfo)
+{
+ return dispatch(process.get(),
+ &IsolatorProcess::prepare,
+ containerId,
+ executorInfo);
+}
+
+
+Future<Option<CommandInfo> > Isolator::isolate(
+ const ContainerID& containerId,
+ pid_t pid)
+{
+ return dispatch(process.get(), &IsolatorProcess::isolate, containerId, pid);
+}
+
+
+Future<Limitation> Isolator::watch(const ContainerID& containerId)
+{
+ return dispatch(process.get(), &IsolatorProcess::watch, containerId);
+}
+
+
+Future<Nothing> Isolator::update(
+ const ContainerID& containerId,
+ const Resources& resources)
+{
+ return dispatch(
+ process.get(),
+ &IsolatorProcess::update,
+ containerId,
+ resources);
+}
+
+
+Future<ResourceStatistics> Isolator::usage(
+ const ContainerID& containerId) const
+{
+ return dispatch(process.get(), &IsolatorProcess::usage, containerId);
+}
+
+
+Future<Nothing> Isolator::cleanup(const ContainerID& containerId)
+{
+ return dispatch(process.get(), &IsolatorProcess::cleanup, containerId);
+}
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
http://git-wip-us.apache.org/repos/asf/mesos/blob/d5266b8c/src/slave/containerizer/isolator.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolator.hpp b/src/slave/containerizer/isolator.hpp
new file mode 100644
index 0000000..fc6c9ab
--- /dev/null
+++ b/src/slave/containerizer/isolator.hpp
@@ -0,0 +1,144 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ISOLATOR_HPP__
+#define __ISOLATOR_HPP__
+
+#include <list>
+#include <string>
+
+#include <process/dispatch.hpp>
+#include <process/future.hpp>
+#include <process/owned.hpp>
+#include <process/process.hpp>
+
+#include <stout/try.hpp>
+
+#include "slave/flags.hpp"
+#include "slave/state.hpp"
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+// Forward declaration.
+class IsolatorProcess;
+
+// Information when an executor is impacted by a resource limitation and should
+// be terminated. Intended to support resources like memory where the Linux
+// kernel may invoke the OOM killer, killing some/all of a container's
+// processes.
+// Value type pairing the resource that hit a limit with a human-readable
+// explanation. Both members are const, so an instance cannot be modified or
+// assigned to after construction.
+struct Limitation
+{
+ // Copies the given resource and message into the new Limitation.
+ Limitation(
+ const Resource& _resource,
+ const std::string& _message)
+ : resource(_resource),
+ message(_message) {}
+
+ // Resource (type and value) that triggered the limitation.
+ const Resource resource;
+ // Description of the limitation.
+ const std::string message;
+};
+
+
+// Handle around an IsolatorProcess. The constructor spawns the process and
+// the destructor terminates and waits on it; every other method dispatches
+// to the IsolatorProcess method of the same name and returns its future.
+class Isolator
+{
+public:
+ // Stores 'process' and spawns it; the process must not be NULL.
+ Isolator(process::Owned<IsolatorProcess> process);
+ // Terminates the underlying process and waits on it.
+ ~Isolator();
+
+ // Recover containers from the run states.
+ process::Future<Nothing> recover(
+ const std::list<state::RunState>& states);
+
+ // Prepare for isolation of the executor.
+ process::Future<Nothing> prepare(
+ const ContainerID& containerId,
+ const ExecutorInfo& executorInfo);
+
+ // Isolate the executor. Any steps that require execution in the
+ // containerized context (e.g. inside a network namespace) can be returned in
+ // the optional CommandInfo and they will be run by the Launcher. This could
+ // be a simple command or a URI (including a local file) that will be fetched
+ // and executed.
+ process::Future<Option<CommandInfo> > isolate(
+ const ContainerID& containerId,
+ pid_t pid);
+
+ // Watch the containerized executor and report if any resource constraint
+ // impacts the container, e.g., the kernel killing some processes.
+ process::Future<Limitation> watch(const ContainerID& containerId);
+
+ // Update the resources allocated to the container.
+ process::Future<Nothing> update(
+ const ContainerID& containerId,
+ const Resources& resources);
+
+ // Gather resource usage statistics for the container.
+ process::Future<ResourceStatistics> usage(
+ const ContainerID& containerId) const;
+
+ // Clean up a terminated container. This is called after the executor and all
+ // processes in the container have terminated.
+ process::Future<Nothing> cleanup(const ContainerID& containerId);
+
+private:
+ // Declared private and left without a visible definition so that an
+ // Isolator cannot be copied or assigned.
+ Isolator(const Isolator&); // Not copyable.
+ Isolator& operator=(const Isolator&); // Not assignable.
+
+ // The process that actually services the calls above.
+ process::Owned<IsolatorProcess> process;
+};
+
+
+// Abstract interface that concrete isolators implement. Isolator dispatches
+// each of its public methods to the method of the same name declared here,
+// so the per-method contracts are the ones documented on Isolator. Every
+// method is pure virtual.
+class IsolatorProcess : public process::Process<IsolatorProcess>
+{
+public:
+ virtual ~IsolatorProcess() {}
+
+ // Recover containers from the run states.
+ virtual process::Future<Nothing> recover(
+ const std::list<state::RunState>& state) = 0;
+
+ // Prepare for isolation of the executor.
+ virtual process::Future<Nothing> prepare(
+ const ContainerID& containerId,
+ const ExecutorInfo& executorInfo) = 0;
+
+ // Isolate the executor running as 'pid'; may return a command to be run.
+ virtual process::Future<Option<CommandInfo> > isolate(
+ const ContainerID& containerId,
+ pid_t pid) = 0;
+
+ // Future describing a resource limitation that impacted the container.
+ virtual process::Future<Limitation> watch(
+ const ContainerID& containerId) = 0;
+
+ // Update the resources allocated to the container.
+ virtual process::Future<Nothing> update(
+ const ContainerID& containerId,
+ const Resources& resources) = 0;
+
+ // Gather resource usage statistics for the container. Unlike
+ // Isolator::usage(), this method is not const.
+ virtual process::Future<ResourceStatistics> usage(
+ const ContainerID& containerId) = 0;
+
+ // Clean up a terminated container.
+ virtual process::Future<Nothing> cleanup(const ContainerID& containerId) = 0;
+};
+
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
+
+#endif // __ISOLATOR_HPP__
http://git-wip-us.apache.org/repos/asf/mesos/blob/d5266b8c/src/slave/containerizer/isolators/posix.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/posix.hpp b/src/slave/containerizer/isolators/posix.hpp
new file mode 100644
index 0000000..7fbc6dd
--- /dev/null
+++ b/src/slave/containerizer/isolators/posix.hpp
@@ -0,0 +1,252 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __POSIX_ISOLATOR_HPP__
+#define __POSIX_ISOLATOR_HPP__
+
+#include <stout/hashmap.hpp>
+#include <stout/os/pstree.hpp>
+
+#include <process/future.hpp>
+
+#include "slave/containerizer/isolator.hpp"
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+// A basic IsolatorProcess that keeps track of the pid but doesn't do any
+// resource isolation. Subclasses must implement usage() for their appropriate
+// resource(s).
+// Bookkeeping-only IsolatorProcess: it remembers the executor pid of every
+// container and keeps one Limitation promise per container, but enforces no
+// resource isolation. usage() is not implemented here; subclasses provide it
+// for the resource(s) they report on.
+class PosixIsolatorProcess : public IsolatorProcess
+{
+public:
+  // Registers every container found in the given run states. Fails on the
+  // first run state that lacks a container id or a forked pid, or whose
+  // container is already tracked; run states processed before the failing
+  // one stay registered.
+  virtual process::Future<Nothing> recover(
+      const std::list<state::RunState>& state)
+  {
+    foreach (const state::RunState& runState, state) {
+      if (!runState.id.isSome()) {
+        return process::Failure("ContainerID is required to recover");
+      }
+
+      if (!runState.forkedPid.isSome()) {
+        return process::Failure("Executor pid is required to recover");
+      }
+
+      // This should (almost) never occur: see comment in
+      // PosixLauncher::recover().
+      if (pids.contains(runState.id.get())) {
+        return process::Failure("Container already recovered");
+      }
+
+      pids.put(runState.id.get(), runState.forkedPid.get());
+
+      promises.put(
+          runState.id.get(),
+          process::Owned<process::Promise<Limitation> >(
+              new process::Promise<Limitation>()));
+    }
+
+    return Nothing();
+  }
+
+  // Starts tracking a new container by creating its Limitation promise.
+  // Fails if the container already has one. 'executorInfo' is not used.
+  virtual process::Future<Nothing> prepare(
+      const ContainerID& containerId,
+      const ExecutorInfo& executorInfo)
+  {
+    if (promises.contains(containerId)) {
+      const std::string message =
+        "Container " + stringify(containerId) + " has already been prepared";
+
+      return process::Failure(message);
+    }
+
+    promises.put(
+        containerId,
+        process::Owned<process::Promise<Limitation> >(
+            new process::Promise<Limitation>()));
+
+    return Nothing();
+  }
+
+  // Records the executor pid of a known container. No command has to be run
+  // inside the container, hence the result is always None.
+  virtual process::Future<Option<CommandInfo> > isolate(
+      const ContainerID& containerId,
+      pid_t pid)
+  {
+    if (!promises.contains(containerId)) {
+      return unknownContainer(containerId);
+    }
+
+    pids.put(containerId, pid);
+
+    return None();
+  }
+
+  // Hands out the future of the container's Limitation promise. Nothing in
+  // this class ever sets that promise.
+  virtual process::Future<Limitation> watch(
+      const ContainerID& containerId)
+  {
+    if (!promises.contains(containerId)) {
+      return unknownContainer(containerId);
+    }
+
+    return promises[containerId]->future();
+  }
+
+  // Accepts the update for a known container without acting on it.
+  virtual process::Future<Nothing> update(
+      const ContainerID& containerId,
+      const Resources& resources)
+  {
+    if (!promises.contains(containerId)) {
+      return unknownContainer(containerId);
+    }
+
+    // No resources are actually isolated so nothing to do.
+    return Nothing();
+  }
+
+  // Forgets the container's promise and pid.
+  virtual process::Future<Nothing> cleanup(const ContainerID& containerId)
+  {
+    if (!promises.contains(containerId)) {
+      return unknownContainer(containerId);
+    }
+
+    // TODO(idownes): We should discard the container's promise here to signal
+    // to anyone that holds the future from watch().
+    promises.erase(containerId);
+
+    pids.erase(containerId);
+
+    return Nothing();
+  }
+
+protected:
+  // Executor pid per container; filled in by recover() and isolate().
+  hashmap<ContainerID, pid_t> pids;
+
+  // Limitation promise per container; filled in by recover() and prepare().
+  hashmap<ContainerID,
+          process::Owned<process::Promise<Limitation> > > promises;
+
+private:
+  // Builds the failure shared by all operations on an untracked container.
+  static process::Failure unknownContainer(const ContainerID& containerId)
+  {
+    return process::Failure("Unknown container: " + stringify(containerId));
+  }
+};
+
+
+// Posix isolator that reports (but does not limit) CPU usage: usage() sums
+// the user and system CPU times over the executor's process tree.
+class PosixCpuIsolatorProcess : public PosixIsolatorProcess
+{
+public:
+  // Wraps a new PosixCpuIsolatorProcess in a heap-allocated Isolator and
+  // returns the raw pointer. 'flags' is currently unused.
+  static Try<Isolator*> create(const Flags& flags)
+  {
+    process::Owned<IsolatorProcess> process(new PosixCpuIsolatorProcess());
+
+    return new Isolator(process);
+  }
+
+  // Returns the user/system CPU seconds accumulated over all processes in
+  // the tree rooted at the container's executor pid. Empty statistics are
+  // returned when the container has no recorded pid or when the process
+  // tree cannot be obtained.
+  virtual process::Future<ResourceStatistics> usage(
+      const ContainerID& containerId)
+  {
+    if (!pids.contains(containerId)) {
+      LOG(WARNING) << "No resource usage for unknown container '"
+                   << containerId << "'";
+      return ResourceStatistics();
+    }
+
+    Try<os::ProcessTree> tree = os::pstree(pids.get(containerId).get());
+
+    if (!tree.isSome()) {
+      return ResourceStatistics();
+    }
+
+    ResourceStatistics result;
+
+    // Breadth-first walk of the tree. The queue holds pointers into 'tree',
+    // which outlives the loop, rather than ProcessTree values: queueing
+    // values copies every node's whole subtree once per ancestor.
+    std::deque<const os::ProcessTree*> pending;
+    pending.push_back(&tree.get());
+
+    while (!pending.empty()) {
+      const os::ProcessTree* node = pending.front();
+      pending.pop_front();
+
+      // We only show utime and stime when both are available, otherwise
+      // we're exposing a partial view of the CPU times.
+      if (node->process.utime.isSome() && node->process.stime.isSome()) {
+        result.set_cpus_user_time_secs(
+            result.cpus_user_time_secs() + node->process.utime.get().secs());
+        result.set_cpus_system_time_secs(
+            result.cpus_system_time_secs() + node->process.stime.get().secs());
+      }
+
+      foreach (const os::ProcessTree& child, node->children) {
+        pending.push_back(&child);
+      }
+    }
+
+    return result;
+  }
+
+private:
+  // Instances are only created through create().
+  PosixCpuIsolatorProcess() {}
+};
+
+
+// Posix isolator that reports (but does not limit) memory usage: usage()
+// sums the RSS over the executor's process tree.
+class PosixMemIsolatorProcess : public PosixIsolatorProcess
+{
+public:
+  // Wraps a new PosixMemIsolatorProcess in a heap-allocated Isolator and
+  // returns the raw pointer. 'flags' is currently unused.
+  static Try<Isolator*> create(const Flags& flags)
+  {
+    process::Owned<IsolatorProcess> process(new PosixMemIsolatorProcess());
+
+    return new Isolator(process);
+  }
+
+  // Returns the RSS bytes summed over all processes in the tree rooted at
+  // the container's executor pid; processes without an RSS value are
+  // skipped. Empty statistics are returned when the container has no
+  // recorded pid or when the process tree cannot be obtained.
+  virtual process::Future<ResourceStatistics> usage(
+      const ContainerID& containerId)
+  {
+    if (!pids.contains(containerId)) {
+      LOG(WARNING) << "No resource usage for unknown container '"
+                   << containerId << "'";
+      return ResourceStatistics();
+    }
+
+    Try<os::ProcessTree> tree = os::pstree(pids.get(containerId).get());
+
+    if (!tree.isSome()) {
+      return ResourceStatistics();
+    }
+
+    ResourceStatistics result;
+
+    // Breadth-first walk of the tree. The queue holds pointers into 'tree',
+    // which outlives the loop, rather than ProcessTree values: queueing
+    // values copies every node's whole subtree once per ancestor.
+    std::deque<const os::ProcessTree*> pending;
+    pending.push_back(&tree.get());
+
+    while (!pending.empty()) {
+      const os::ProcessTree* node = pending.front();
+      pending.pop_front();
+
+      if (node->process.rss.isSome()) {
+        result.set_mem_rss_bytes(
+            result.mem_rss_bytes() + node->process.rss.get().bytes());
+      }
+
+      foreach (const os::ProcessTree& child, node->children) {
+        pending.push_back(&child);
+      }
+    }
+
+    return result;
+  }
+
+private:
+  // Instances are only created through create().
+  PosixMemIsolatorProcess() {}
+};
+
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
+
+#endif // __POSIX_ISOLATOR_HPP__