Posted to commits@mesos.apache.org by id...@apache.org on 2014/10/28 20:23:39 UTC

[3/8] git commit: Use pid namespace to destroy container when available.

Use pid namespace to destroy container when available.

The Linux launcher will check whether a container is running in its own
pid namespace and, if so, will kill all processes in that namespace
rather than using the freezer cgroup. This approach is both backwards
and forwards compatible.
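
For background, a minimal standalone sketch of the detection idea (not
part of this commit; the helper name inSeparatePidNamespace and its
error handling are illustrative assumptions): on Linux, two processes
share a pid namespace exactly when the inodes behind their
/proc/<pid>/ns/pid links match, which is what the checkpointed
namespace inode lets the launcher test.

    #include <sys/stat.h>
    #include <sys/types.h>

    #include <sstream>
    #include <string>

    // A process lives in a different pid namespace than the caller when
    // the inode behind its /proc/<pid>/ns/pid link differs from the
    // caller's. `inSeparatePidNamespace` is an illustrative name, not a
    // Mesos symbol.
    bool inSeparatePidNamespace(pid_t pid)
    {
      struct stat self, other;

      std::ostringstream path;
      path << "/proc/" << pid << "/ns/pid";

      if (::stat("/proc/self/ns/pid", &self) != 0 ||
          ::stat(path.str().c_str(), &other) != 0) {
        // Be conservative on error and let the caller fall back to the
        // freezer-based destroy path.
        return false;
      }

      return self.st_ino != other.st_ino;
    }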

Review: https://reviews.apache.org/r/25966


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/fa44b0a9
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/fa44b0a9
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/fa44b0a9

Branch: refs/heads/master
Commit: fa44b0a9f040bff81043b1fd9963efe6dac80379
Parents: 823b992
Author: Ian Downes <id...@twitter.com>
Authored: Fri Oct 24 11:57:43 2014 -0700
Committer: Ian Downes <id...@twitter.com>
Committed: Tue Oct 28 12:04:16 2014 -0700

----------------------------------------------------------------------
 src/slave/containerizer/linux_launcher.cpp |  32 ++++
 src/tests/slave_recovery_tests.cpp         | 215 ++++++++++++++++++++++++
 2 files changed, 247 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/fa44b0a9/src/slave/containerizer/linux_launcher.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/linux_launcher.cpp b/src/slave/containerizer/linux_launcher.cpp
index 7a4ef69..10c1203 100644
--- a/src/slave/containerizer/linux_launcher.cpp
+++ b/src/slave/containerizer/linux_launcher.cpp
@@ -31,11 +31,14 @@
 #include <stout/strings.hpp>
 
 #include "linux/cgroups.hpp"
+#include "linux/ns.hpp"
 
 #include "mesos/resources.hpp"
 
 #include "slave/containerizer/linux_launcher.hpp"
 
+#include "slave/containerizer/isolators/namespaces/pid.hpp"
+
 using namespace process;
 
 using std::list;
@@ -365,6 +368,35 @@ Future<Nothing> LinuxLauncher::destroy(const ContainerID& containerId)
 
   pids.erase(containerId);
 
+  // Just return if the cgroup was destroyed and the slave didn't receive the
+  // notification. See comment in recover().
+  Try<bool> exists = cgroups::exists(hierarchy, cgroup(containerId));
+  if (exists.isError()) {
+    return Failure("Failed to check existence of freezer cgroup: " +
+                   exists.error());
+  }
+
+  if (!exists.get()) {
+    return Nothing();
+  }
+
+  Result<ino_t> containerPidNs =
+    NamespacesPidIsolatorProcess::getNamespace(containerId);
+
+  if (containerPidNs.isSome()) {
+    LOG(INFO) << "Using pid namespace to destroy container " << containerId;
+
+    return ns::pid::destroy(containerPidNs.get())
+      .then(lambda::bind(
+            (Future<Nothing>(*)(const string&,
+                                const string&,
+                                const Duration&))(&cgroups::destroy),
+            hierarchy,
+            cgroup(containerId),
+            cgroups::DESTROY_TIMEOUT));
+  }
+
+  // Try to clean up using just the freezer cgroup.
   return cgroups::destroy(
       hierarchy,
       cgroup(containerId),

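Two notes on the branch above. First, the function-pointer cast inside
the .then() continuation merely selects the three-argument overload of
cgroups::destroy (hierarchy, cgroup, timeout) so that lambda::bind can
resolve it. Second, ns::pid::destroy() kills the processes in the
namespace identified by the checkpointed inode before the cgroup is
torn down; a sketch of that idea (assuming an inode-matching walk of
/proc; killPidNamespace is an invented name, not the actual Mesos API)
could look like:

    #include <dirent.h>
    #include <signal.h>
    #include <sys/stat.h>
    #include <sys/types.h>

    #include <cstdlib>
    #include <string>

    // Walk /proc and SIGKILL every process whose pid namespace inode
    // matches `inode`. Processes that exit mid-walk are silently
    // skipped.
    void killPidNamespace(ino_t inode)
    {
      DIR* dir = ::opendir("/proc");
      if (dir == NULL) {
        return;
      }

      struct dirent* entry;
      while ((entry = ::readdir(dir)) != NULL) {
        char* end = NULL;
        pid_t pid = static_cast<pid_t>(::strtol(entry->d_name, &end, 10));
        if (end == entry->d_name || *end != '\0') {
          continue; // Not a numeric (pid) directory.
        }

        struct stat s;
        std::string path =
          std::string("/proc/") + entry->d_name + "/ns/pid";
        if (::stat(path.c_str(), &s) == 0 && s.st_ino == inode) {
          ::kill(pid, SIGKILL);
        }
      }

      ::closedir(dir);
    }
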
http://git-wip-us.apache.org/repos/asf/mesos/blob/fa44b0a9/src/tests/slave_recovery_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_recovery_tests.cpp b/src/tests/slave_recovery_tests.cpp
index 813e2d6..98e059f 100644
--- a/src/tests/slave_recovery_tests.cpp
+++ b/src/tests/slave_recovery_tests.cpp
@@ -3490,3 +3490,218 @@ TEST_F(MesosContainerizerSlaveRecoveryTest, CGROUPS_ROOT_PerfRollForward)
   delete containerizer2.get();
 }
 #endif // __linux__
+
+
+#ifdef __linux__
+// Test that a container started without namespace/pid isolation can
+// be destroyed correctly with namespace/pid isolation enabled.
+TEST_F(MesosContainerizerSlaveRecoveryTest, CGROUPS_ROOT_PidNamespaceForward)
+{
+  Try<PID<Master> > master = this->StartMaster();
+  ASSERT_SOME(master);
+
+  // Start a slave using a containerizer without pid namespace
+  // isolation.
+  slave::Flags flags = this->CreateSlaveFlags();
+  flags.isolation = "cgroups/cpu,cgroups/mem";
+  flags.slave_subsystems = "";
+
+  Try<MesosContainerizer*> containerizer1 =
+    MesosContainerizer::create(flags, true);
+  ASSERT_SOME(containerizer1);
+
+  Try<PID<Slave> > slave = this->StartSlave(containerizer1.get(), flags);
+  ASSERT_SOME(slave);
+
+  MockScheduler sched;
+
+  // Scheduler expectations.
+  EXPECT_CALL(sched, statusUpdate(_, _))
+    .WillRepeatedly(Return());
+
+  // Enable checkpointing for the framework.
+  FrameworkInfo frameworkInfo;
+  frameworkInfo.CopyFrom(DEFAULT_FRAMEWORK_INFO);
+  frameworkInfo.set_checkpoint(true);
+
+  MesosSchedulerDriver driver(
+      &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);
+
+  EXPECT_CALL(sched, registered(_, _, _));
+
+  Future<vector<Offer> > offers1;
+  EXPECT_CALL(sched, resourceOffers(_, _))
+    .WillOnce(FutureArg<1>(&offers1))
+    .WillRepeatedly(Return());      // Ignore subsequent offers.
+
+  driver.start();
+
+  AWAIT_READY(offers1);
+  EXPECT_NE(0u, offers1.get().size());
+
+  SlaveID slaveId = offers1.get()[0].slave_id();
+
+  TaskInfo task1 = createTask(
+      slaveId, Resources::parse("cpus:0.5;mem:128").get(), "sleep 1000");
+  vector<TaskInfo> tasks1;
+  tasks1.push_back(task1);
+
+  // Message expectations.
+  Future<Message> registerExecutorMessage =
+    FUTURE_MESSAGE(Eq(RegisterExecutorMessage().GetTypeName()), _, _);
+
+  driver.launchTasks(offers1.get()[0].id(), tasks1);
+
+  AWAIT_READY(registerExecutorMessage);
+
+  Future<hashset<ContainerID> > containers = containerizer1.get()->containers();
+  AWAIT_READY(containers);
+  ASSERT_EQ(1u, containers.get().size());
+
+  ContainerID containerId = *(containers.get().begin());
+
+  // Stop the slave.
+  this->Stop(slave.get());
+  delete containerizer1.get();
+
+  // Start a slave using a containerizer with pid namespace isolation.
+  flags.isolation = "cgroups/cpu,cgroups/mem,namespaces/pid";
+
+  Try<MesosContainerizer*> containerizer2 =
+    MesosContainerizer::create(flags, true);
+  ASSERT_SOME(containerizer2);
+
+  Future<vector<Offer> > offers2;
+  EXPECT_CALL(sched, resourceOffers(_, _))
+    .WillOnce(FutureArg<1>(&offers2))
+    .WillRepeatedly(Return());        // Ignore subsequent offers.
+
+  slave = this->StartSlave(containerizer2.get(), flags);
+  ASSERT_SOME(slave);
+
+  AWAIT_READY(offers2);
+  EXPECT_NE(0u, offers2.get().size());
+
+  // Set up to wait on the container's termination.
+  Future<containerizer::Termination> termination =
+    containerizer2.get()->wait(containerId);
+
+  // Destroy the container.
+  containerizer2.get()->destroy(containerId);
+
+  AWAIT_READY(termination);
+
+  driver.stop();
+  driver.join();
+
+  this->Shutdown();
+  delete containerizer2.get();
+}
+
+
+// Test that a container started with namespace/pid isolation can
+// be destroyed correctly without namespace/pid isolation enabled.
+TEST_F(MesosContainerizerSlaveRecoveryTest, CGROUPS_ROOT_PidNamespaceBackward)
+{
+  Try<PID<Master> > master = this->StartMaster();
+  ASSERT_SOME(master);
+
+  // Start a slave using a containerizer with pid namespace isolation.
+  slave::Flags flags = this->CreateSlaveFlags();
+  flags.isolation = "cgroups/cpu,cgroups/mem,namespaces/pid";
+  flags.slave_subsystems = "";
+
+  Try<MesosContainerizer*> containerizer1 =
+    MesosContainerizer::create(flags, true);
+  ASSERT_SOME(containerizer1);
+
+  Try<PID<Slave> > slave = this->StartSlave(containerizer1.get(), flags);
+  ASSERT_SOME(slave);
+
+  MockScheduler sched;
+
+  // Scheduler expectations.
+  EXPECT_CALL(sched, statusUpdate(_, _))
+    .WillRepeatedly(Return());
+
+  // Enable checkpointing for the framework.
+  FrameworkInfo frameworkInfo;
+  frameworkInfo.CopyFrom(DEFAULT_FRAMEWORK_INFO);
+  frameworkInfo.set_checkpoint(true);
+
+  MesosSchedulerDriver driver(
+      &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);
+
+  EXPECT_CALL(sched, registered(_, _, _));
+
+  Future<vector<Offer> > offers1;
+  EXPECT_CALL(sched, resourceOffers(_, _))
+    .WillOnce(FutureArg<1>(&offers1))
+    .WillRepeatedly(Return());      // Ignore subsequent offers.
+
+  driver.start();
+
+  AWAIT_READY(offers1);
+  EXPECT_NE(0u, offers1.get().size());
+
+  SlaveID slaveId = offers1.get()[0].slave_id();
+
+  TaskInfo task1 = createTask(
+      slaveId, Resources::parse("cpus:0.5;mem:128").get(), "sleep 1000");
+  vector<TaskInfo> tasks1;
+  tasks1.push_back(task1);
+
+  // Message expectations.
+  Future<Message> registerExecutorMessage =
+    FUTURE_MESSAGE(Eq(RegisterExecutorMessage().GetTypeName()), _, _);
+
+  driver.launchTasks(offers1.get()[0].id(), tasks1);
+
+  AWAIT_READY(registerExecutorMessage);
+
+  Future<hashset<ContainerID> > containers = containerizer1.get()->containers();
+  AWAIT_READY(containers);
+  ASSERT_EQ(1u, containers.get().size());
+
+  ContainerID containerId = *(containers.get().begin());
+
+  // Stop the slave.
+  this->Stop(slave.get());
+  delete containerizer1.get();
+
+  // Start a slave using a containerizer without pid namespace
+  // isolation.
+  flags.isolation = "cgroups/cpu,cgroups/mem";
+
+  Try<MesosContainerizer*> containerizer2 =
+    MesosContainerizer::create(flags, true);
+  ASSERT_SOME(containerizer2);
+
+  Future<vector<Offer> > offers2;
+  EXPECT_CALL(sched, resourceOffers(_, _))
+    .WillOnce(FutureArg<1>(&offers2))
+    .WillRepeatedly(Return());        // Ignore subsequent offers.
+
+  slave = this->StartSlave(containerizer2.get(), flags);
+  ASSERT_SOME(slave);
+
+  AWAIT_READY(offers2);
+  EXPECT_NE(0u, offers2.get().size());
+
+  // Set up to wait on the container's termination.
+  Future<containerizer::Termination> termination =
+    containerizer2.get()->wait(containerId);
+
+  // Destroy the container.
+  containerizer2.get()->destroy(containerId);
+
+  AWAIT_READY(termination);
+
+  driver.stop();
+  driver.join();
+
+  this->Shutdown();
+  delete containerizer2.get();
+}
+
+#endif // __linux__