You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by id...@apache.org on 2014/10/28 20:23:43 UTC

[7/8] git commit: Add ns::pid::destroy() to destroy a pid namespace.

Add ns::pid::destroy() to destroy a pid namespace.

All processes are signalled with SIGKILL then reaped. The order of
signalling is not determined, i.e., generally the init pid is not the
first pid signalled.

Review: https://reviews.apache.org/r/25966/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/03421130
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/03421130
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/03421130

Branch: refs/heads/master
Commit: 03421130e827975214db6fafbcd97258055c5d79
Parents: 3650573
Author: Ian Downes <id...@twitter.com>
Authored: Fri Oct 24 11:50:13 2014 -0700
Committer: Ian Downes <id...@twitter.com>
Committed: Tue Oct 28 12:04:16 2014 -0700

----------------------------------------------------------------------
 src/linux/ns.hpp       | 80 +++++++++++++++++++++++++++++++++++++++++++++
 src/tests/ns_tests.cpp | 73 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 153 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/03421130/src/linux/ns.hpp
----------------------------------------------------------------------
diff --git a/src/linux/ns.hpp b/src/linux/ns.hpp
index 60adaa4..ec1a517 100644
--- a/src/linux/ns.hpp
+++ b/src/linux/ns.hpp
@@ -40,6 +40,10 @@
 #include <stout/os/exists.hpp>
 #include <stout/os/ls.hpp>
 
+#include <process/collect.hpp>
+#include <process/future.hpp>
+#include <process/reap.hpp>
+
 namespace ns {
 
 // Returns all the supported namespaces by the kernel.
@@ -223,6 +227,82 @@ inline Try<ino_t> getns(pid_t pid, const std::string& ns)
   return s.st_ino;
 }
 
+
+namespace pid {
+
+namespace internal {
+
+inline Nothing _nothing() { return Nothing(); } // Continuation for .then().
+
+} // namespace internal {
+
+// Destroys the pid namespace identified by 'inode' by sending SIGKILL
+// to every process found in it and then reaping them. Returns a
+// failure for the root pid namespace or the caller's own namespace.
+inline process::Future<Nothing> destroy(ino_t inode)
+{
+  // Check we're not trying to kill the root namespace.
+  Try<ino_t> ns = ns::getns(1, "pid");
+  if (ns.isError()) {
+    return process::Failure(ns.error());
+  }
+
+  if (ns.get() == inode) {
+    return process::Failure("Cannot destroy root pid namespace");
+  }
+
+  // Or ourselves.
+  ns = ns::getns(::getpid(), "pid");
+  if (ns.isError()) {
+    return process::Failure(ns.error());
+  }
+
+  if (ns.get() == inode) {
+    return process::Failure("Cannot destroy own pid namespace");
+  }
+
+  // Signal all pids in the namespace, including the init pid if it's
+  // still running. Once the init pid has been signalled the kernel
+  // will prevent any new children forking in the namespace and will
+  // also signal all other pids in the namespace.
+  Try<std::set<pid_t>> pids = os::pids();
+  if (pids.isError()) {
+    return process::Failure("Failed to list processes: " + pids.error());
+  }
+
+  foreach (pid_t pid, pids.get()) {
+    // Ignore any errors, probably because the process no longer
+    // exists, and ignorable otherwise.
+    Try<ino_t> ns = ns::getns(pid, "pid");
+    if (ns.isSome() && ns.get() == inode) {
+      kill(pid, SIGKILL);
+    }
+  }
+
+  // Get a new snapshot and do a second pass to reap any pids that are
+  // still dying. As in the first pass, getns() errors are ignored.
+  pids = os::pids();
+  if (pids.isError()) {
+    return process::Failure("Failed to list processes: " + pids.error());
+  }
+
+  std::list<process::Future<Option<int>>> futures;
+
+  foreach (pid_t pid, pids.get()) {
+    Try<ino_t> ns = ns::getns(pid, "pid");
+    if (ns.isSome() && ns.get() == inode) {
+      futures.push_back(process::reap(pid));
+    }
+  }
+
+  // Wait for all the signalled processes to terminate. The pid
+  // namespace will then be empty and will be released by the kernel
+  // (unless there are additional references).
+  return process::collect(futures)
+    .then(lambda::bind(&internal::_nothing));
+}
+
+} // namespace pid {
 } // namespace ns {
 
 #endif // __LINUX_NS_HPP__

http://git-wip-us.apache.org/repos/asf/mesos/blob/03421130/src/tests/ns_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/ns_tests.cpp b/src/tests/ns_tests.cpp
index 30218cf..eb385d0 100644
--- a/src/tests/ns_tests.cpp
+++ b/src/tests/ns_tests.cpp
@@ -214,3 +214,76 @@ TEST(NsTest, ROOT_getns)
   ASSERT_TRUE(WIFSIGNALED(status));
   EXPECT_EQ(SIGKILL, WTERMSIG(status));
 }
+
+
+// Entry point handed to clone(): forks three times, then sleeps forever.
+static int childDestroy(void* arg)
+{
+  // Each fork() is executed by every process created so far.
+  for (int i = 0; i < 3; i++) { ::fork(); }
+
+  // Every process, original and forked alike, sleeps until killed.
+  for (;;) { sleep(1); }
+
+  // Not reached: the loop above never terminates on its own.
+  ABORT("Error, child should be killed before reaching here");
+}
+
+
+// Verifies that ns::pid::destroy() kills every process in a namespace.
+TEST(NsTest, ROOT_destroy)
+{
+  set<string> supported = ns::namespaces();
+  if (supported.count("pid") == 0) {
+    return; // Pid namespace is not available.
+  }
+
+  Try<int> pidNsFlag = ns::nstype("pid");
+  ASSERT_SOME(pidNsFlag);
+
+  // Statically allocated 8 MiB region that serves as the child's stack.
+  static unsigned long long childStack[
+      (8*1024*1024)/sizeof(unsigned long long)];
+
+  // The stack grows down so clone() is given the last array element.
+  unsigned long long* stackTop =
+    &childStack[sizeof(childStack)/sizeof(childStack[0]) - 1];
+
+  pid_t child = clone(childDestroy, stackTop, SIGCHLD | pidNsFlag.get(), NULL);
+  ASSERT_NE(-1, child);
+
+  // Begin reaping the child; its status is checked after destroy().
+  Future<Option<int>> exited = process::reap(child);
+
+  // The child must be in a pid namespace other than ours.
+  Try<ino_t> childNs = ns::getns(child, "pid");
+  ASSERT_SOME(childNs);
+
+  Try<ino_t> selfNs = ns::getns(::getpid(), "pid");
+  ASSERT_SOME(selfNs);
+
+  ASSERT_NE(selfNs.get(), childNs.get());
+
+  // Destroy the child's namespace, then check how the child died.
+  AWAIT_READY(ns::pid::destroy(childNs.get()));
+
+  AWAIT_READY(exited);
+  ASSERT_SOME(exited.get());
+  const int exitStatus = exited.get().get();
+  ASSERT_TRUE(WIFSIGNALED(exitStatus));
+  EXPECT_EQ(SIGKILL, WTERMSIG(exitStatus));
+
+  // No remaining process may report the child's pid namespace, i.e.,
+  // destroy() must have killed the descendants as well.
+  Try<set<pid_t>> remaining = os::pids();
+  ASSERT_SOME(remaining);
+
+  foreach (pid_t other, remaining.get()) {
+    // A pid may have exited since the snapshot was taken, so a
+    // failed lookup is simply skipped.
+    Try<ino_t> otherNs = ns::getns(other, "pid");
+    if (otherNs.isSome()) {
+      ASSERT_SOME_NE(childNs.get(), otherNs);
+    }
+  }
+}