You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by id...@apache.org on 2014/11/04 19:41:35 UTC

[2/2] git commit: Permit unprivileged executors to created child cgroups.

Permit unprivileged executors to created child cgroups.

MesosContainerizer Isolators using cgroups will chown the cgroup to
the executor user, if specified, enabling the executor to create child
cgroups. Cgroup control files are not chowned so the executor cannot
modify its own cgroups, only control files for child cgroups.

Review: https://reviews.apache.org/r/27558


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/c8598f7f
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/c8598f7f
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/c8598f7f

Branch: refs/heads/master
Commit: c8598f7f5a24a01b6a68e0f060b79662ee97af89
Parents: 529e288
Author: Ian Downes <id...@twitter.com>
Authored: Mon Nov 3 16:48:06 2014 -0800
Committer: Ian Downes <id...@twitter.com>
Committed: Tue Nov 4 10:41:12 2014 -0800

----------------------------------------------------------------------
 .../isolators/cgroups/cpushare.cpp              |  13 ++
 .../containerizer/isolators/cgroups/mem.cpp     |  13 ++
 .../isolators/cgroups/perf_event.cpp            |  13 ++
 src/tests/isolator_tests.cpp                    | 134 +++++++++++++++++++
 4 files changed, 173 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/c8598f7f/src/slave/containerizer/isolators/cgroups/cpushare.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/cpushare.cpp b/src/slave/containerizer/isolators/cgroups/cpushare.cpp
index dbf6130..90aabb8 100644
--- a/src/slave/containerizer/isolators/cgroups/cpushare.cpp
+++ b/src/slave/containerizer/isolators/cgroups/cpushare.cpp
@@ -278,6 +278,19 @@ Future<Option<CommandInfo> > CgroupsCpushareIsolatorProcess::prepare(
     if (create.isError()) {
       return Failure("Failed to prepare isolator: " + create.error());
     }
+
+    // Chown the cgroup so the executor can create nested cgroups. Do
+    // not recurse so the control files are still owned by the slave
+    // user and thus cannot be changed by the executor.
+    if (user.isSome()) {
+      Try<Nothing> chown = os::chown(
+          user.get(),
+          path::join(hierarchies[subsystem], info->cgroup),
+          false);
+      if (chown.isError()) {
+        return Failure("Failed to prepare isolator: " + chown.error());
+      }
+    }
   }
 
   return update(containerId, executorInfo.resources())

http://git-wip-us.apache.org/repos/asf/mesos/blob/c8598f7f/src/slave/containerizer/isolators/cgroups/mem.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/mem.cpp b/src/slave/containerizer/isolators/cgroups/mem.cpp
index abdc436..3a3ac40 100644
--- a/src/slave/containerizer/isolators/cgroups/mem.cpp
+++ b/src/slave/containerizer/isolators/cgroups/mem.cpp
@@ -235,6 +235,19 @@ Future<Option<CommandInfo> > CgroupsMemIsolatorProcess::prepare(
     return Failure("Failed to prepare isolator: " + create.error());
   }
 
+  // Chown the cgroup so the executor can create nested cgroups. Do
+  // not recurse so the control files are still owned by the slave
+  // user and thus cannot be changed by the executor.
+  if (user.isSome()) {
+    Try<Nothing> chown = os::chown(
+        user.get(),
+        path::join(hierarchy, info->cgroup),
+        false);
+    if (chown.isError()) {
+      return Failure("Failed to prepare isolator: " + chown.error());
+    }
+  }
+
   oomListen(containerId);
 
   return update(containerId, executorInfo.resources())

http://git-wip-us.apache.org/repos/asf/mesos/blob/c8598f7f/src/slave/containerizer/isolators/cgroups/perf_event.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/perf_event.cpp b/src/slave/containerizer/isolators/cgroups/perf_event.cpp
index d6837de..6f67164 100644
--- a/src/slave/containerizer/isolators/cgroups/perf_event.cpp
+++ b/src/slave/containerizer/isolators/cgroups/perf_event.cpp
@@ -247,6 +247,19 @@ Future<Option<CommandInfo> > CgroupsPerfEventIsolatorProcess::prepare(
     }
   }
 
+  // Chown the cgroup so the executor can create nested cgroups. Do
+  // not recurse so the control files are still owned by the slave
+  // user and thus cannot be changed by the executor.
+  if (user.isSome()) {
+    Try<Nothing> chown = os::chown(
+        user.get(),
+        path::join(hierarchy, info->cgroup),
+        false);
+    if (chown.isError()) {
+      return Failure("Failed to prepare isolator: " + chown.error());
+    }
+  }
+
   return None();
 }
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/c8598f7f/src/tests/isolator_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/isolator_tests.cpp b/src/tests/isolator_tests.cpp
index 6b49a9e..01c0239 100644
--- a/src/tests/isolator_tests.cpp
+++ b/src/tests/isolator_tests.cpp
@@ -1003,4 +1003,138 @@ TEST_F(NamespacesPidIsolatorTest, ROOT_PidNamespace)
   delete containerizer.get();
 }
 
+
+// Username for the unprivileged user that will be created to test
+// unprivileged cgroup creation. It will be removed after the tests.
+// It is presumed this user does not normally exist.
+const string UNPRIVILEGED_USERNAME = "mesos.test.unprivileged.user";
+
+
+template <typename T>
+class UserCgroupIsolatorTest : public MesosTest
+{
+public:
+  static void SetUpTestCase()
+  {
+    // Remove the user in case it wasn't cleaned up from a previous
+    // test.
+    os::system("userdel -r " + UNPRIVILEGED_USERNAME);
+
+    ASSERT_EQ(0, os::system("useradd " + UNPRIVILEGED_USERNAME));
+  }
+
+
+  static void TearDownTestCase()
+  {
+    ASSERT_EQ(0, os::system("userdel -r " + UNPRIVILEGED_USERNAME));
+  }
+};
+
+
+// Test all isolators that use cgroups.
+typedef ::testing::Types<
+  CgroupsMemIsolatorProcess,
+  CgroupsCpushareIsolatorProcess,
+  CgroupsPerfEventIsolatorProcess> CgroupsIsolatorTypes;
+
+
+TYPED_TEST_CASE(UserCgroupIsolatorTest, CgroupsIsolatorTypes);
+
+
+TYPED_TEST(UserCgroupIsolatorTest, ROOT_CGROUPS_UserCgroup)
+{
+  slave::Flags flags;
+  flags.perf_events = "cpu-cycles"; // Needed for CgroupsPerfEventIsolator.
+
+  Try<Isolator*> isolator = TypeParam::create(flags);
+  CHECK_SOME(isolator);
+
+  ExecutorInfo executorInfo;
+  executorInfo.mutable_resources()->CopyFrom(
+      Resources::parse("mem:1024;cpus:1").get()); // For cpu/mem isolators.
+
+  ContainerID containerId;
+  containerId.set_value("container");
+
+  AWAIT_READY(isolator.get()->prepare(
+        containerId,
+        executorInfo,
+        os::getcwd(),
+        UNPRIVILEGED_USERNAME));
+
+  // Isolators don't provide a way to determine the cgroups they use
+  // so we'll inspect the cgroups for an isolated dummy process.
+  pid_t pid = fork();
+  if (pid == 0) {
+    // Child just sleeps.
+    ::sleep(100);
+
+    ABORT("Child process should not reach here");
+  }
+  ASSERT_GT(pid, 0);
+
+  AWAIT_READY(isolator.get()->isolate(containerId, pid));
+
+  // Get the container's cgroups from /proc/$PID/cgroup. We're only
+  // interested in the non-root cgroups, i.e., we exclude those with
+  // paths "/", e.g., only cpu and cpuacct from this example:
+  // 6:blkio:/
+  // 5:perf_event:/
+  // 4:memory:/
+  // 3:freezer:/
+  // 2:cpuacct:/mesos
+  // 1:cpu:/mesos
+  // awk will then output "cpuacct/mesos\ncpu/mesos" as the cgroup(s).
+  ostringstream output;
+  Try<int> status = os::shell(
+      &output,
+      "grep -v '/$' /proc/" +
+      stringify(pid) +
+      "/cgroup | awk -F ':' '{print $2$3}'");
+
+  ASSERT_SOME(status);
+
+  // Kill the dummy child process.
+  ::kill(pid, SIGKILL);
+  int exitStatus;
+  EXPECT_NE(-1, ::waitpid(pid, &exitStatus, 0));
+
+  vector<string> cgroups = strings::tokenize(output.str(), "\n");
+  ASSERT_FALSE(cgroups.empty());
+
+  foreach (const string& cgroup, cgroups) {
+    // Check the user cannot manipulate the container's cgroup control
+    // files.
+    EXPECT_NE(0, os::system(
+          "su - " + UNPRIVILEGED_USERNAME +
+          " -c 'echo $$ >" +
+          path::join(flags.cgroups_hierarchy, cgroup, "cgroup.procs") +
+          "'"));
+
+    // Check the user can create a cgroup under the container's
+    // cgroup.
+    string userCgroup = path::join(cgroup, "user");
+
+    EXPECT_EQ(0, os::system(
+          "su - " +
+          UNPRIVILEGED_USERNAME +
+          " -c 'mkdir " +
+          path::join(flags.cgroups_hierarchy, userCgroup) +
+          "'"));
+
+    // Check the user can manipulate control files in the created
+    // cgroup.
+    EXPECT_EQ(0, os::system(
+          "su - " +
+          UNPRIVILEGED_USERNAME +
+          " -c 'echo $$ >" +
+          path::join(flags.cgroups_hierarchy, userCgroup, "cgroup.procs") +
+          "'"));
+  }
+
+  // Clean up the container. This will also remove the nested cgroups.
+  AWAIT_READY(isolator.get()->cleanup(containerId));
+
+  delete isolator.get();
+}
 #endif // __linux__