Posted to commits@mesos.apache.org by ji...@apache.org on 2018/10/28 04:34:43 UTC
[mesos] branch master updated: Created cgroup recursively when calling prepare on containers.
This is an automated email from the ASF dual-hosted git repository.
jieyu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git
The following commit(s) were added to refs/heads/master by this push:
new e3dc539 Created cgroup recursively when calling prepare on containers.
e3dc539 is described below
commit e3dc53946d999d212f305ddc6bbcc08cd27bd0b6
Author: fei long <ca...@gmail.com>
AuthorDate: Sat Oct 27 21:12:21 2018 -0700
Created cgroup recursively when calling prepare on containers.
Review: https://reviews.apache.org/r/68986/
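
Some context on the change, summarized from MESOS-9305: if the cgroups_root
directory (e.g. /sys/fs/cgroup/memory/mesos for the memory subsystem) is
removed after the agent has started, every subsequent container launch fails,
because prepare() previously created only the leaf cgroup and assumed all of
its ancestors still existed. Creating the cgroup recursively restores any
missing ancestors first, in the spirit of 'mkdir -p'. The standalone C++
sketch below illustrates the two behaviors with std::filesystem; the paths
are hypothetical and this is not the Mesos implementation, only an
illustration:

#include <filesystem>
#include <iostream>
#include <system_error>

namespace fs = std::filesystem;

int main()
{
  // Hypothetical paths, for illustration only. A real cgroup v1
  // hierarchy would be a mount point such as /sys/fs/cgroup/memory.
  const fs::path hierarchy = "/tmp/demo-hierarchy";
  const fs::path cgroup = "mesos/some-container-id";

  std::error_code ec;

  // Non-recursive creation (the old behavior): fails when any
  // ancestor of the leaf directory is missing.
  fs::create_directory(hierarchy / cgroup, ec);
  std::cout << "create_directory:   " << (ec ? ec.message() : "ok") << "\n";

  // Recursive creation (the new behavior): missing ancestors are
  // created as well, like 'mkdir -p'.
  ec.clear();
  fs::create_directories(hierarchy / cgroup, ec);
  std::cout << "create_directories: " << (ec ? ec.message() : "ok") << "\n";

  return ec ? 1 : 0;
}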
---
.../mesos/isolators/cgroups/cgroups.cpp | 3 +-
src/tests/containerizer/cgroups_isolator_tests.cpp | 124 +++++++++++++++++++++
2 files changed, 126 insertions(+), 1 deletion(-)
diff --git a/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp b/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp
index fbb1b43..507665c 100644
--- a/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp
+++ b/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp
@@ -438,7 +438,8 @@ Future<Option<ContainerLaunchInfo>> CgroupsIsolatorProcess::prepare(
Try<Nothing> create = cgroups::create(
hierarchy,
- infos[containerId]->cgroup);
+ infos[containerId]->cgroup,
+ true);
if (create.isError()) {
return Failure(
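
For reference, the new third argument maps onto the 'recursive' parameter of
cgroups::create(). Modulo exact formatting, the declaration in
src/linux/cgroups.hpp looks like the sketch below; the flag defaults to
false, which is why the old two-argument call only ever created the leaf
cgroup:

namespace cgroups {

// Creates the named cgroup in the given hierarchy. When 'recursive'
// is true, any missing ancestor cgroups are created as well.
Try<Nothing> create(
    const std::string& hierarchy,
    const std::string& cgroup,
    bool recursive = false);

} // namespace cgroups {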
diff --git a/src/tests/containerizer/cgroups_isolator_tests.cpp b/src/tests/containerizer/cgroups_isolator_tests.cpp
index fccab20..eeaa850 100644
--- a/src/tests/containerizer/cgroups_isolator_tests.cpp
+++ b/src/tests/containerizer/cgroups_isolator_tests.cpp
@@ -687,6 +687,130 @@ TEST_F(CgroupsIsolatorTest, ROOT_CGROUPS_PidsAndTids)
}
+// This tests the creation of the cgroup when the cgroups_root dir is
+// gone. All task launches will fail if this happens after the slave
+// has started or recovered. We should create the cgroup recursively
+// to solve this. See MESOS-9305.
+TEST_F(CgroupsIsolatorTest, ROOT_CGROUPS_CreateRecursively)
+{
+ Try<Owned<cluster::Master>> master = StartMaster();
+ ASSERT_SOME(master);
+
+ slave::Flags flags = CreateSlaveFlags();
+ flags.isolation = "cgroups/mem";
+
+ Fetcher fetcher(flags);
+
+ Try<MesosContainerizer*> _containerizer =
+ MesosContainerizer::create(flags, true, &fetcher);
+
+ ASSERT_SOME(_containerizer);
+
+ Owned<MesosContainerizer> containerizer(_containerizer.get());
+
+ Owned<MasterDetector> detector = master.get()->createDetector();
+
+ Try<Owned<cluster::Slave>> slave = StartSlave(
+ detector.get(),
+ containerizer.get(),
+ flags);
+
+ ASSERT_SOME(slave);
+
+ Result<string> hierarchy = cgroups::hierarchy("memory");
+ ASSERT_SOME(hierarchy);
+
+ // We should remove cgroups_root after the slave has started,
+ // because the slave creates the cgroups_root dir during startup
+ // if it is not present.
+ ASSERT_SOME(cgroups::remove(hierarchy.get(), flags.cgroups_root));
+ ASSERT_FALSE(os::exists(path::join(hierarchy.get(), flags.cgroups_root)));
+
+ MockScheduler sched;
+
+ MesosSchedulerDriver driver(
+ &sched,
+ DEFAULT_FRAMEWORK_INFO,
+ master.get()->pid,
+ DEFAULT_CREDENTIAL);
+
+ Future<Nothing> schedRegistered;
+ EXPECT_CALL(sched, registered(_, _, _))
+ .WillOnce(FutureSatisfy(&schedRegistered));
+
+ Future<vector<Offer>> offers;
+ EXPECT_CALL(sched, resourceOffers(_, _))
+ .WillOnce(FutureArg<1>(&offers))
+ .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+ driver.start();
+
+ AWAIT_READY(schedRegistered);
+
+ AWAIT_READY(offers);
+ EXPECT_EQ(1u, offers->size());
+
+ // Create a task to be launched in the mesos-container. We will
+ // explicitly kill this task to perform the cleanup test.
+ TaskInfo task = createTask(offers.get()[0], "sleep 1000");
+
+ Future<TaskStatus> statusStarting;
+ Future<TaskStatus> statusRunning;
+ EXPECT_CALL(sched, statusUpdate(_, _))
+ .WillOnce(FutureArg<1>(&statusStarting))
+ .WillOnce(FutureArg<1>(&statusRunning));
+
+ driver.launchTasks(offers.get()[0].id(), {task});
+
+ // Capture the update to verify that the task has been launched.
+ AWAIT_READY(statusStarting);
+ ASSERT_EQ(TASK_STARTING, statusStarting->state());
+
+ AWAIT_READY(statusRunning);
+ ASSERT_EQ(TASK_RUNNING, statusRunning->state());
+
+ // Task is ready. Make sure there is exactly 1 container in the hashset.
+ Future<hashset<ContainerID>> containers = containerizer->containers();
+ AWAIT_READY(containers);
+ ASSERT_EQ(1u, containers->size());
+
+ const ContainerID& containerID = *(containers->begin());
+
+ // Check that the memory cgroup for this container exists by
+ // looking up the processes associated with the cgroup.
+ string cgroup = path::join(
+ flags.cgroups_root,
+ containerID.value());
+
+ Try<set<pid_t>> pids = cgroups::processes(hierarchy.get(), cgroup);
+ ASSERT_SOME(pids);
+
+ // There should be at least one TGID associated with this cgroup.
+ EXPECT_LE(1u, pids->size());
+
+ // Isolator cleanup test: killing the task should clean up the
+ // cgroup associated with the container.
+ Future<TaskStatus> killStatus;
+ EXPECT_CALL(sched, statusUpdate(_, _))
+ .WillOnce(FutureArg<1>(&killStatus));
+
+ // Wait for the executor to exit. We are using 'gc.schedule' as a proxy event
+ // to monitor the exit of the executor.
+ Future<Nothing> gcSchedule = FUTURE_DISPATCH(
+ _, &slave::GarbageCollectorProcess::schedule);
+
+ driver.killTask(statusRunning->task_id());
+
+ AWAIT_READY(gcSchedule);
+
+ // If the cleanup was successful, the memory cgroup for this
+ // container should no longer exist.
+ ASSERT_FALSE(os::exists(path::join(hierarchy.get(), cgroup)));
+
+ driver.stop();
+ driver.join();
+}
+
+
class NetClsHandleManagerTest : public testing::Test {};
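
A usage note on the new test: googletest cases prefixed with ROOT_ only run
with root privileges. Assuming a standard Mesos build tree (the wrapper
script name may vary by setup), it can presumably be run on its own with
something like:
sudo ./bin/mesos-tests.sh --gtest_filter='CgroupsIsolatorTest.ROOT_CGROUPS_CreateRecursively'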