Posted to commits@mesos.apache.org by ji...@apache.org on 2018/10/28 04:50:22 UTC

[mesos] branch 1.5.x updated (d6bc88d -> c4802b9)

This is an automated email from the ASF dual-hosted git repository.

jieyu pushed a change to branch 1.5.x
in repository https://gitbox.apache.org/repos/asf/mesos.git.


    from d6bc88d  Fixed bug in 'execute.cpp' with tty-based tasks and no 'containerInfo'.
     new bf8b066  Created cgroup recursively when calling prepare on containers.
     new c4802b9  Added MESOS-9305 to 1.5.3 CHANGELOG.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 CHANGELOG                                          |   3 +
 .../mesos/isolators/cgroups/cgroups.cpp            |   3 +-
 src/tests/containerizer/cgroups_isolator_tests.cpp | 124 +++++++++++++++++++++
 3 files changed, 129 insertions(+), 1 deletion(-)


[mesos] 02/02: Added MESOS-9305 to 1.5.3 CHANGELOG.

Posted by ji...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jieyu pushed a commit to branch 1.5.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit c4802b9f28f17168c6a56e8226b1e051d3bb1456
Author: Jie Yu <yu...@gmail.com>
AuthorDate: Sat Oct 27 21:46:57 2018 -0700

    Added MESOS-9305 to 1.5.3 CHANGELOG.
    
    (cherry picked from commit 5a268f1991c1b40549022593183f4793b136e0fd)
---
 CHANGELOG | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG b/CHANGELOG
index ec8ccad..5f66104 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -5,6 +5,9 @@ Release Notes - Mesos - Version 1.5.3 (WIP)
 ** Bug
   * [MESOS-8907] - Docker image fetcher fails with HTTP/2.
 
+** Improvement
+  * [MESOS-9305] - Create cgroup recursively to work around systemd deleting cgroups_root.
+
 
 Release Notes - Mesos - Version 1.5.2
 -------------------------------------


[mesos] 01/02: Created cgroup recursively when calling prepare on containers.

Posted by ji...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jieyu pushed a commit to branch 1.5.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit bf8b06610d2e44ae874e9ee4f8b4d0bf859ff73e
Author: fei long <ca...@gmail.com>
AuthorDate: Sat Oct 27 21:12:21 2018 -0700

    Created cgroup recursively when calling prepare on containers.
    
    Review: https://reviews.apache.org/r/68986/
    (cherry picked from commit e3dc53946d999d212f305ddc6bbcc08cd27bd0b6)
---
 .../mesos/isolators/cgroups/cgroups.cpp            |   3 +-
 src/tests/containerizer/cgroups_isolator_tests.cpp | 124 +++++++++++++++++++++
 2 files changed, 126 insertions(+), 1 deletion(-)

diff --git a/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp b/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp
index 6d663a5..9f909b3 100644
--- a/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp
+++ b/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp
@@ -413,7 +413,8 @@ Future<Option<ContainerLaunchInfo>> CgroupsIsolatorProcess::prepare(
 
     Try<Nothing> create = cgroups::create(
         hierarchy,
-        infos[containerId]->cgroup);
+        infos[containerId]->cgroup,
+        true);
 
     if (create.isError()) {
       return Failure(
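
For context: the one-line change above passes `true` as the third
argument of cgroups::create(), i.e. its 'recursive' parameter, so any
missing parent directories under the hierarchy (including a
cgroups_root that systemd removed) are recreated on the way down. As a
rough sketch of that "mkdir -p" semantics (illustrative only; the
helper name is hypothetical and this is not the actual Mesos
implementation in src/linux/cgroups.cpp):

    #include <errno.h>
    #include <sys/stat.h>

    #include <string>

    // Create 'hierarchy/cgroup', making any missing intermediate
    // directories along the way (the behavior requested by passing
    // 'true' for the 'recursive' parameter). Returns 0 on success,
    // -1 with errno set on failure.
    int createCgroupRecursively(
        const std::string& hierarchy,
        const std::string& cgroup)
    {
      std::string path = hierarchy;
      size_t start = 0;

      while (start <= cgroup.size()) {
        size_t end = cgroup.find('/', start);
        if (end == std::string::npos) {
          end = cgroup.size();
        }

        const std::string component = cgroup.substr(start, end - start);

        if (!component.empty()) {
          path += "/" + component;

          // An existing intermediate cgroup is not an error.
          if (mkdir(path.c_str(), 0755) != 0 && errno != EEXIST) {
            return -1;
          }
        }

        start = end + 1;
      }

      return 0;
    }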
diff --git a/src/tests/containerizer/cgroups_isolator_tests.cpp b/src/tests/containerizer/cgroups_isolator_tests.cpp
index 5412a7e..9b53563 100644
--- a/src/tests/containerizer/cgroups_isolator_tests.cpp
+++ b/src/tests/containerizer/cgroups_isolator_tests.cpp
@@ -675,6 +675,130 @@ TEST_F(CgroupsIsolatorTest, ROOT_CGROUPS_PidsAndTids)
 }
 
 
+// This tests creation of a cgroup when the cgroups_root dir is gone.
+// All tasks will fail if this happens after the slave starts or
+// recovers. We should create the cgroup recursively. See MESOS-9305.
+TEST_F(CgroupsIsolatorTest, ROOT_CGROUPS_CreateRecursively)
+{
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  slave::Flags flags = CreateSlaveFlags();
+  flags.isolation = "cgroups/mem";
+
+  Fetcher fetcher(flags);
+
+  Try<MesosContainerizer*> _containerizer =
+    MesosContainerizer::create(flags, true, &fetcher);
+
+  ASSERT_SOME(_containerizer);
+
+  Owned<MesosContainerizer> containerizer(_containerizer.get());
+
+  Owned<MasterDetector> detector = master.get()->createDetector();
+
+  Try<Owned<cluster::Slave>> slave = StartSlave(
+      detector.get(),
+      containerizer.get(),
+      flags);
+
+  ASSERT_SOME(slave);
+
+  Result<string> hierarchy = cgroups::hierarchy("memory");
+  ASSERT_SOME(hierarchy);
+
+  // We should remove cgroups_root after the slave has started,
+  // because the slave will create the cgroups_root dir during
+  // startup if it's not present.
+  ASSERT_SOME(cgroups::remove(hierarchy.get(), flags.cgroups_root));
+  ASSERT_FALSE(os::exists(path::join(hierarchy.get(), flags.cgroups_root)));
+
+  MockScheduler sched;
+
+  MesosSchedulerDriver driver(
+      &sched,
+      DEFAULT_FRAMEWORK_INFO,
+      master.get()->pid,
+      DEFAULT_CREDENTIAL);
+
+  Future<Nothing> schedRegistered;
+  EXPECT_CALL(sched, registered(_, _, _))
+    .WillOnce(FutureSatisfy(&schedRegistered));
+
+  Future<vector<Offer>> offers;
+  EXPECT_CALL(sched, resourceOffers(_, _))
+    .WillOnce(FutureArg<1>(&offers))
+    .WillRepeatedly(Return());      // Ignore subsequent offers.
+
+  driver.start();
+
+  AWAIT_READY(schedRegistered);
+
+  AWAIT_READY(offers);
+  EXPECT_EQ(1u, offers->size());
+
+  // Create a task to be launched in a Mesos container. We will
+  // explicitly kill this task to perform the cleanup test.
+  TaskInfo task = createTask(offers.get()[0], "sleep 1000");
+
+  Future<TaskStatus> statusStarting;
+  Future<TaskStatus> statusRunning;
+  EXPECT_CALL(sched, statusUpdate(_, _))
+    .WillOnce(FutureArg<1>(&statusStarting))
+    .WillOnce(FutureArg<1>(&statusRunning));
+
+  driver.launchTasks(offers.get()[0].id(), {task});
+
+  // Capture the update to verify that the task has been launched.
+  AWAIT_READY(statusStarting);
+  ASSERT_EQ(TASK_STARTING, statusStarting->state());
+
+  AWAIT_READY(statusRunning);
+  ASSERT_EQ(TASK_RUNNING, statusRunning->state());
+
+  // Task is ready. Make sure there is exactly 1 container in the hashset.
+  Future<hashset<ContainerID>> containers = containerizer->containers();
+  AWAIT_READY(containers);
+  ASSERT_EQ(1u, containers->size());
+
+  const ContainerID& containerID = *(containers->begin());
+
+  // Check if the memory cgroup for this container exists, by
+  // checking for the processes associated with this cgroup.
+  string cgroup = path::join(
+      flags.cgroups_root,
+      containerID.value());
+
+  Try<set<pid_t>> pids = cgroups::processes(hierarchy.get(), cgroup);
+  ASSERT_SOME(pids);
+
+  // There should be at least one TGID associated with this cgroup.
+  EXPECT_LE(1u, pids->size());
+
+  // Isolator cleanup test: killing the task should clean up the
+  // cgroup associated with the container.
+  Future<TaskStatus> killStatus;
+  EXPECT_CALL(sched, statusUpdate(_, _))
+    .WillOnce(FutureArg<1>(&killStatus));
+
+  // Wait for the executor to exit. We are using 'gc.schedule' as a proxy event
+  // to monitor the exit of the executor.
+  Future<Nothing> gcSchedule = FUTURE_DISPATCH(
+      _, &slave::GarbageCollectorProcess::schedule);
+
+  driver.killTask(statusRunning->task_id());
+
+  AWAIT_READY(gcSchedule);
+
+  // If the cleanup is successful, the memory cgroup for this
+  // container should not exist.
+  ASSERT_FALSE(os::exists(path::join(hierarchy.get(), cgroup)));
+
+  driver.stop();
+  driver.join();
+}
+
+
 class NetClsHandleManagerTest : public testing::Test {};
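
For completeness, the failure mode the new test simulates can also be
seen with plain POSIX calls, independent of Mesos. A single mkdir() of
a nested cgroup fails with ENOENT once its parent (the removed
cgroups_root) is gone, while creating each component in turn, which is
what the recursive code path now does, succeeds. The sketch below
assumes a cgroup v1 memory hierarchy mounted at the conventional
/sys/fs/cgroup/memory and must run as root:

    #include <errno.h>
    #include <stdio.h>
    #include <sys/stat.h>

    int main()
    {
      // Assume the 'mesos' root (cgroups_root) under the memory
      // hierarchy was removed, e.g. by systemd.
      const char* root = "/sys/fs/cgroup/memory/mesos";
      const char* child = "/sys/fs/cgroup/memory/mesos/child";

      // A non-recursive create of the nested cgroup fails: mkdir()
      // does not create missing parents.
      if (mkdir(child, 0755) != 0) {
        perror("mkdir(child)");  // Expected: "No such file or directory".
      }

      // Creating each component in turn ("mkdir -p" semantics, i.e.
      // what recursive creation does) succeeds.
      if (mkdir(root, 0755) == 0 && mkdir(child, 0755) == 0) {
        printf("recursive creation succeeded\n");
      }

      return 0;
    }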