You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ji...@apache.org on 2018/10/28 04:50:11 UTC

[mesos] branch 1.6.x updated (f272bcb -> 8877f03)

This is an automated email from the ASF dual-hosted git repository.

jieyu pushed a change to branch 1.6.x
in repository https://gitbox.apache.org/repos/asf/mesos.git.


    from f272bcb  Fixed bug in 'execute.cpp' with tty-based tasks and no 'containerInfo'.
     new 43d37a7  Created cgroup recursively when calling prepare on containers.
     new 8877f03  Added MESOS-9305 to 1.6.2 CHANGELOG.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 CHANGELOG                                          |   1 +
 .../mesos/isolators/cgroups/cgroups.cpp            |   3 +-
 src/tests/containerizer/cgroups_isolator_tests.cpp | 124 +++++++++++++++++++++
 3 files changed, 127 insertions(+), 1 deletion(-)


[mesos] 02/02: Added MESOS-9305 to 1.6.2 CHANGELOG.

Posted by ji...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jieyu pushed a commit to branch 1.6.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 8877f031a4ce6a1673a23e00ac847d181e797d7c
Author: Jie Yu <yu...@gmail.com>
AuthorDate: Sat Oct 27 21:46:41 2018 -0700

    Added MESOS-9305 to 1.6.2 CHANGELOG.
    
    (cherry picked from commit c877e0af9045ad9e64e7d124ec9bc30d1f9a65cd)
---
 CHANGELOG | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG b/CHANGELOG
index 748a010..e5f85ad 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -32,6 +32,7 @@ Release Notes - Mesos - Version 1.6.2 (WIP)
 
 ** Improvement
   * [MESOS-9189] - Include 'Connection: close' header in master streaming API responses.
+  * [MESOS-9305] - Create cgroup recursively to work around systemd deleting cgroups_root.
   * [MESOS-9340] - Log all socket errors in libprocess.
 
 


[mesos] 01/02: Created cgroup recursively when calling prepare on containers.

Posted by ji...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jieyu pushed a commit to branch 1.6.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 43d37a7523a2fd4f36b1e15a7416727d927d37ce
Author: fei long <ca...@gmail.com>
AuthorDate: Sat Oct 27 21:12:21 2018 -0700

    Created cgroup recursively when calling prepare on containers.
    
    Review: https://reviews.apache.org/r/68986/
    (cherry picked from commit e3dc53946d999d212f305ddc6bbcc08cd27bd0b6)
---
 .../mesos/isolators/cgroups/cgroups.cpp            |   3 +-
 src/tests/containerizer/cgroups_isolator_tests.cpp | 124 +++++++++++++++++++++
 2 files changed, 126 insertions(+), 1 deletion(-)

diff --git a/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp b/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp
index 6d663a5..9f909b3 100644
--- a/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp
+++ b/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp
@@ -413,7 +413,8 @@ Future<Option<ContainerLaunchInfo>> CgroupsIsolatorProcess::prepare(
 
     Try<Nothing> create = cgroups::create(
         hierarchy,
-        infos[containerId]->cgroup);
+        infos[containerId]->cgroup,
+        true);
 
     if (create.isError()) {
       return Failure(
diff --git a/src/tests/containerizer/cgroups_isolator_tests.cpp b/src/tests/containerizer/cgroups_isolator_tests.cpp
index 40c18a1..68846ec 100644
--- a/src/tests/containerizer/cgroups_isolator_tests.cpp
+++ b/src/tests/containerizer/cgroups_isolator_tests.cpp
@@ -675,6 +675,130 @@ TEST_F(CgroupsIsolatorTest, ROOT_CGROUPS_PidsAndTids)
 }
 
 
+// This tests the creation of a cgroup when the cgroups_root dir is gone.
+// All tasks will fail if this happens after slave starting/recovering.
+// We should create cgroup recursively to solve this. See MESOS-9305.
+TEST_F(CgroupsIsolatorTest, ROOT_CGROUPS_CreateRecursively)
+{
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  slave::Flags flags = CreateSlaveFlags();
+  flags.isolation = "cgroups/mem";
+
+  Fetcher fetcher(flags);
+
+  Try<MesosContainerizer*> _containerizer =
+    MesosContainerizer::create(flags, true, &fetcher);
+
+  ASSERT_SOME(_containerizer);
+
+  Owned<MesosContainerizer> containerizer(_containerizer.get());
+
+  Owned<MasterDetector> detector = master.get()->createDetector();
+
+  Try<Owned<cluster::Slave>> slave = StartSlave(
+      detector.get(),
+      containerizer.get(),
+      flags);
+
+  ASSERT_SOME(slave);
+
+  Result<string> hierarchy = cgroups::hierarchy("memory");
+  ASSERT_SOME(hierarchy);
+
+  // Remove cgroups_root only after the slave has started: the slave
+  // creates it during startup if missing. Check the absolute path,
+  // since cgroups_root is relative to the hierarchy mount point.
+  ASSERT_SOME(cgroups::remove(hierarchy.get(), flags.cgroups_root));
+  ASSERT_FALSE(os::exists(path::join(hierarchy.get(), flags.cgroups_root)));
+
+  MockScheduler sched;
+
+  MesosSchedulerDriver driver(
+      &sched,
+      DEFAULT_FRAMEWORK_INFO,
+      master.get()->pid,
+      DEFAULT_CREDENTIAL);
+
+  Future<Nothing> schedRegistered;
+  EXPECT_CALL(sched, registered(_, _, _))
+    .WillOnce(FutureSatisfy(&schedRegistered));
+
+  Future<vector<Offer>> offers;
+  EXPECT_CALL(sched, resourceOffers(_, _))
+    .WillOnce(FutureArg<1>(&offers))
+    .WillRepeatedly(Return());      // Ignore subsequent offers.
+
+  driver.start();
+
+  AWAIT_READY(schedRegistered);
+
+  AWAIT_READY(offers);
+  EXPECT_EQ(1u, offers->size());
+
+  // Create a task to be launched in the mesos-container. We will be
+  // explicitly killing this task to perform the cleanup test.
+  TaskInfo task = createTask(offers.get()[0], "sleep 1000");
+
+  Future<TaskStatus> statusStarting;
+  Future<TaskStatus> statusRunning;
+  EXPECT_CALL(sched, statusUpdate(_, _))
+    .WillOnce(FutureArg<1>(&statusStarting))
+    .WillOnce(FutureArg<1>(&statusRunning));
+
+  driver.launchTasks(offers.get()[0].id(), {task});
+
+  // Capture the update to verify that the task has been launched.
+  AWAIT_READY(statusStarting);
+  ASSERT_EQ(TASK_STARTING, statusStarting->state());
+
+  AWAIT_READY(statusRunning);
+  ASSERT_EQ(TASK_RUNNING, statusRunning->state());
+
+  // Task is ready. Make sure there is exactly 1 container in the hashset.
+  Future<hashset<ContainerID>> containers = containerizer->containers();
+  AWAIT_READY(containers);
+  ASSERT_EQ(1u, containers->size());
+
+  const ContainerID& containerID = *(containers->begin());
+
+  // Check if the memory cgroup for this container exists, by
+  // checking for the processes associated with this cgroup.
+  string cgroup = path::join(
+      flags.cgroups_root,
+      containerID.value());
+
+  Try<set<pid_t>> pids = cgroups::processes(hierarchy.get(), cgroup);
+  ASSERT_SOME(pids);
+
+  // There should be at least one TGID associated with this cgroup.
+  EXPECT_LE(1u, pids->size());
+
+  // Isolator cleanup test: Killing the task should cleanup the cgroup
+  // associated with the container.
+  Future<TaskStatus> killStatus;
+  EXPECT_CALL(sched, statusUpdate(_, _))
+    .WillOnce(FutureArg<1>(&killStatus));
+
+  // Wait for the executor to exit. We are using 'gc.schedule' as a proxy event
+  // to monitor the exit of the executor.
+  Future<Nothing> gcSchedule = FUTURE_DISPATCH(
+      _, &slave::GarbageCollectorProcess::schedule);
+
+  driver.killTask(statusRunning->task_id());
+
+  AWAIT_READY(gcSchedule);
+
+  // If the cleanup is successful, the container's memory cgroup directory
+  // should no longer exist under the hierarchy mount point.
+  ASSERT_FALSE(os::exists(path::join(hierarchy.get(), cgroup)));
+
+  driver.stop();
+  driver.join();
+}
+
+
 class NetClsHandleManagerTest : public testing::Test {};