You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by qi...@apache.org on 2019/08/23 07:57:36 UTC

[mesos] branch 1.6.x updated (c6da50d -> 9badb3b)

This is an automated email from the ASF dual-hosted git repository.

qianzhang pushed a change to branch 1.6.x
in repository https://gitbox.apache.org/repos/asf/mesos.git.


    from c6da50d  Transitioned tasks when an unreachable agent is marked as gone.
     new 1c70f29  Used cached cgroups for updating resources in Docker containerizer.
     new 9badb3b  Added MESOS-9836 to the 1.6.3 CHANGELOG.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 CHANGELOG                                          |   1 +
 src/slave/containerizer/docker.cpp                 | 125 ++++++++++++---------
 src/slave/containerizer/docker.hpp                 |   8 +-
 .../containerizer/docker_containerizer_tests.cpp   |   2 +-
 4 files changed, 77 insertions(+), 59 deletions(-)


[mesos] 02/02: Added MESOS-9836 to the 1.6.3 CHANGELOG.

Posted by qi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

qianzhang pushed a commit to branch 1.6.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 9badb3b89583b4026eec10f374f325d049c87eb7
Author: Qian Zhang <zh...@gmail.com>
AuthorDate: Fri Aug 23 15:33:52 2019 +0800

    Added MESOS-9836 to the 1.6.3 CHANGELOG.
---
 CHANGELOG | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG b/CHANGELOG
index 87b3a1d..579e72b 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -15,6 +15,7 @@ Release Notes - Mesos - Version 1.6.3 (WIP)
   * [MESOS-9766] - /__processes__ endpoint can hang.
   * [MESOS-9786] - Race between two REMOVE_QUOTA calls crashes the master.
   * [MESOS-9787] - Log slow SSL (TLS) peer reverse DNS lookup.
+  * [MESOS-9836] - Docker containerizer overwrites `/mesos/slave` cgroups.
   * [MESOS-9852] - Slow memory growth in master due to deferred deletion of offer filters and timers.
   * [MESOS-9856] - REVIVE call with specified role(s) clears filters for all roles of a framework.
   * [MESOS-9868] - NetworkInfo from the agent /state endpoint is not correct.


[mesos] 01/02: Used cached cgroups for updating resources in Docker containerizer.

Posted by qi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

qianzhang pushed a commit to branch 1.6.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 1c70f29bdd270cdff8ce55bdfaab56581829017c
Author: Qian Zhang <zh...@gmail.com>
AuthorDate: Wed Aug 21 16:50:06 2019 +0800

    Used cached cgroups for updating resources in Docker containerizer.
    
    Review: https://reviews.apache.org/r/71335
---
 src/slave/containerizer/docker.cpp                 | 125 ++++++++++++---------
 src/slave/containerizer/docker.hpp                 |   8 +-
 .../containerizer/docker_containerizer_tests.cpp   |   2 +-
 3 files changed, 76 insertions(+), 59 deletions(-)

diff --git a/src/slave/containerizer/docker.cpp b/src/slave/containerizer/docker.cpp
index 85b22f4..573d3cc 100644
--- a/src/slave/containerizer/docker.cpp
+++ b/src/slave/containerizer/docker.cpp
@@ -1687,9 +1687,9 @@ Future<Nothing> DockerContainerizerProcess::update(
     return Nothing();
   }
 
-  // Skip inspecting the docker container if we already have the pid.
-  if (container->pid.isSome()) {
-    return __update(containerId, _resources, container->pid.get());
+  // Skip inspecting the docker container if we already have the cgroups.
+  if (container->cpuCgroup.isSome() && container->memoryCgroup.isSome()) {
+    return __update(containerId, _resources);
   }
 
   string containerName = containers_.at(containerId)->containerName;
@@ -1740,6 +1740,7 @@ Future<Nothing> DockerContainerizerProcess::update(
 }
 
 
+#ifdef __linux__
 Future<Nothing> DockerContainerizerProcess::_update(
     const ContainerID& containerId,
     const Resources& _resources,
@@ -1757,23 +1758,6 @@ Future<Nothing> DockerContainerizerProcess::_update(
 
   containers_.at(containerId)->pid = container.pid.get();
 
-  return __update(containerId, _resources, container.pid.get());
-}
-
-
-Future<Nothing> DockerContainerizerProcess::__update(
-    const ContainerID& containerId,
-    const Resources& _resources,
-    pid_t pid)
-{
-#ifdef __linux__
-  // Determine the cgroups hierarchies where the 'cpu' and
-  // 'memory' subsystems are mounted (they may be the same). Note that
-  // we make these static so we can reuse the result for subsequent
-  // calls.
-  static Result<string> cpuHierarchy = cgroups::hierarchy("cpu");
-  static Result<string> memoryHierarchy = cgroups::hierarchy("memory");
-
   // NOTE: Normally, a Docker container should be in its own cgroup.
   // However, a zombie process (exited but not reaped) will be
   // temporarily moved into the system root cgroup. We add some
@@ -1781,18 +1765,6 @@ Future<Nothing> DockerContainerizerProcess::__update(
   // in the root cgroup. See MESOS-8480 for details.
   const string systemRootCgroup = stringify(os::PATH_SEPARATOR);
 
-  if (cpuHierarchy.isError()) {
-    return Failure("Failed to determine the cgroup hierarchy "
-                   "where the 'cpu' subsystem is mounted: " +
-                   cpuHierarchy.error());
-  }
-
-  if (memoryHierarchy.isError()) {
-    return Failure("Failed to determine the cgroup hierarchy "
-                   "where the 'memory' subsystem is mounted: " +
-                   memoryHierarchy.error());
-  }
-
   // We need to find the cgroup(s) this container is currently running
   // in for both the hierarchy with the 'cpu' subsystem attached and
   // the hierarchy with the 'memory' subsystem attached so we can
@@ -1800,8 +1772,7 @@ Future<Nothing> DockerContainerizerProcess::__update(
 
   // Determine the cgroup for the 'cpu' subsystem (based on the
   // container's pid).
-  Result<string> cpuCgroup = cgroups::cpu::cgroup(pid);
-
+  Result<string> cpuCgroup = cgroups::cpu::cgroup(container.pid.get());
   if (cpuCgroup.isError()) {
     return Failure("Failed to determine cgroup for the 'cpu' subsystem: " +
                    cpuCgroup.error());
@@ -1811,14 +1782,73 @@ Future<Nothing> DockerContainerizerProcess::__update(
                  << " where the 'cpu' subsystem is mounted";
   } else if (cpuCgroup.get() == systemRootCgroup) {
     LOG(WARNING)
-        << "Process '" << pid
+        << "Process '" << container.pid.get()
+        << "' should not be in the system root cgroup (being destroyed?)";
+  } else {
+    // Cache the CPU cgroup.
+    containers_.at(containerId)->cpuCgroup = cpuCgroup.get();
+  }
+
+  // Now determine the cgroup for the 'memory' subsystem.
+  Result<string> memoryCgroup = cgroups::memory::cgroup(container.pid.get());
+  if (memoryCgroup.isError()) {
+    return Failure("Failed to determine cgroup for the 'memory' subsystem: " +
+                   memoryCgroup.error());
+  } else if (memoryCgroup.isNone()) {
+    LOG(WARNING) << "Container " << containerId
+                 << " does not appear to be a member of a cgroup"
+                 << " where the 'memory' subsystem is mounted";
+  } else if (memoryCgroup.get() == systemRootCgroup) {
+    LOG(WARNING)
+        << "Process '" << container.pid.get()
         << "' should not be in the system root cgroup (being destroyed?)";
+  } else {
+    // Cache the memory cgroup.
+    containers_.at(containerId)->memoryCgroup = memoryCgroup.get();
+  }
+
+  if (containers_.at(containerId)->cpuCgroup.isNone() &&
+      containers_.at(containerId)->memoryCgroup.isNone()) {
+    return Nothing();
+  }
+
+  return __update(containerId, _resources);
+}
+
+
+Future<Nothing> DockerContainerizerProcess::__update(
+    const ContainerID& containerId,
+    const Resources& _resources)
+{
+  CHECK(containers_.contains(containerId));
+
+  Container* container = containers_.at(containerId);
+
+  // Determine the cgroups hierarchies where the 'cpu' and
+  // 'memory' subsystems are mounted (they may be the same). Note that
+  // we make these static so we can reuse the result for subsequent
+  // calls.
+  static Result<string> cpuHierarchy = cgroups::hierarchy("cpu");
+  static Result<string> memoryHierarchy = cgroups::hierarchy("memory");
+
+  if (cpuHierarchy.isError()) {
+    return Failure("Failed to determine the cgroup hierarchy "
+                   "where the 'cpu' subsystem is mounted: " +
+                   cpuHierarchy.error());
+  }
+
+  if (memoryHierarchy.isError()) {
+    return Failure("Failed to determine the cgroup hierarchy "
+                   "where the 'memory' subsystem is mounted: " +
+                   memoryHierarchy.error());
   }
 
-  // And update the CPU shares (if applicable).
+  Option<string> cpuCgroup = container->cpuCgroup;
+  Option<string> memoryCgroup = container->memoryCgroup;
+
+  // Update the CPU shares (if applicable).
   if (cpuHierarchy.isSome() &&
       cpuCgroup.isSome() &&
-      cpuCgroup.get() != systemRootCgroup &&
       _resources.cpus().isSome()) {
     double cpuShares = _resources.cpus().get();
 
@@ -1866,26 +1896,9 @@ Future<Nothing> DockerContainerizerProcess::__update(
     }
   }
 
-  // Now determine the cgroup for the 'memory' subsystem.
-  Result<string> memoryCgroup = cgroups::memory::cgroup(pid);
-
-  if (memoryCgroup.isError()) {
-    return Failure("Failed to determine cgroup for the 'memory' subsystem: " +
-                   memoryCgroup.error());
-  } else if (memoryCgroup.isNone()) {
-    LOG(WARNING) << "Container " << containerId
-                 << " does not appear to be a member of a cgroup"
-                 << " where the 'memory' subsystem is mounted";
-  } else if (memoryCgroup.get() == systemRootCgroup) {
-    LOG(WARNING)
-        << "Process '" << pid
-        << "' should not be in the system root cgroup (being destroyed?)";
-  }
-
-  // And update the memory limits (if applicable).
+  // Update the memory limits (if applicable).
   if (memoryHierarchy.isSome() &&
       memoryCgroup.isSome() &&
-      memoryCgroup.get() != systemRootCgroup &&
       _resources.mem().isSome()) {
     // TODO(tnachen): investigate and handle OOM with docker.
     Bytes mem = _resources.mem().get();
@@ -1932,10 +1945,10 @@ Future<Nothing> DockerContainerizerProcess::__update(
                 << " for container " << containerId;
     }
   }
-#endif // __linux__
 
   return Nothing();
 }
+#endif // __linux__
 
 
 Future<ResourceStatistics> DockerContainerizerProcess::usage(
diff --git a/src/slave/containerizer/docker.hpp b/src/slave/containerizer/docker.hpp
index 6bdf4c7..2f76e2d 100644
--- a/src/slave/containerizer/docker.hpp
+++ b/src/slave/containerizer/docker.hpp
@@ -232,6 +232,7 @@ private:
       const ContainerID& containerId,
       process::Future<Nothing> future);
 
+#ifdef __linux__
   process::Future<Nothing> _update(
       const ContainerID& containerId,
       const Resources& resources,
@@ -239,8 +240,8 @@ private:
 
   process::Future<Nothing> __update(
       const ContainerID& containerId,
-      const Resources& resources,
-      pid_t pid);
+      const Resources& resources);
+#endif // __linux__
 
   process::Future<Nothing> mountPersistentVolumes(
       const ContainerID& containerId);
@@ -499,6 +500,9 @@ private:
 #ifdef __linux__
     // GPU resources allocated to the container.
     std::set<Gpu> gpus;
+
+    Option<std::string> cpuCgroup;
+    Option<std::string> memoryCgroup;
 #endif // __linux__
 
     // Marks if this container launches an executor in a docker
diff --git a/src/tests/containerizer/docker_containerizer_tests.cpp b/src/tests/containerizer/docker_containerizer_tests.cpp
index 5adc5fd..fdb0d2f 100644
--- a/src/tests/containerizer/docker_containerizer_tests.cpp
+++ b/src/tests/containerizer/docker_containerizer_tests.cpp
@@ -1169,7 +1169,7 @@ TEST_F(DockerContainerizerTest, ROOT_DOCKER_Update)
 
   newResources = Resources::parse("cpus:1;mem:144");
 
-  // Issue second update that uses the cached pid instead of inspect.
+  // Issue second update that uses the cached cgroups instead of inspect.
   update = dockerContainerizer.update(containerId.get(), newResources.get());
 
   AWAIT_READY(update);