Posted to commits@mesos.apache.org by qi...@apache.org on 2019/08/23 07:50:41 UTC

[mesos] branch 1.7.x updated (61f1155 -> e51ab55)

This is an automated email from the ASF dual-hosted git repository.

qianzhang pushed a change to branch 1.7.x
in repository https://gitbox.apache.org/repos/asf/mesos.git.


    from 61f1155  Transitioned tasks when an unreachable agent is marked as gone.
     new 7b01d0d  Used cached cgroups for updating resources in Docker containerizer.
     new e51ab55  Added MESOS-9836 to the 1.7.3 CHANGELOG.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 CHANGELOG                                          |   1 +
 src/slave/containerizer/docker.cpp                 | 125 ++++++++++++---------
 src/slave/containerizer/docker.hpp                 |   8 +-
 .../containerizer/docker_containerizer_tests.cpp   |   2 +-
 4 files changed, 77 insertions(+), 59 deletions(-)


[mesos] 01/02: Used cached cgroups for updating resources in Docker containerizer.

Posted by qi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

qianzhang pushed a commit to branch 1.7.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 7b01d0d35e8e17434d6f8e8840c8586565fe8d6c
Author: Qian Zhang <zh...@gmail.com>
AuthorDate: Wed Aug 21 16:50:06 2019 +0800

    Used cached cgroups for updating resources in Docker containerizer.
    
    Review: https://reviews.apache.org/r/71335
---
 src/slave/containerizer/docker.cpp                 | 125 ++++++++++++---------
 src/slave/containerizer/docker.hpp                 |   8 +-
 .../containerizer/docker_containerizer_tests.cpp   |   2 +-
 3 files changed, 76 insertions(+), 59 deletions(-)

diff --git a/src/slave/containerizer/docker.cpp b/src/slave/containerizer/docker.cpp
index dacf4de..d734581 100644
--- a/src/slave/containerizer/docker.cpp
+++ b/src/slave/containerizer/docker.cpp
@@ -1702,9 +1702,9 @@ Future<Nothing> DockerContainerizerProcess::update(
     return Nothing();
   }
 
-  // Skip inspecting the docker container if we already have the pid.
-  if (container->pid.isSome()) {
-    return __update(containerId, _resources, container->pid.get());
+  // Skip inspecting the docker container if we already have the cgroups.
+  if (container->cpuCgroup.isSome() && container->memoryCgroup.isSome()) {
+    return __update(containerId, _resources);
   }
 
   string containerName = containers_.at(containerId)->containerName;
@@ -1755,6 +1755,7 @@ Future<Nothing> DockerContainerizerProcess::update(
 }
 
 
+#ifdef __linux__
 Future<Nothing> DockerContainerizerProcess::_update(
     const ContainerID& containerId,
     const Resources& _resources,
@@ -1772,23 +1773,6 @@ Future<Nothing> DockerContainerizerProcess::_update(
 
   containers_.at(containerId)->pid = container.pid.get();
 
-  return __update(containerId, _resources, container.pid.get());
-}
-
-
-Future<Nothing> DockerContainerizerProcess::__update(
-    const ContainerID& containerId,
-    const Resources& _resources,
-    pid_t pid)
-{
-#ifdef __linux__
-  // Determine the cgroups hierarchies where the 'cpu' and
-  // 'memory' subsystems are mounted (they may be the same). Note that
-  // we make these static so we can reuse the result for subsequent
-  // calls.
-  static Result<string> cpuHierarchy = cgroups::hierarchy("cpu");
-  static Result<string> memoryHierarchy = cgroups::hierarchy("memory");
-
   // NOTE: Normally, a Docker container should be in its own cgroup.
   // However, a zombie process (exited but not reaped) will be
   // temporarily moved into the system root cgroup. We add some
@@ -1796,18 +1780,6 @@ Future<Nothing> DockerContainerizerProcess::__update(
   // in the root cgroup. See MESOS-8480 for details.
   const string systemRootCgroup = stringify(os::PATH_SEPARATOR);
 
-  if (cpuHierarchy.isError()) {
-    return Failure("Failed to determine the cgroup hierarchy "
-                   "where the 'cpu' subsystem is mounted: " +
-                   cpuHierarchy.error());
-  }
-
-  if (memoryHierarchy.isError()) {
-    return Failure("Failed to determine the cgroup hierarchy "
-                   "where the 'memory' subsystem is mounted: " +
-                   memoryHierarchy.error());
-  }
-
   // We need to find the cgroup(s) this container is currently running
   // in for both the hierarchy with the 'cpu' subsystem attached and
   // the hierarchy with the 'memory' subsystem attached so we can
@@ -1815,8 +1787,7 @@ Future<Nothing> DockerContainerizerProcess::__update(
 
   // Determine the cgroup for the 'cpu' subsystem (based on the
   // container's pid).
-  Result<string> cpuCgroup = cgroups::cpu::cgroup(pid);
-
+  Result<string> cpuCgroup = cgroups::cpu::cgroup(container.pid.get());
   if (cpuCgroup.isError()) {
     return Failure("Failed to determine cgroup for the 'cpu' subsystem: " +
                    cpuCgroup.error());
@@ -1826,14 +1797,73 @@ Future<Nothing> DockerContainerizerProcess::__update(
                  << " where the 'cpu' subsystem is mounted";
   } else if (cpuCgroup.get() == systemRootCgroup) {
     LOG(WARNING)
-        << "Process '" << pid
+        << "Process '" << container.pid.get()
+        << "' should not be in the system root cgroup (being destroyed?)";
+  } else {
+    // Cache the CPU cgroup.
+    containers_.at(containerId)->cpuCgroup = cpuCgroup.get();
+  }
+
+  // Now determine the cgroup for the 'memory' subsystem.
+  Result<string> memoryCgroup = cgroups::memory::cgroup(container.pid.get());
+  if (memoryCgroup.isError()) {
+    return Failure("Failed to determine cgroup for the 'memory' subsystem: " +
+                   memoryCgroup.error());
+  } else if (memoryCgroup.isNone()) {
+    LOG(WARNING) << "Container " << containerId
+                 << " does not appear to be a member of a cgroup"
+                 << " where the 'memory' subsystem is mounted";
+  } else if (memoryCgroup.get() == systemRootCgroup) {
+    LOG(WARNING)
+        << "Process '" << container.pid.get()
         << "' should not be in the system root cgroup (being destroyed?)";
+  } else {
+    // Cache the memory cgroup.
+    containers_.at(containerId)->memoryCgroup = memoryCgroup.get();
+  }
+
+  if (containers_.at(containerId)->cpuCgroup.isNone() &&
+      containers_.at(containerId)->memoryCgroup.isNone()) {
+    return Nothing();
+  }
+
+  return __update(containerId, _resources);
+}
+
+
+Future<Nothing> DockerContainerizerProcess::__update(
+    const ContainerID& containerId,
+    const Resources& _resources)
+{
+  CHECK(containers_.contains(containerId));
+
+  Container* container = containers_.at(containerId);
+
+  // Determine the cgroups hierarchies where the 'cpu' and
+  // 'memory' subsystems are mounted (they may be the same). Note that
+  // we make these static so we can reuse the result for subsequent
+  // calls.
+  static Result<string> cpuHierarchy = cgroups::hierarchy("cpu");
+  static Result<string> memoryHierarchy = cgroups::hierarchy("memory");
+
+  if (cpuHierarchy.isError()) {
+    return Failure("Failed to determine the cgroup hierarchy "
+                   "where the 'cpu' subsystem is mounted: " +
+                   cpuHierarchy.error());
+  }
+
+  if (memoryHierarchy.isError()) {
+    return Failure("Failed to determine the cgroup hierarchy "
+                   "where the 'memory' subsystem is mounted: " +
+                   memoryHierarchy.error());
   }
 
-  // And update the CPU shares (if applicable).
+  Option<string> cpuCgroup = container->cpuCgroup;
+  Option<string> memoryCgroup = container->memoryCgroup;
+
+  // Update the CPU shares (if applicable).
   if (cpuHierarchy.isSome() &&
       cpuCgroup.isSome() &&
-      cpuCgroup.get() != systemRootCgroup &&
       _resources.cpus().isSome()) {
     double cpuShares = _resources.cpus().get();
 
@@ -1881,26 +1911,9 @@ Future<Nothing> DockerContainerizerProcess::__update(
     }
   }
 
-  // Now determine the cgroup for the 'memory' subsystem.
-  Result<string> memoryCgroup = cgroups::memory::cgroup(pid);
-
-  if (memoryCgroup.isError()) {
-    return Failure("Failed to determine cgroup for the 'memory' subsystem: " +
-                   memoryCgroup.error());
-  } else if (memoryCgroup.isNone()) {
-    LOG(WARNING) << "Container " << containerId
-                 << " does not appear to be a member of a cgroup"
-                 << " where the 'memory' subsystem is mounted";
-  } else if (memoryCgroup.get() == systemRootCgroup) {
-    LOG(WARNING)
-        << "Process '" << pid
-        << "' should not be in the system root cgroup (being destroyed?)";
-  }
-
-  // And update the memory limits (if applicable).
+  // Update the memory limits (if applicable).
   if (memoryHierarchy.isSome() &&
       memoryCgroup.isSome() &&
-      memoryCgroup.get() != systemRootCgroup &&
       _resources.mem().isSome()) {
     // TODO(tnachen): investigate and handle OOM with docker.
     Bytes mem = _resources.mem().get();
@@ -1947,10 +1960,10 @@ Future<Nothing> DockerContainerizerProcess::__update(
                 << " for container " << containerId;
     }
   }
-#endif // __linux__
 
   return Nothing();
 }
+#endif // __linux__
 
 
 Future<ResourceStatistics> DockerContainerizerProcess::usage(
diff --git a/src/slave/containerizer/docker.hpp b/src/slave/containerizer/docker.hpp
index ca41f3b..0349f53 100644
--- a/src/slave/containerizer/docker.hpp
+++ b/src/slave/containerizer/docker.hpp
@@ -250,6 +250,7 @@ private:
       const ContainerID& containerId,
       process::Future<Nothing> future);
 
+#ifdef __linux__
   process::Future<Nothing> _update(
       const ContainerID& containerId,
       const Resources& resources,
@@ -257,8 +258,8 @@ private:
 
   process::Future<Nothing> __update(
       const ContainerID& containerId,
-      const Resources& resources,
-      pid_t pid);
+      const Resources& resources);
+#endif // __linux__
 
   process::Future<Nothing> mountPersistentVolumes(
       const ContainerID& containerId);
@@ -519,6 +520,9 @@ private:
 #ifdef __linux__
     // GPU resources allocated to the container.
     std::set<Gpu> gpus;
+
+    Option<std::string> cpuCgroup;
+    Option<std::string> memoryCgroup;
 #endif // __linux__
 
     // Marks if this container launches an executor in a docker
diff --git a/src/tests/containerizer/docker_containerizer_tests.cpp b/src/tests/containerizer/docker_containerizer_tests.cpp
index a621758..df06f60 100644
--- a/src/tests/containerizer/docker_containerizer_tests.cpp
+++ b/src/tests/containerizer/docker_containerizer_tests.cpp
@@ -1217,7 +1217,7 @@ TEST_F(DockerContainerizerTest, ROOT_DOCKER_Update)
 
   newResources = Resources::parse("cpus:1;mem:144");
 
-  // Issue second update that uses the cached pid instead of inspect.
+  // Issue second update that uses the cached cgroups instead of inspect.
   update = dockerContainerizer.update(containerId.get(), newResources.get());
 
   AWAIT_READY(update);
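
The gist of the change above: on the first resource update the containerizer resolves the container's 'cpu' and 'memory' cgroups from its pid (which may first require a `docker inspect` to learn the pid), caches those cgroup paths on the per-container state, and on every later update reuses the cached paths instead of re-inspecting the container. Below is a minimal, self-contained sketch of that caching pattern; the types and functions (Container, lookupCgroup, applyLimits, update) are simplified stand-ins for illustration only, not the actual Mesos code.

// Sketch of the cgroup-caching pattern: resolve the container's cpu/memory
// cgroups once, remember them, and reuse them on subsequent updates.
// All names here are illustrative stand-ins, not Mesos APIs.
#include <cstdint>
#include <iostream>
#include <optional>
#include <string>
#include <sys/types.h>

struct Container {
  pid_t pid = -1;
  std::optional<std::string> cpuCgroup;     // Cached on first successful lookup.
  std::optional<std::string> memoryCgroup;  // Cached on first successful lookup.
};

// Stand-in for resolving a subsystem cgroup from a pid (the real code reads
// /proc/<pid>/cgroup); here we just fabricate a deterministic path.
std::optional<std::string> lookupCgroup(pid_t pid, const std::string& subsystem) {
  return "/docker/" + std::to_string(pid) + "/" + subsystem;
}

void applyLimits(const Container& c, double cpus, uint64_t memMB) {
  // The real code writes cpu.shares / memory.limit_in_bytes under the
  // mounted hierarchies; here we only log what would be written.
  std::cout << "cpu.shares -> " << *c.cpuCgroup << " (" << cpus << " cpus)\n"
            << "memory.limit_in_bytes -> " << *c.memoryCgroup
            << " (" << memMB << " MB)\n";
}

void update(Container& c, double cpus, uint64_t memMB) {
  // Fast path: cgroups already cached, so no need to re-inspect the container.
  if (!c.cpuCgroup || !c.memoryCgroup) {
    // Slow path: resolve (and cache) the cgroups from the container's pid.
    c.cpuCgroup = lookupCgroup(c.pid, "cpu");
    c.memoryCgroup = lookupCgroup(c.pid, "memory");
  }
  applyLimits(c, cpus, memMB);
}

int main() {
  Container c;
  c.pid = 12345;
  update(c, 1.0, 128);  // First update resolves and caches the cgroups.
  update(c, 1.0, 144);  // Second update reuses the cached cgroups.
  return 0;
}

In the actual commit, the cache lives on the per-container struct in docker.hpp (the new cpuCgroup/memoryCgroup Options), and update() only falls back to inspecting the container when neither cgroup is cached, which is what the test change above exercises with its second update.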


[mesos] 02/02: Added MESOS-9836 to the 1.7.3 CHANGELOG.

Posted by qi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

qianzhang pushed a commit to branch 1.7.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit e51ab5524715115754c55a721d899fb44e99f7d2
Author: Qian Zhang <zh...@gmail.com>
AuthorDate: Fri Aug 23 15:32:49 2019 +0800

    Added MESOS-9836 to the 1.7.3 CHANGELOG.
---
 CHANGELOG | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG b/CHANGELOG
index fc75ce0..06c88db 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -24,6 +24,7 @@ Release Notes - Mesos - Version 1.7.3 (WIP)
   * [MESOS-9786] - Race between two REMOVE_QUOTA calls crashes the master.
   * [MESOS-9787] - Log slow SSL (TLS) peer reverse DNS lookup.
   * [MESOS-9803] - Memory leak caused by an infinite chain of futures in `UriDiskProfileAdaptor`.
+  * [MESOS-9836] - Docker containerizer overwrites `/mesos/slave` cgroups.
   * [MESOS-9852] - Slow memory growth in master due to deferred deletion of offer filters and timers.
   * [MESOS-9856] - REVIVE call with specified role(s) clears filters for all roles of a framework.
   * [MESOS-9868] - NetworkInfo from the agent /state endpoint is not correct.