You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ji...@apache.org on 2015/09/15 01:38:29 UTC

[1/2] mesos git commit: Added logics in LinuxFilesystemIsolator to recover and cleanup orphans.

Repository: mesos
Updated Branches:
  refs/heads/master 89308f8ea -> 1c10fdbfa


Added logics in LinuxFilesystemIsolator to recover and cleanup orphans.

Review: https://reviews.apache.org/r/38374


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/1c10fdbf
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/1c10fdbf
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/1c10fdbf

Branch: refs/heads/master
Commit: 1c10fdbfaad1841f9ca93e3a3c255f81b0defe5a
Parents: 304032b
Author: Jie Yu <yu...@gmail.com>
Authored: Mon Sep 14 15:28:44 2015 -0700
Committer: Jie Yu <yu...@gmail.com>
Committed: Mon Sep 14 16:38:02 2015 -0700

----------------------------------------------------------------------
 .../isolators/filesystem/linux.cpp              | 86 +++++++++++++++-----
 .../isolators/filesystem/linux.hpp              | 26 +++---
 src/slave/paths.cpp                             |  6 ++
 src/slave/paths.hpp                             |  3 +
 4 files changed, 86 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/1c10fdbf/src/slave/containerizer/isolators/filesystem/linux.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/filesystem/linux.cpp b/src/slave/containerizer/isolators/filesystem/linux.cpp
index b3ba53c..dbdbf87 100644
--- a/src/slave/containerizer/isolators/filesystem/linux.cpp
+++ b/src/slave/containerizer/isolators/filesystem/linux.cpp
@@ -94,23 +94,6 @@ Future<Nothing> LinuxFilesystemIsolatorProcess::recover(
     const list<ContainerState>& states,
     const hashset<ContainerID>& orphans)
 {
-  list<Future<Nothing>> futures;
-  foreachvalue (const Owned<Provisioner>& provisioner, provisioners) {
-    futures.push_back(provisioner->recover(states, orphans));
-  }
-
-  return collect(futures)
-    .then(defer(PID<LinuxFilesystemIsolatorProcess>(this),
-                &LinuxFilesystemIsolatorProcess::_recover,
-                states,
-                orphans));
-}
-
-
-Future<Nothing> LinuxFilesystemIsolatorProcess::_recover(
-    const list<ContainerState>& states,
-    const hashset<ContainerID>& orphans)
-{
   // Read the mount table in the host mount namespace to recover paths
   // to containers' work directories if their root filesystems are
   // changed. Method 'cleanup()' relies on this information to clean
@@ -133,12 +116,71 @@ Future<Nothing> LinuxFilesystemIsolatorProcess::_recover(
     infos.put(state.container_id(), info);
   }
 
-  // TODO(jieyu): Clean up unknown containers' work directory mounts
-  // and the corresponding persistent volume mounts. This can be
-  // achieved by iterating the mount table and find those unknown
-  // mounts whose sources are under the slave 'work_dir'.
+  // Recover both known and unknown orphans by scanning the mount
+  // table and finding those mounts whose roots are under slave's
+  // sandbox root directory. Those mounts are container's work
+  // directory mounts. Mounts from unknown orphans will be cleaned up
+  // immediately. Mounts from known orphans will be cleaned up when
+  // those known orphan containers are being destroyed by the slave.
+  hashset<ContainerID> unknownOrphans;
 
-  return Nothing();
+  string sandboxRootDir = paths::getSandboxRootDir(flags.work_dir);
+
+  foreach (const fs::MountInfoTable::Entry& entry, table.get().entries) {
+    if (!strings::startsWith(entry.root, sandboxRootDir)) {
+      continue;
+    }
+
+    // TODO(jieyu): Here, we retrieve the container ID by taking the
+    // basename of 'entry.root'. This assumes that the slave's sandbox
+    // root directory are organized according to the comments in the
+    // beginning of slave/paths.hpp.
+    ContainerID containerId;
+    containerId.set_value(Path(entry.root).basename());
+
+    if (infos.contains(containerId)) {
+      continue;
+    }
+
+    Owned<Info> info(new Info(entry.root));
+
+    if (entry.root != entry.target) {
+      info->sandbox = entry.target;
+    }
+
+    infos.put(containerId, info);
+
+    // Remember all the unknown orphan containers.
+    if (!orphans.contains(containerId)) {
+      unknownOrphans.insert(containerId);
+    }
+  }
+
+  // Cleanup mounts from unknown orphans.
+  list<Future<Nothing>> futures;
+  foreach (const ContainerID& containerId, unknownOrphans) {
+    futures.push_back(cleanup(containerId));
+  }
+
+  return collect(futures)
+    .then(defer(PID<LinuxFilesystemIsolatorProcess>(this),
+                &LinuxFilesystemIsolatorProcess::_recover,
+                states,
+                orphans));
+}
+
+
+Future<Nothing> LinuxFilesystemIsolatorProcess::_recover(
+    const list<ContainerState>& states,
+    const hashset<ContainerID>& orphans)
+{
+  list<Future<Nothing>> futures;
+  foreachvalue (const Owned<Provisioner>& provisioner, provisioners) {
+    futures.push_back(provisioner->recover(states, orphans));
+  }
+
+  return collect(futures)
+    .then([]() -> Future<Nothing> { return Nothing(); });
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/1c10fdbf/src/slave/containerizer/isolators/filesystem/linux.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/filesystem/linux.hpp b/src/slave/containerizer/isolators/filesystem/linux.hpp
index ab60f0c..6cfe9fa 100644
--- a/src/slave/containerizer/isolators/filesystem/linux.hpp
+++ b/src/slave/containerizer/isolators/filesystem/linux.hpp
@@ -87,23 +87,23 @@ private:
       const hashset<ContainerID>& orphans);
 
   process::Future<Option<mesos::slave::ContainerPrepareInfo>> _prepare(
-    const ContainerID& containerId,
-    const ExecutorInfo& executorInfo,
-    const std::string& directory,
-    const Option<std::string>& user,
-    const Option<std::string>& rootfs);
+      const ContainerID& containerId,
+      const ExecutorInfo& executorInfo,
+      const std::string& directory,
+      const Option<std::string>& user,
+      const Option<std::string>& rootfs);
 
   process::Future<Option<mesos::slave::ContainerPrepareInfo>> __prepare(
-    const ContainerID& containerId,
-    const ExecutorInfo& executorInfo,
-    const std::string& directory,
-    const Option<std::string>& user,
-    const Option<std::string>& rootfs);
+      const ContainerID& containerId,
+      const ExecutorInfo& executorInfo,
+      const std::string& directory,
+      const Option<std::string>& user,
+      const Option<std::string>& rootfs);
 
   Try<std::string> script(
-    const ExecutorInfo& executorInfo,
-    const std::string& directory,
-    const Option<std::string>& rootfs);
+      const ExecutorInfo& executorInfo,
+      const std::string& directory,
+      const Option<std::string>& rootfs);
 
   const Flags flags;
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/1c10fdbf/src/slave/paths.cpp
----------------------------------------------------------------------
diff --git a/src/slave/paths.cpp b/src/slave/paths.cpp
index f5697fb..f104ecd 100644
--- a/src/slave/paths.cpp
+++ b/src/slave/paths.cpp
@@ -62,6 +62,12 @@ string getMetaRootDir(const string& rootDir)
 }
 
 
+string getSandboxRootDir(const string& rootDir)
+{
+  return path::join(rootDir, "slaves");
+}
+
+
 string getArchiveDir(const string& rootDir)
 {
   return path::join(rootDir, "archive");

http://git-wip-us.apache.org/repos/asf/mesos/blob/1c10fdbf/src/slave/paths.hpp
----------------------------------------------------------------------
diff --git a/src/slave/paths.hpp b/src/slave/paths.hpp
index 35b0439..43c65af 100644
--- a/src/slave/paths.hpp
+++ b/src/slave/paths.hpp
@@ -100,6 +100,9 @@ const char LATEST_SYMLINK[] = "latest";
 std::string getMetaRootDir(const std::string& rootDir);
 
 
+std::string getSandboxRootDir(const std::string& rootDir);
+
+
 std::string getArchiveDir(const std::string& rootDir);
 
 


[2/2] mesos git commit: Made container sandbox a shared mount to address MESOS-3349.

Posted by ji...@apache.org.
Made container sandbox a shared mount to address MESOS-3349.

Review: https://reviews.apache.org/r/38333


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/304032b4
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/304032b4
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/304032b4

Branch: refs/heads/master
Commit: 304032b4893ec282196bbb7fefd889fe5d49b3f5
Parents: 89308f8
Author: Jie Yu <yu...@gmail.com>
Authored: Sat Sep 12 23:28:29 2015 -0700
Committer: Jie Yu <yu...@gmail.com>
Committed: Mon Sep 14 16:38:02 2015 -0700

----------------------------------------------------------------------
 .../isolators/filesystem/linux.cpp              | 93 ++++++++++++++------
 .../provisioners/appc/provisioner.cpp           | 29 +++---
 .../provisioners/backends/bind.cpp              | 32 +++++--
 3 files changed, 108 insertions(+), 46 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/304032b4/src/slave/containerizer/isolators/filesystem/linux.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/filesystem/linux.cpp b/src/slave/containerizer/isolators/filesystem/linux.cpp
index 0970b3d..b3ba53c 100644
--- a/src/slave/containerizer/isolators/filesystem/linux.cpp
+++ b/src/slave/containerizer/isolators/filesystem/linux.cpp
@@ -244,16 +244,51 @@ Future<Option<ContainerPrepareInfo>> LinuxFilesystemIsolatorProcess::__prepare(
 
   ContainerPrepareInfo prepareInfo;
 
-  // If the container changes its root filesystem, we need to mount
-  // the container's work directory into its root filesystem (creating
-  // it if needed) so that the executor and the task can access the
-  // work directory.
-  //
-  // NOTE: The mount of the work directory must be a shared mount in
-  // the host filesystem so that any mounts underneath it will
-  // propagate into the container's mount namespace. This is how we
-  // can update persistent volumes for the container.
-  if (rootfs.isSome()) {
+  if (rootfs.isNone()) {
+    // It the container does not change its root filesystem, we need
+    // to do a self bind mount of the container's work directory and
+    // mark it as a shared mount. This is necessary for any mounts
+    // underneath it to be propagated into the container's mount
+    // namespace. This is how we can update persistent volumes.
+    LOG(INFO) << "Bind mounting work directory '" << directory
+              << "' for container " << containerId;
+
+    Try<Nothing> mount = fs::mount(
+        directory,
+        directory,
+        None(),
+        MS_BIND,
+        NULL);
+
+    if (mount.isError()) {
+      return Failure(
+          "Failed to self bind mount work directory '" +
+          directory + "': " + mount.error());
+    }
+
+    mount = fs::mount(
+        None(),
+        directory,
+        None(),
+        MS_SHARED,
+        NULL);
+
+    if (mount.isError()) {
+      return Failure(
+          "Failed to mark work directory '" + directory +
+          "' as a shared mount: " + mount.error());
+    }
+  } else {
+    // If the container changes its root filesystem, we need to mount
+    // the container's work directory into its root filesystem
+    // (creating it if needed) so that the executor and the task can
+    // access the work directory.
+    //
+    // NOTE: The mount of the work directory must be a shared mount in
+    // the host filesystem so that any mounts underneath it will
+    // propagate into the container's mount namespace. This is how we
+    // can update persistent volumes for the container.
+
     // This is the mount point of the work directory in the root filesystem.
     const string sandbox = path::join(rootfs.get(), flags.sandbox_directory);
 
@@ -269,6 +304,9 @@ Future<Option<ContainerPrepareInfo>> LinuxFilesystemIsolatorProcess::__prepare(
       }
     }
 
+    LOG(INFO) << "Bind mounting work directory from '" << directory
+              << "' to '" << sandbox << "' for container " << containerId;
+
     Try<Nothing> mount = fs::mount(
         directory,
         sandbox,
@@ -291,7 +329,7 @@ Future<Option<ContainerPrepareInfo>> LinuxFilesystemIsolatorProcess::__prepare(
 
     if (mount.isError()) {
       return Failure(
-          "Failed to mark work directory '" + directory +
+          "Failed to mark sandbox '" + sandbox +
           "' as a shared mount: " + mount.error());
     }
 
@@ -335,6 +373,10 @@ Try<string> LinuxFilesystemIsolatorProcess::script(
   // propagate back to the host mount namespace.
   out << "mount --make-rslave /\n";
 
+  // TODO(jieyu): Try to unmount work directory mounts and persistent
+  // volume mounts for other containers to release the extra
+  // references to those mounts.
+
   foreach (const Volume& volume, executorInfo.container().volumes()) {
     if (!volume.has_host_path()) {
       return Error("A volume misses 'host_path'");
@@ -649,9 +691,11 @@ Future<Nothing> LinuxFilesystemIsolatorProcess::cleanup(
     sandbox = info->directory;
   }
 
+  infos.erase(containerId);
+
   // Cleanup the mounts for this container in the host mount
-  // namespace, including container's work directory (if container
-  // root filesystem is used), and all the persistent volume mounts.
+  // namespace, including container's work directory and all the
+  // persistent volume mounts.
   Try<fs::MountInfoTable> table = fs::MountInfoTable::read();
   if (table.isError()) {
     return Failure("Failed to get mount table: " + table.error());
@@ -665,7 +709,7 @@ Future<Nothing> LinuxFilesystemIsolatorProcess::cleanup(
       LOG(INFO) << "Unmounting volume '" << entry.target
                 << "' for container " << containerId;
 
-      Try<Nothing> unmount = fs::unmount(entry.target, MNT_DETACH);
+      Try<Nothing> unmount = fs::unmount(entry.target);
       if (unmount.isError()) {
         return Failure(
             "Failed to unmount volume '" + entry.target +
@@ -674,22 +718,17 @@ Future<Nothing> LinuxFilesystemIsolatorProcess::cleanup(
     }
   }
 
-  // Cleanup the container's work directory mount. We only need to do
-  // that if the container specifies a filesystem root.
-  if (info->sandbox.isSome()) {
-    LOG(INFO) << "Unmounting sandbox '" << info->sandbox.get()
-              << "' for container " << containerId;
+  // Cleanup the container's work directory mount.
+  LOG(INFO) << "Unmounting sandbox/work directory '" << sandbox
+            << "' for container " << containerId;
 
-    Try<Nothing> unmount = fs::unmount(info->sandbox.get(), MNT_DETACH);
-    if (unmount.isError()) {
-      return Failure(
-          "Failed to unmount sandbox '" + info->sandbox.get() +
-          "': " + unmount.error());
-    }
+  Try<Nothing> unmount = fs::unmount(sandbox);
+  if (unmount.isError()) {
+    return Failure(
+        "Failed to unmount sandbox/work directory '" + sandbox +
+        "': " + unmount.error());
   }
 
-  infos.erase(containerId);
-
   // Destroy the provisioned root filesystems.
   list<Future<bool>> futures;
   foreachvalue (const Owned<Provisioner>& provisioner, provisioners) {

http://git-wip-us.apache.org/repos/asf/mesos/blob/304032b4/src/slave/containerizer/provisioners/appc/provisioner.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/provisioners/appc/provisioner.cpp b/src/slave/containerizer/provisioners/appc/provisioner.cpp
index cd29a00..77f9cbe 100644
--- a/src/slave/containerizer/provisioners/appc/provisioner.cpp
+++ b/src/slave/containerizer/provisioners/appc/provisioner.cpp
@@ -344,8 +344,8 @@ Future<bool> AppcProvisionerProcess::destroy(const ContainerID& containerId)
     return false;
   }
 
-  // Unregister the container first. If destroy() fails, we can rely on
-  // recover() to retry it later.
+  // Unregister the container first. If destroy() fails, we can rely
+  // on recover() to retry it later.
   Owned<Info> info = infos[containerId];
   infos.erase(containerId);
 
@@ -365,19 +365,26 @@ Future<bool> AppcProvisionerProcess::destroy(const ContainerID& containerId)
     }
   }
 
+  // NOTE: We calculate 'containerDir' here so that the following
+  // lambda does not need to bind 'this'.
+  string containerDir =
+    provisioners::paths::getContainerDir(root, containerId);
+
   // TODO(xujyan): Revisit the usefulness of this return value.
   return collect(futures)
-    .then([=]() -> Future<bool> {
-      // This should be fairly cheap as the directory should only contain a
-      // few empty sub-directories at this point.
-      string containerDir =
-        provisioners::paths::getContainerDir(root, containerId);
-
+    .then([containerDir]() -> Future<bool> {
+      // This should be fairly cheap as the directory should only
+      // contain a few empty sub-directories at this point.
+      //
+      // TODO(jieyu): Currently, it's possible that some directories
+      // cannot be removed due to EBUSY. EBUSY is caused by the race
+      // between cleaning up this container and new containers copying
+      // the host mount table. It's OK to ignore them. The cleanup
+      // will be retried during slave recovery.
       Try<Nothing> rmdir = os::rmdir(containerDir);
       if (rmdir.isError()) {
-        return Failure("Failed to remove container directory '" +
-                       containerDir + "' of container " +
-                       stringify(containerId));
+        LOG(ERROR) << "Failed to remove the provisioned container directory "
+                   << "at '" << containerDir << "'";
       }
 
       return true;

http://git-wip-us.apache.org/repos/asf/mesos/blob/304032b4/src/slave/containerizer/provisioners/backends/bind.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/provisioners/backends/bind.cpp b/src/slave/containerizer/provisioners/backends/bind.cpp
index 1cdae61..71861a9 100644
--- a/src/slave/containerizer/provisioners/backends/bind.cpp
+++ b/src/slave/containerizer/provisioners/backends/bind.cpp
@@ -16,6 +16,10 @@
  * limitations under the License.
  */
 
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+
 #include <process/dispatch.hpp>
 #include <process/process.hpp>
 
@@ -150,9 +154,9 @@ Future<bool> BindBackendProcess::destroy(const string& rootfs)
   }
 
   foreach (const fs::MountInfoTable::Entry& entry, mountTable.get().entries) {
-    // TODO(xujyan): If MS_REC was used in 'provision()' we would need to
-    // check `strings::startsWith(entry.target, rootfs)` here to unmount
-    // all nested mounts.
+    // TODO(xujyan): If MS_REC was used in 'provision()' we would need
+    // to check `strings::startsWith(entry.target, rootfs)` here to
+    // unmount all nested mounts.
     if (entry.target == rootfs) {
       // NOTE: This would fail if the rootfs is still in use.
       Try<Nothing> unmount = fs::unmount(entry.target);
@@ -162,11 +166,23 @@ Future<bool> BindBackendProcess::destroy(const string& rootfs)
             unmount.error());
       }
 
-      Try<Nothing> rmdir = os::rmdir(rootfs);
-      if (rmdir.isError()) {
-        return Failure(
-            "Failed to remove rootfs mount point '" + rootfs + "': " +
-            rmdir.error());
+      // TODO(jieyu): If 'rmdir' here returns EBUSY, we still returns
+      // a success. This is currently possible because the parent
+      // mount of 'rootfs' might not be a shared mount. Thus,
+      // containers in different mount namespaces might hold extra
+      // references to this mount. It is OK to ignore the EBUSY error
+      // because the provisioner will later try to delete all the
+      // rootfses for the terminated containers.
+      if (::rmdir(rootfs.c_str()) != 0) {
+        string message =
+          "Failed to remove rootfs mount point '" + rootfs +
+          "': " + strerror(errno);
+
+        if (errno == EBUSY) {
+          LOG(ERROR) << message;
+        } else {
+          return Failure(message);
+        }
       }
 
       return true;