You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ji...@apache.org on 2015/09/15 01:38:30 UTC
[2/2] mesos git commit: Made container sandbox a shared mount to
address MESOS-3349.
Made container sandbox a shared mount to address MESOS-3349.
Review: https://reviews.apache.org/r/38333
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/304032b4
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/304032b4
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/304032b4
Branch: refs/heads/master
Commit: 304032b4893ec282196bbb7fefd889fe5d49b3f5
Parents: 89308f8
Author: Jie Yu <yu...@gmail.com>
Authored: Sat Sep 12 23:28:29 2015 -0700
Committer: Jie Yu <yu...@gmail.com>
Committed: Mon Sep 14 16:38:02 2015 -0700
----------------------------------------------------------------------
.../isolators/filesystem/linux.cpp | 93 ++++++++++++++------
.../provisioners/appc/provisioner.cpp | 29 +++---
.../provisioners/backends/bind.cpp | 32 +++++--
3 files changed, 108 insertions(+), 46 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/304032b4/src/slave/containerizer/isolators/filesystem/linux.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/filesystem/linux.cpp b/src/slave/containerizer/isolators/filesystem/linux.cpp
index 0970b3d..b3ba53c 100644
--- a/src/slave/containerizer/isolators/filesystem/linux.cpp
+++ b/src/slave/containerizer/isolators/filesystem/linux.cpp
@@ -244,16 +244,51 @@ Future<Option<ContainerPrepareInfo>> LinuxFilesystemIsolatorProcess::__prepare(
ContainerPrepareInfo prepareInfo;
- // If the container changes its root filesystem, we need to mount
- // the container's work directory into its root filesystem (creating
- // it if needed) so that the executor and the task can access the
- // work directory.
- //
- // NOTE: The mount of the work directory must be a shared mount in
- // the host filesystem so that any mounts underneath it will
- // propagate into the container's mount namespace. This is how we
- // can update persistent volumes for the container.
- if (rootfs.isSome()) {
+ if (rootfs.isNone()) {
+ // If the container does not change its root filesystem, we need
+ // to do a self bind mount of the container's work directory and
+ // mark it as a shared mount. This is necessary for any mounts
+ // underneath it to be propagated into the container's mount
+ // namespace. This is how we can update persistent volumes.
+ LOG(INFO) << "Bind mounting work directory '" << directory
+ << "' for container " << containerId;
+
+ Try<Nothing> mount = fs::mount(
+ directory,
+ directory,
+ None(),
+ MS_BIND,
+ NULL);
+
+ if (mount.isError()) {
+ return Failure(
+ "Failed to self bind mount work directory '" +
+ directory + "': " + mount.error());
+ }
+
+ mount = fs::mount(
+ None(),
+ directory,
+ None(),
+ MS_SHARED,
+ NULL);
+
+ if (mount.isError()) {
+ return Failure(
+ "Failed to mark work directory '" + directory +
+ "' as a shared mount: " + mount.error());
+ }
+ } else {
+ // If the container changes its root filesystem, we need to mount
+ // the container's work directory into its root filesystem
+ // (creating it if needed) so that the executor and the task can
+ // access the work directory.
+ //
+ // NOTE: The mount of the work directory must be a shared mount in
+ // the host filesystem so that any mounts underneath it will
+ // propagate into the container's mount namespace. This is how we
+ // can update persistent volumes for the container.
+
// This is the mount point of the work directory in the root filesystem.
const string sandbox = path::join(rootfs.get(), flags.sandbox_directory);
@@ -269,6 +304,9 @@ Future<Option<ContainerPrepareInfo>> LinuxFilesystemIsolatorProcess::__prepare(
}
}
+ LOG(INFO) << "Bind mounting work directory from '" << directory
+ << "' to '" << sandbox << "' for container " << containerId;
+
Try<Nothing> mount = fs::mount(
directory,
sandbox,
@@ -291,7 +329,7 @@ Future<Option<ContainerPrepareInfo>> LinuxFilesystemIsolatorProcess::__prepare(
if (mount.isError()) {
return Failure(
- "Failed to mark work directory '" + directory +
+ "Failed to mark sandbox '" + sandbox +
"' as a shared mount: " + mount.error());
}
@@ -335,6 +373,10 @@ Try<string> LinuxFilesystemIsolatorProcess::script(
// propagate back to the host mount namespace.
out << "mount --make-rslave /\n";
+ // TODO(jieyu): Try to unmount work directory mounts and persistent
+ // volume mounts for other containers to release the extra
+ // references to those mounts.
+
foreach (const Volume& volume, executorInfo.container().volumes()) {
if (!volume.has_host_path()) {
return Error("A volume misses 'host_path'");
@@ -649,9 +691,11 @@ Future<Nothing> LinuxFilesystemIsolatorProcess::cleanup(
sandbox = info->directory;
}
+ infos.erase(containerId);
+
// Cleanup the mounts for this container in the host mount
- // namespace, including container's work directory (if container
- // root filesystem is used), and all the persistent volume mounts.
+ // namespace, including container's work directory and all the
+ // persistent volume mounts.
Try<fs::MountInfoTable> table = fs::MountInfoTable::read();
if (table.isError()) {
return Failure("Failed to get mount table: " + table.error());
@@ -665,7 +709,7 @@ Future<Nothing> LinuxFilesystemIsolatorProcess::cleanup(
LOG(INFO) << "Unmounting volume '" << entry.target
<< "' for container " << containerId;
- Try<Nothing> unmount = fs::unmount(entry.target, MNT_DETACH);
+ Try<Nothing> unmount = fs::unmount(entry.target);
if (unmount.isError()) {
return Failure(
"Failed to unmount volume '" + entry.target +
@@ -674,22 +718,17 @@ Future<Nothing> LinuxFilesystemIsolatorProcess::cleanup(
}
}
- // Cleanup the container's work directory mount. We only need to do
- // that if the container specifies a filesystem root.
- if (info->sandbox.isSome()) {
- LOG(INFO) << "Unmounting sandbox '" << info->sandbox.get()
- << "' for container " << containerId;
+ // Cleanup the container's work directory mount.
+ LOG(INFO) << "Unmounting sandbox/work directory '" << sandbox
+ << "' for container " << containerId;
- Try<Nothing> unmount = fs::unmount(info->sandbox.get(), MNT_DETACH);
- if (unmount.isError()) {
- return Failure(
- "Failed to unmount sandbox '" + info->sandbox.get() +
- "': " + unmount.error());
- }
+ Try<Nothing> unmount = fs::unmount(sandbox);
+ if (unmount.isError()) {
+ return Failure(
+ "Failed to unmount sandbox/work directory '" + sandbox +
+ "': " + unmount.error());
}
- infos.erase(containerId);
-
// Destroy the provisioned root filesystems.
list<Future<bool>> futures;
foreachvalue (const Owned<Provisioner>& provisioner, provisioners) {
http://git-wip-us.apache.org/repos/asf/mesos/blob/304032b4/src/slave/containerizer/provisioners/appc/provisioner.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/provisioners/appc/provisioner.cpp b/src/slave/containerizer/provisioners/appc/provisioner.cpp
index cd29a00..77f9cbe 100644
--- a/src/slave/containerizer/provisioners/appc/provisioner.cpp
+++ b/src/slave/containerizer/provisioners/appc/provisioner.cpp
@@ -344,8 +344,8 @@ Future<bool> AppcProvisionerProcess::destroy(const ContainerID& containerId)
return false;
}
- // Unregister the container first. If destroy() fails, we can rely on
- // recover() to retry it later.
+ // Unregister the container first. If destroy() fails, we can rely
+ // on recover() to retry it later.
Owned<Info> info = infos[containerId];
infos.erase(containerId);
@@ -365,19 +365,26 @@ Future<bool> AppcProvisionerProcess::destroy(const ContainerID& containerId)
}
}
+ // NOTE: We calculate 'containerDir' here so that the following
+ // lambda does not need to bind 'this'.
+ string containerDir =
+ provisioners::paths::getContainerDir(root, containerId);
+
// TODO(xujyan): Revisit the usefulness of this return value.
return collect(futures)
- .then([=]() -> Future<bool> {
- // This should be fairly cheap as the directory should only contain a
- // few empty sub-directories at this point.
- string containerDir =
- provisioners::paths::getContainerDir(root, containerId);
-
+ .then([containerDir]() -> Future<bool> {
+ // This should be fairly cheap as the directory should only
+ // contain a few empty sub-directories at this point.
+ //
+ // TODO(jieyu): Currently, it's possible that some directories
+ // cannot be removed due to EBUSY. EBUSY is caused by the race
+ // between cleaning up this container and new containers copying
+ // the host mount table. It's OK to ignore them. The cleanup
+ // will be retried during slave recovery.
Try<Nothing> rmdir = os::rmdir(containerDir);
if (rmdir.isError()) {
- return Failure("Failed to remove container directory '" +
- containerDir + "' of container " +
- stringify(containerId));
+ LOG(ERROR) << "Failed to remove the provisioned container directory "
+ << "at '" << containerDir << "'";
}
return true;
http://git-wip-us.apache.org/repos/asf/mesos/blob/304032b4/src/slave/containerizer/provisioners/backends/bind.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/provisioners/backends/bind.cpp b/src/slave/containerizer/provisioners/backends/bind.cpp
index 1cdae61..71861a9 100644
--- a/src/slave/containerizer/provisioners/backends/bind.cpp
+++ b/src/slave/containerizer/provisioners/backends/bind.cpp
@@ -16,6 +16,10 @@
* limitations under the License.
*/
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+
#include <process/dispatch.hpp>
#include <process/process.hpp>
@@ -150,9 +154,9 @@ Future<bool> BindBackendProcess::destroy(const string& rootfs)
}
foreach (const fs::MountInfoTable::Entry& entry, mountTable.get().entries) {
- // TODO(xujyan): If MS_REC was used in 'provision()' we would need to
- // check `strings::startsWith(entry.target, rootfs)` here to unmount
- // all nested mounts.
+ // TODO(xujyan): If MS_REC was used in 'provision()' we would need
+ // to check `strings::startsWith(entry.target, rootfs)` here to
+ // unmount all nested mounts.
if (entry.target == rootfs) {
// NOTE: This would fail if the rootfs is still in use.
Try<Nothing> unmount = fs::unmount(entry.target);
@@ -162,11 +166,23 @@ Future<bool> BindBackendProcess::destroy(const string& rootfs)
unmount.error());
}
- Try<Nothing> rmdir = os::rmdir(rootfs);
- if (rmdir.isError()) {
- return Failure(
- "Failed to remove rootfs mount point '" + rootfs + "': " +
- rmdir.error());
+ // TODO(jieyu): If 'rmdir' here returns EBUSY, we still return
+ // a success. This is currently possible because the parent
+ // mount of 'rootfs' might not be a shared mount. Thus,
+ // containers in different mount namespaces might hold extra
+ // references to this mount. It is OK to ignore the EBUSY error
+ // because the provisioner will later try to delete all the
+ // rootfses for the terminated containers.
+ if (::rmdir(rootfs.c_str()) != 0) {
+ string message =
+ "Failed to remove rootfs mount point '" + rootfs +
+ "': " + strerror(errno);
+
+ if (errno == EBUSY) {
+ LOG(ERROR) << message;
+ } else {
+ return Failure(message);
+ }
}
return true;