You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by gi...@apache.org on 2017/11/20 20:31:28 UTC
[5/5] mesos git commit: Implemented pruneImages with a mark and sweep
in docker store.
Implemented pruneImages with a mark and sweep in docker store.
This includes the following changes:
- add a `pruneImages()` function on the chain of relevant classes;
- implement prune in docker store;
- fix mock interface to keep existing tests passing.
Review: https://reviews.apache.org/r/56721/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/bdb604a9
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/bdb604a9
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/bdb604a9
Branch: refs/heads/master
Commit: bdb604a9dc29ab7bc4b9398cf4c1a2bd8b6061c4
Parents: e273efe
Author: Zhitao Li <zh...@gmail.com>
Authored: Fri Nov 17 16:36:31 2017 -0800
Committer: Gilbert Song <so...@gmail.com>
Committed: Mon Nov 20 12:29:53 2017 -0800
----------------------------------------------------------------------
src/slave/containerizer/composing.cpp | 21 +++
src/slave/containerizer/composing.hpp | 2 +
src/slave/containerizer/containerizer.hpp | 3 +
src/slave/containerizer/docker.cpp | 7 +
src/slave/containerizer/docker.hpp | 2 +
src/slave/containerizer/mesos/containerizer.cpp | 39 ++++
src/slave/containerizer/mesos/containerizer.hpp | 4 +
.../provisioner/docker/metadata_manager.cpp | 50 ++++-
.../provisioner/docker/metadata_manager.hpp | 14 ++
.../mesos/provisioner/docker/paths.cpp | 25 +++
.../mesos/provisioner/docker/paths.hpp | 15 ++
.../mesos/provisioner/docker/store.cpp | 151 ++++++++++++++-
.../mesos/provisioner/docker/store.hpp | 5 +
.../mesos/provisioner/provisioner.cpp | 184 ++++++++++++++-----
.../mesos/provisioner/provisioner.hpp | 24 +++
.../containerizer/mesos/provisioner/store.cpp | 10 +
.../containerizer/mesos/provisioner/store.hpp | 18 ++
src/tests/containerizer.cpp | 17 ++
src/tests/containerizer.hpp | 7 +
src/tests/containerizer/mock_containerizer.hpp | 2 +
20 files changed, 547 insertions(+), 53 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/composing.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/composing.cpp b/src/slave/containerizer/composing.cpp
index 64919ef..9a9e11b 100644
--- a/src/slave/containerizer/composing.cpp
+++ b/src/slave/containerizer/composing.cpp
@@ -95,6 +95,8 @@ public:
Future<Nothing> remove(const ContainerID& containerId);
+ Future<Nothing> pruneImages();
+
private:
// Continuations.
Future<Nothing> _recover();
@@ -257,6 +259,12 @@ Future<Nothing> ComposingContainerizer::remove(const ContainerID& containerId)
}
+Future<Nothing> ComposingContainerizer::pruneImages()
+{
+ return dispatch(process, &ComposingContainerizerProcess::pruneImages);
+}
+
+
ComposingContainerizerProcess::~ComposingContainerizerProcess()
{
foreach (Containerizer* containerizer, containerizers_) {
@@ -687,6 +695,19 @@ Future<Nothing> ComposingContainerizerProcess::remove(
return containers_[rootContainerId]->containerizer->remove(containerId);
}
+
+Future<Nothing> ComposingContainerizerProcess::pruneImages()
+{
+ list<Future<Nothing>> futures;
+
+ foreach (Containerizer* containerizer, containerizers_) {
+ futures.push_back(containerizer->pruneImages());
+ }
+
+ return collect(futures)
+ .then([]() { return Nothing(); });
+}
+
} // namespace slave {
} // namespace internal {
} // namespace mesos {
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/composing.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/composing.hpp b/src/slave/containerizer/composing.hpp
index c2689cf..3d00609 100644
--- a/src/slave/containerizer/composing.hpp
+++ b/src/slave/containerizer/composing.hpp
@@ -86,6 +86,8 @@ public:
virtual process::Future<Nothing> remove(const ContainerID& containerId);
+ virtual process::Future<Nothing> pruneImages();
+
private:
ComposingContainerizerProcess* process;
};
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/containerizer.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/containerizer.hpp b/src/slave/containerizer/containerizer.hpp
index 2027bd9..7a0c6fc 100644
--- a/src/slave/containerizer/containerizer.hpp
+++ b/src/slave/containerizer/containerizer.hpp
@@ -165,6 +165,9 @@ public:
{
return process::Failure("Unsupported");
}
+
+ // Prune unused images from supported image stores.
+ virtual process::Future<Nothing> pruneImages() = 0;
};
} // namespace slave {
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/docker.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/docker.cpp b/src/slave/containerizer/docker.cpp
index 63432a9..9918d83 100644
--- a/src/slave/containerizer/docker.cpp
+++ b/src/slave/containerizer/docker.cpp
@@ -876,6 +876,13 @@ Future<hashset<ContainerID>> DockerContainerizer::containers()
}
+Future<Nothing> DockerContainerizer::pruneImages()
+{
+ VLOG(1) << "DockerContainerizer does not support pruneImages";
+ return Nothing();
+}
+
+
Future<Nothing> DockerContainerizerProcess::recover(
const Option<SlaveState>& state)
{
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/docker.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/docker.hpp b/src/slave/containerizer/docker.hpp
index 105c068..9df9849 100644
--- a/src/slave/containerizer/docker.hpp
+++ b/src/slave/containerizer/docker.hpp
@@ -109,6 +109,8 @@ public:
virtual process::Future<hashset<ContainerID>> containers();
+ virtual process::Future<Nothing> pruneImages();
+
private:
process::Owned<DockerContainerizerProcess> process;
};
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp
index bf71db1..7f3b86d 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -656,6 +656,12 @@ Future<Nothing> MesosContainerizer::remove(const ContainerID& containerId)
}
+Future<Nothing> MesosContainerizer::pruneImages()
+{
+ return dispatch(process.get(), &MesosContainerizerProcess::pruneImages);
+}
+
+
Future<Nothing> MesosContainerizerProcess::recover(
const Option<state::SlaveState>& state)
{
@@ -2818,6 +2824,39 @@ Future<hashset<ContainerID>> MesosContainerizerProcess::containers()
}
+Future<Nothing> MesosContainerizerProcess::pruneImages()
+{
+ vector<Image> excludedImages;
+ excludedImages.reserve(containers_.size());
+
+ foreachpair (
+ const ContainerID& containerId,
+ const Owned<Container>& container,
+ containers_) {
+ // Checkpointing ContainerConfig is introduced recently. Legacy containers
+ // do not have the information of which image is used. Image pruning is
+ // disabled.
+ if (container->config.isNone()) {
+ return Failure(
+ "Container " + stringify(containerId) +
+ " does not have ContainerConfig "
+ "checkpointed. Image pruning is disabled");
+ }
+
+ const ContainerConfig& containerConfig = container->config.get();
+ if (containerConfig.has_container_info() &&
+ containerConfig.container_info().mesos().has_image()) {
+ excludedImages.push_back(
+ containerConfig.container_info().mesos().image());
+ }
+ }
+
+ // TODO(zhitao): use std::unique to deduplicate `excludedImages`.
+
+ return provisioner->pruneImages(excludedImages);
+}
+
+
MesosContainerizerProcess::Metrics::Metrics()
: container_destroy_errors(
"containerizer/mesos/container_destroy_errors")
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/containerizer.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.hpp b/src/slave/containerizer/mesos/containerizer.hpp
index e859b5b..e2739e0 100644
--- a/src/slave/containerizer/mesos/containerizer.hpp
+++ b/src/slave/containerizer/mesos/containerizer.hpp
@@ -111,6 +111,8 @@ public:
virtual process::Future<Nothing> remove(const ContainerID& containerId);
+ virtual process::Future<Nothing> pruneImages();
+
private:
explicit MesosContainerizer(
const process::Owned<MesosContainerizerProcess>& process);
@@ -181,6 +183,8 @@ public:
virtual process::Future<hashset<ContainerID>> containers();
+ virtual process::Future<Nothing> pruneImages();
+
private:
enum State
{
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp b/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp
index d86afd2..1ab66c1 100644
--- a/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp
+++ b/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp
@@ -67,7 +67,8 @@ public:
const spec::ImageReference& reference,
bool cached);
- // TODO(chenlily): Implement removal of unreferenced images.
+ Future<hashset<string>> prune(
+ const vector<spec::ImageReference>& excludedImages);
private:
// Write out metadata manager state to persistent store.
@@ -134,6 +135,16 @@ Future<Option<Image>> MetadataManager::get(
}
+Future<hashset<string>> MetadataManager::prune(
+ const vector<spec::ImageReference>& excludedImages)
+{
+ return dispatch(
+ process.get(),
+ &MetadataManagerProcess::prune,
+ excludedImages);
+}
+
+
Future<Image> MetadataManagerProcess::put(
const spec::ImageReference& reference,
const vector<string>& layerIds)
@@ -180,6 +191,43 @@ Future<Option<Image>> MetadataManagerProcess::get(
}
+Future<hashset<string>> MetadataManagerProcess::prune(
+ const vector<spec::ImageReference>& excludedImages)
+{
+ hashmap<string, Image> retainedImages;
+ hashset<string> retainedLayers;
+
+ foreach (const spec::ImageReference& reference, excludedImages) {
+ const string imageName = stringify(reference);
+ Option<Image> image = storedImages.get(imageName);
+
+ if (image.isNone()) {
+ // This is possible if docker store was cleaned
+ // in a recovery after the container using this image was
+ // launched.
+ VLOG(1) << "Excluded docker image '" << imageName
+ << "' is not cached in metadata manager.";
+ continue;
+ }
+
+ retainedImages[imageName] = image.get();
+
+ foreach (const string& layerId, image->layer_ids()) {
+ retainedLayers.insert(layerId);
+ }
+ }
+
+ storedImages = std::move(retainedImages);
+
+ Try<Nothing> status = persist();
+ if (status.isError()) {
+ return Failure("Failed to save state of Docker images: " + status.error());
+ }
+
+ return retainedLayers;
+}
+
+
Try<Nothing> MetadataManagerProcess::persist()
{
Images images;
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.hpp b/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.hpp
index 954da16..cfafd44 100644
--- a/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.hpp
+++ b/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.hpp
@@ -21,6 +21,7 @@
#include <string>
#include <stout/hashmap.hpp>
+#include <stout/hashset.hpp>
#include <stout/json.hpp>
#include <stout/option.hpp>
#include <stout/protobuf.hpp>
@@ -92,6 +93,19 @@ public:
const ::docker::spec::ImageReference& reference,
bool cached);
+ /**
+ * Prune images from the metadata manager by comparing
+ * existing images with active images in use. This function will
+ * remove all images not used anymore, and return the list of
+ * layers which are still referenced. The caller should
+ * ensure such layers are kept on a best-effort basis.
+ *
+ * @param excludedImages all images to exclude from pruning.
+ * @return a list of all layers still referred to.
+ */
+ process::Future<hashset<std::string>> prune(
+ const std::vector<::docker::spec::ImageReference>& excludedImages);
+
private:
explicit MetadataManager(process::Owned<MetadataManagerProcess> process);
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/docker/paths.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/docker/paths.cpp b/src/slave/containerizer/mesos/provisioner/docker/paths.cpp
index cd684b3..f692552 100644
--- a/src/slave/containerizer/mesos/provisioner/docker/paths.cpp
+++ b/src/slave/containerizer/mesos/provisioner/docker/paths.cpp
@@ -18,8 +18,12 @@
#include "slave/containerizer/mesos/provisioner/docker/paths.hpp"
+#include <process/clock.hpp>
+
+#include <stout/os.hpp>
#include <stout/path.hpp>
+using std::list;
using std::string;
namespace mesos {
@@ -46,6 +50,13 @@ string getImageLayerPath(const string& storeDir, const string& layerId)
}
+Try<list<string>> listLayers(const string& storeDir)
+{
+ const string layersDir = path::join(storeDir, "layers");
+ return os::ls(layersDir);
+}
+
+
string getImageLayerManifestPath(const string& layerPath)
{
return path::join(layerPath, "json");
@@ -100,6 +111,20 @@ string getStoredImagesPath(const string& storeDir)
return path::join(storeDir, "storedImages");
}
+
+string getGcDir(const string& storeDir)
+{
+ return path::join(storeDir, "gc");
+}
+
+
+string getGcLayerPath(const string& storeDir, const string& layerId)
+{
+ return path::join(
+ getGcDir(storeDir),
+ layerId + "." + stringify(process::Clock::now().duration().ns()));
+}
+
} // namespace paths {
} // namespace docker {
} // namespace slave {
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/docker/paths.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/docker/paths.hpp b/src/slave/containerizer/mesos/provisioner/docker/paths.hpp
index 232c027..0cd7f31 100644
--- a/src/slave/containerizer/mesos/provisioner/docker/paths.hpp
+++ b/src/slave/containerizer/mesos/provisioner/docker/paths.hpp
@@ -17,8 +17,11 @@
#ifndef __PROVISIONER_DOCKER_PATHS_HPP__
#define __PROVISIONER_DOCKER_PATHS_HPP__
+#include <list>
#include <string>
+#include <stout/try.hpp>
+
namespace mesos {
namespace internal {
namespace slave {
@@ -40,6 +43,7 @@ namespace paths {
* |-- json(manifest)
* |-- VERSION
* |--storedImages (file holding on cached images)
+ * |--gc (dir holding marked layers to be swept)
*/
// TODO(gilbert): Clean up any unused method after refactoring.
@@ -90,6 +94,17 @@ std::string getImageArchiveTarPath(
std::string getStoredImagesPath(const std::string& storeDir);
+
+std::string getGcDir(const std::string& storeDir);
+
+
+std::string getGcLayerPath(
+ const std::string& storeDir,
+ const std::string& layerId);
+
+
+Try<std::list<std::string>> listLayers(const std::string& storeDir);
+
} // namespace paths {
} // namespace docker {
} // namespace slave {
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/docker/store.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/docker/store.cpp b/src/slave/containerizer/mesos/provisioner/docker/store.cpp
index f357710..d64e6eb 100644
--- a/src/slave/containerizer/mesos/provisioner/docker/store.cpp
+++ b/src/slave/containerizer/mesos/provisioner/docker/store.cpp
@@ -24,13 +24,16 @@
#include <mesos/secret/resolver.hpp>
#include <stout/hashmap.hpp>
+#include <stout/hashset.hpp>
#include <stout/json.hpp>
#include <stout/os.hpp>
#include <process/collect.hpp>
#include <process/defer.hpp>
#include <process/dispatch.hpp>
+#include <process/executor.hpp>
#include <process/id.hpp>
+#include <process/metrics/counter.hpp>
#include "slave/containerizer/mesos/provisioner/constants.hpp"
#include "slave/containerizer/mesos/provisioner/utils.hpp"
@@ -75,7 +78,9 @@ public:
: ProcessBase(process::ID::generate("docker-provisioner-store")),
flags(_flags),
metadataManager(_metadataManager),
- puller(_puller) {}
+ puller(_puller)
+ {
+ }
~StoreProcess() {}
@@ -85,6 +90,10 @@ public:
const mesos::Image& image,
const string& backend);
+ Future<Nothing> prune(
+ const std::vector<mesos::Image>& excludeImages,
+ const hashset<string>& activeLayerPaths);
+
private:
Future<Image> _get(
const spec::ImageReference& reference,
@@ -106,11 +115,18 @@ private:
const string& layerId,
const string& backend);
+ Future<Nothing> _prune(
+ const hashset<string>& activeLayerPaths,
+ const hashset<string>& retainedImageLayers);
+
const Flags flags;
Owned<MetadataManager> metadataManager;
Owned<Puller> puller;
hashmap<string, Owned<Promise<Image>>> pulling;
+
+ // For executing path removals in a separated actor.
+ process::Executor executor;
};
@@ -164,6 +180,12 @@ Try<Owned<slave::Store>> Store::create(
mkdir.error());
}
+ mkdir = os::mkdir(paths::getGcDir(flags.docker_store_dir));
+ if (mkdir.isError()) {
+ return Error("Failed to create Docker store gc directory: " +
+ mkdir.error());
+ }
+
Try<Owned<MetadataManager>> metadataManager = MetadataManager::create(flags);
if (metadataManager.isError()) {
return Error(metadataManager.error());
@@ -203,6 +225,15 @@ Future<ImageInfo> Store::get(
}
+Future<Nothing> Store::prune(
+ const vector<mesos::Image>& excludedImages,
+ const hashset<string>& activeLayerPaths)
+{
+ return dispatch(
+ process.get(), &StoreProcess::prune, excludedImages, activeLayerPaths);
+}
+
+
Future<Nothing> StoreProcess::recover()
{
return metadataManager->recover();
@@ -447,6 +478,124 @@ Future<Nothing> StoreProcess::moveLayer(
return Nothing();
}
+
+Future<Nothing> StoreProcess::prune(
+ const vector<mesos::Image>& excludedImages,
+ const hashset<string>& activeLayerPaths)
+{
+ // All existing pulling should have finished.
+ if (!pulling.empty()) {
+ return Failure("Cannot prune and pull at the same time");
+ }
+
+ vector<spec::ImageReference> imageReferences;
+ imageReferences.reserve(excludedImages.size());
+
+ foreach (const mesos::Image& image, excludedImages) {
+ Try<spec::ImageReference> reference =
+ spec::parseImageReference(image.docker().name());
+
+ if (reference.isError()) {
+ return Failure(
+ "Failed to parse docker image '" + image.docker().name() +
+ "': " + reference.error());
+ }
+
+ imageReferences.push_back(reference.get());
+ }
+
+ return metadataManager->prune(imageReferences)
+ .then(defer(self(), &Self::_prune, activeLayerPaths, lambda::_1));
+}
+
+
+Future<Nothing> StoreProcess::_prune(
+ const hashset<string>& activeLayerRootfses,
+ const hashset<string>& retainedLayerIds)
+{
+ Try<list<string>> allLayers = paths::listLayers(flags.docker_store_dir);
+ if (allLayers.isError()) {
+ return Failure("Failed to find all layer paths: " + allLayers.error());
+ }
+
+ // Paths in provisioner are layer rootfs. Normalize them to layer
+ // path.
+ hashset<string> activeLayerPaths;
+
+ foreach (const string& rootfsPath, activeLayerRootfses) {
+ activeLayerPaths.insert(Path(rootfsPath).dirname());
+ }
+
+ foreach (const string& layerId, allLayers.get()) {
+ if (retainedLayerIds.contains(layerId)) {
+ VLOG(1) << "Layer '" << layerId << "' is retained by image store cache";
+ continue;
+ }
+
+ const string layerPath =
+ paths::getImageLayerPath(flags.docker_store_dir, layerId);
+
+ if (activeLayerPaths.contains(layerPath)) {
+ VLOG(1) << "Layer '" << layerId << "' is retained by active container";
+ continue;
+ }
+
+ const string target =
+ paths::getGcLayerPath(flags.docker_store_dir, layerId);
+
+ if (os::exists(target)) {
+ return Failure("Marking phase target '" + target + "' already exists");
+ }
+
+ VLOG(1) << "Marking layer '" << layerId << "' to gc by renaming '"
+ << layerPath << "' to '" << target << "'";
+
+ Try<Nothing> rename = os::rename(layerPath, target);
+ if (rename.isError()) {
+ return Failure(
+ "Failed to move layer from '" + layerPath +
+ "' to '" + target + "': " + rename.error());
+ }
+ }
+
+ const string gcDir = paths::getGcDir(flags.docker_store_dir);
+ auto rmdirs = [gcDir]() {
+ Try<list<string>> targets = os::ls(gcDir);
+ if (targets.isError()) {
+ LOG(WARNING) << "Error when listing gcDir '" << gcDir
+ << "': " << targets.error();
+ return Nothing();
+ }
+
+ foreach (const string& target, targets.get()) {
+ const string path = path::join(gcDir, target);
+ // Run the removal operation with 'continueOnError = false'.
+ // A possible situation is that we incorrectly marked a layer
+ // which is still used by certain layer based backends (aufs, overlay).
+ // In such a case, we proceed with a warning and try to free up as much
+ // disk space as possible.
+ LOG(INFO) << "Deleting path '" << path << "'";
+ Try<Nothing> rmdir = os::rmdir(path, true, true, false);
+
+ if (rmdir.isError()) {
+ LOG(WARNING) << "Failed to delete '" << path << "': "
+ << rmdir.error();
+ } else {
+ LOG(INFO) << "Deleted '" << path << "'";
+ }
+ }
+
+ return Nothing();
+ };
+
+ // NOTE: All `rmdirs` calls are dispatched to one executor so that:
+ // 1. They do not block other dispatches;
+ // 2. They do not occupy all worker threads.
+ executor.execute(rmdirs);
+
+ return Nothing();
+}
+
} // namespace docker {
} // namespace slave {
} // namespace internal {
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/docker/store.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/docker/store.hpp b/src/slave/containerizer/mesos/provisioner/docker/store.hpp
index 1cf6866..a420fa0 100644
--- a/src/slave/containerizer/mesos/provisioner/docker/store.hpp
+++ b/src/slave/containerizer/mesos/provisioner/docker/store.hpp
@@ -21,6 +21,7 @@
#include <process/owned.hpp>
+#include <stout/hashset.hpp>
#include <stout/try.hpp>
#include "slave/flags.hpp"
@@ -58,6 +59,10 @@ public:
const mesos::Image& image,
const std::string& backend);
+ virtual process::Future<Nothing> prune(
+ const std::vector<mesos::Image>& excludeImages,
+ const hashset<std::string>& activeLayerPaths);
+
private:
explicit Store(process::Owned<StoreProcess> process);
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/provisioner.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/provisioner.cpp b/src/slave/containerizer/mesos/provisioner/provisioner.cpp
index 1723588..d19dc1a 100644
--- a/src/slave/containerizer/mesos/provisioner/provisioner.cpp
+++ b/src/slave/containerizer/mesos/provisioner/provisioner.cpp
@@ -32,6 +32,7 @@
#include <stout/foreach.hpp>
#include <stout/hashmap.hpp>
#include <stout/hashset.hpp>
+#include <stout/lambda.hpp>
#include <stout/os.hpp>
#include <stout/stringify.hpp>
#include <stout/uuid.hpp>
@@ -56,6 +57,7 @@ using std::vector;
using process::Failure;
using process::Future;
using process::Owned;
+using process::ReadWriteLock;
using mesos::internal::slave::AUFS_BACKEND;
using mesos::internal::slave::BIND_BACKEND;
@@ -312,6 +314,16 @@ Future<bool> Provisioner::destroy(const ContainerID& containerId) const
}
+Future<Nothing> Provisioner::pruneImages(
+ const vector<Image>& excludedImages) const
+{
+ return dispatch(
+ CHECK_NOTNULL(process.get()),
+ &ProvisionerProcess::pruneImages,
+ excludedImages);
+}
+
+
ProvisionerProcess::ProvisionerProcess(
const string& _rootDir,
const string& _defaultBackend,
@@ -450,20 +462,28 @@ Future<ProvisionInfo> ProvisionerProcess::provision(
const ContainerID& containerId,
const Image& image)
{
- if (!stores.contains(image.type())) {
- return Failure(
- "Unsupported container image type: " +
- stringify(image.type()));
- }
+ // `destroy` and `provision` can happen concurrently, but `pruneImages`
+ // is exclusive.
+ return rwLock.read_lock()
+ .then(defer(self(), [this, containerId, image]() -> Future<ProvisionInfo> {
+ if (!stores.contains(image.type())) {
+ return Failure(
+ "Unsupported container image type: " + stringify(image.type()));
+ }
- // Get and then provision image layers from the store.
- return stores.get(image.type()).get()->get(image, defaultBackend)
- .then(defer(self(),
- &Self::_provision,
- containerId,
- image,
- defaultBackend,
- lambda::_1));
+ // Get and then provision image layers from the store.
+ return stores.get(image.type()).get()->get(image, defaultBackend)
+ .then(defer(
+ self(),
+ &Self::_provision,
+ containerId,
+ image,
+ defaultBackend,
+ lambda::_1));
+ }))
+ .onAny(defer(self(), [this](const Future<ProvisionInfo>&) {
+ rwLock.read_unlock();
+ }));
}
@@ -510,7 +530,7 @@ Future<ProvisionInfo> ProvisionerProcess::_provision(
ContainerLayers containerLayers;
- foreach(const string& layer, imageInfo.layers) {
+ foreach (const string& layer, imageInfo.layers) {
containerLayers.add_paths(layer);
}
@@ -529,46 +549,55 @@ Future<ProvisionInfo> ProvisionerProcess::_provision(
Future<bool> ProvisionerProcess::destroy(const ContainerID& containerId)
{
- if (!infos.contains(containerId)) {
- VLOG(1) << "Ignoring destroy request for unknown container " << containerId;
-
- return false;
- }
-
- if (infos[containerId]->destroying) {
- return infos[containerId]->termination.future();
- }
+ // `destroy` and `provision` can happen concurrently, but `pruneImages`
+ // is exclusive.
+ return rwLock.read_lock()
+ .then(defer(self(), [this, containerId]() -> Future<bool> {
+ if (!infos.contains(containerId)) {
+ VLOG(1) << "Ignoring destroy request for unknown container "
+ << containerId;
+
+ return false;
+ }
- infos[containerId]->destroying = true;
+ if (infos[containerId]->destroying) {
+ return infos[containerId]->termination.future();
+ }
- // Provisioner destroy can be invoked from:
- // 1. Provisioner `recover` to destroy all unknown orphans.
- // 2. Containerizer `recover` to destroy known orphans.
- // 3. Containerizer `destroy` on one specific container.
- //
- // NOTE: For (2) and (3), we expect the container being destroyed
- // has no any child contain remain running. However, for case (1),
- // if the container runtime directory does not survive after the
- // machine reboots and the provisioner directory under the agent
- // work dir still exists, all containers will be regarded as
- // unknown containers and will be destroyed. In this case, a parent
- // container may be destroyed before its child containers are
- // cleaned up. So we have to make `destroy()` recursively for
- // this particular case.
- //
- // TODO(gilbert): Move provisioner directory to the container
- // runtime directory after a deprecation cycle to avoid
- // making `provisioner::destroy()` being recursive.
- list<Future<bool>> destroys;
-
- foreachkey (const ContainerID& entry, infos) {
- if (entry.has_parent() && entry.parent() == containerId) {
- destroys.push_back(destroy(entry));
- }
- }
+ infos[containerId]->destroying = true;
+
+ // Provisioner destroy can be invoked from:
+ // 1. Provisioner `recover` to destroy all unknown orphans.
+ // 2. Containerizer `recover` to destroy known orphans.
+ // 3. Containerizer `destroy` on one specific container.
+ //
+ // NOTE: For (2) and (3), we expect the container being destroyed
+ // has no any child contain remain running. However, for case (1),
+ // if the container runtime directory does not survive after the
+ // machine reboots and the provisioner directory under the agent
+ // work dir still exists, all containers will be regarded as
+ // unknown containers and will be destroyed. In this case, a parent
+ // container may be destroyed before its child containers are
+ // cleaned up. So we have to make `destroy()` recursively for
+ // this particular case.
+ //
+ // TODO(gilbert): Move provisioner directory to the container
+ // runtime directory after a deprecation cycle to avoid
+ // making `provisioner::destroy()` being recursive.
+ list<Future<bool>> destroys;
+
+ foreachkey (const ContainerID& entry, infos) {
+ if (entry.has_parent() && entry.parent() == containerId) {
+ destroys.push_back(destroy(entry));
+ }
+ }
- return await(destroys)
- .then(defer(self(), &Self::_destroy, containerId, lambda::_1));
+ return await(destroys)
+ .then(defer(self(), &Self::_destroy, containerId, lambda::_1));
+ }))
+ .onAny(defer(self(), [this](const Future<bool>&) {
+ rwLock.read_unlock();
+ }));
}
@@ -661,6 +690,59 @@ Future<bool> ProvisionerProcess::__destroy(const ContainerID& containerId)
}
+Future<Nothing> ProvisionerProcess::pruneImages(
+ const vector<Image>& excludedImages)
+{
+ // `destroy` and `provision` can happen concurrently, but `pruneImages`
+ // is exclusive.
+ return rwLock.write_lock()
+ .then(defer(self(), [this, excludedImages]() -> Future<Nothing> {
+ hashset<string> activeLayerPaths;
+
+ foreachpair (
+ const ContainerID& containerId, const Owned<Info>& info, infos) {
+ if (info->layers.isNone()) {
+ // There are several possibilities if layer information missing:
+ // - legacy containers provisioned before layer checkpointing:
+ // they should already be excluded by the containerizer;
+ // - the agent crashed after `backend::provision()` finished but
+ // before checkpointing the `layers`. In such a case, the rootfs
+ // should not be used by any running containers yet so it is safe
+ // to skip those layers;
+ // - checkpointed layer files were manually deleted: we do not expect
+ // this to be allowed, but log it for informational purposes.
+ VLOG(1) << "Container " << containerId
+ << " has no checkpointed layers";
+
+ continue;
+ }
+
+ activeLayerPaths.insert(info->layers->begin(), info->layers->end());
+ }
+
+ list<Future<Nothing>> futures;
+
+ foreachpair (
+ const Image::Type& type, const Owned<Store>& store, stores) {
+ vector<Image> images;
+ foreach (const Image& image, excludedImages) {
+ if (image.type() == type) {
+ images.push_back(image);
+ }
+ }
+
+ futures.push_back(store.get()->prune(images, activeLayerPaths));
+ }
+
+ return collect(futures)
+ .then([]() { return Nothing(); });
+ }))
+ .onAny(defer(self(), [this](const Future<Nothing>&) {
+ rwLock.write_unlock();
+ }));
+}
+
+
ProvisionerProcess::Metrics::Metrics()
: remove_container_errors(
"containerizer/mesos/provisioner/remove_container_errors")
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/provisioner.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/provisioner.hpp b/src/slave/containerizer/mesos/provisioner/provisioner.hpp
index b87144c..88d7019 100644
--- a/src/slave/containerizer/mesos/provisioner/provisioner.hpp
+++ b/src/slave/containerizer/mesos/provisioner/provisioner.hpp
@@ -34,6 +34,7 @@
#include <process/future.hpp>
#include <process/owned.hpp>
+#include <process/rwlock.hpp>
#include <process/metrics/counter.hpp>
#include <process/metrics/metrics.hpp>
@@ -102,6 +103,12 @@ public:
// provisioned root filesystem for the given container.
virtual process::Future<bool> destroy(const ContainerID& containerId) const;
+ // Prune images in different stores. Image references in excludedImages
+ // will be passed to stores and retained in a best effort fashion.
+ // All layer paths used by active containers will not be pruned.
+ virtual process::Future<Nothing> pruneImages(
+ const std::vector<Image>& excludedImages) const;
+
protected:
Provisioner() {} // For creating mock object.
@@ -132,6 +139,9 @@ public:
process::Future<bool> destroy(const ContainerID& containerId);
+ process::Future<Nothing> pruneImages(
+ const std::vector<Image>& excludedImages);
+
private:
process::Future<ProvisionInfo> _provision(
const ContainerID& containerId,
@@ -186,6 +196,20 @@ private:
process::metrics::Counter remove_container_errors;
} metrics;
+
+ // This `ReadWriteLock` instance is used to protect the critical
+ // section, which includes store directory and provision directory.
+ // Because `provision` and `destroy` are scoped by `containerId`,
+ // they are not expected to touch the same critical section
+ // simultaneously, so any `provision` and `destroy` can happen concurrently.
+ // This is guaranteed by Mesos containerizer, e.g., a `destroy` will always
+ // wait for a container's `provision` to finish, then do the cleanup.
+ //
+ // On the other hand, `pruneImages` needs to know all active layers from all
+ // containers, therefore it must be exclusive to other `provision`, `destroy`
+ // and `pruneImages` so that we do not prune image layers which are used by an
+ // active `provision` or `destroy`.
+ process::ReadWriteLock rwLock;
};
} // namespace slave {
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/store.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/store.cpp b/src/slave/containerizer/mesos/provisioner/store.cpp
index cc5cc81..11fce0e 100644
--- a/src/slave/containerizer/mesos/provisioner/store.cpp
+++ b/src/slave/containerizer/mesos/provisioner/store.cpp
@@ -15,6 +15,7 @@
// limitations under the License.
#include <string>
+#include <vector>
#include <mesos/type_utils.hpp>
@@ -31,6 +32,7 @@
#include "slave/containerizer/mesos/provisioner/docker/store.hpp"
using std::string;
+using std::vector;
using process::Owned;
@@ -86,6 +88,14 @@ Try<hashmap<Image::Type, Owned<Store>>> Store::create(
return stores;
}
+
+process::Future<Nothing> Store::prune(
+ const vector<Image>& excludeImages,
+ const hashset<string>& activeLayerPaths)
+{
+ return Nothing();
+}
+
} // namespace slave {
} // namespace internal {
} // namespace mesos {
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/store.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/store.hpp b/src/slave/containerizer/mesos/provisioner/store.hpp
index 01ab83d..a4ae00e 100644
--- a/src/slave/containerizer/mesos/provisioner/store.hpp
+++ b/src/slave/containerizer/mesos/provisioner/store.hpp
@@ -31,6 +31,7 @@
#include <process/future.hpp>
#include <process/owned.hpp>
+#include <stout/hashset.hpp>
#include <stout/try.hpp>
#include "slave/flags.hpp"
@@ -87,6 +88,23 @@ public:
virtual process::Future<ImageInfo> get(
const Image& image,
const std::string& backend) = 0;
+
+ // Prune unused images from the given store. This is called within
+ // an exclusive lock from `provisioner`, which means any other
+ // image provision or prune are blocked until the future is satisfied,
+ // so an implementation should minimize the blocking time.
+ //
+ // Any image specified in `excludedImages` should not be pruned if
+ // it is already cached previously.
+ //
+ // On top of this, all layer paths used to provision all active
+ // containers are also passed in `activeLayerPaths`, and these layers
+ // should also be retained. Because in certain stores (e.g., the docker store)
+ // the cache is not the source of truth, and we need to not only keep the
+ // excluded images, but also maintain the cache.
+ virtual process::Future<Nothing> prune(
+ const std::vector<Image>& excludedImages,
+ const hashset<std::string>& activeLayerPaths);
};
} // namespace slave {
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/tests/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer.cpp b/src/tests/containerizer.cpp
index c6f1ec0..665bd5d 100644
--- a/src/tests/containerizer.cpp
+++ b/src/tests/containerizer.cpp
@@ -31,6 +31,7 @@ using process::http::Connection;
using std::map;
using std::shared_ptr;
using std::string;
+using std::vector;
using testing::_;
using testing::Invoke;
@@ -313,6 +314,11 @@ public:
return containers_.keys();
}
+ Future<Nothing> pruneImages()
+ {
+ return Nothing();
+ }
+
private:
struct ContainerData
{
@@ -437,6 +443,9 @@ void TestContainerizer::setup()
EXPECT_CALL(*this, kill(_, _))
.WillRepeatedly(Invoke(this, &TestContainerizer::_kill));
+
+ EXPECT_CALL(*this, pruneImages())
+ .WillRepeatedly(Invoke(this, &TestContainerizer::_pruneImages));
}
@@ -568,6 +577,14 @@ Future<hashset<ContainerID>> TestContainerizer::containers()
&TestContainerizerProcess::containers);
}
+
+Future<Nothing> TestContainerizer::_pruneImages()
+{
+ return process::dispatch(
+ process.get(),
+ &TestContainerizerProcess::pruneImages);
+}
+
} // namespace tests {
} // namespace internal {
} // namespace mesos {
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/tests/containerizer.hpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer.hpp b/src/tests/containerizer.hpp
index c98913f..3d162fe 100644
--- a/src/tests/containerizer.hpp
+++ b/src/tests/containerizer.hpp
@@ -122,6 +122,10 @@ public:
kill,
process::Future<bool>(const ContainerID&, int));
+ MOCK_METHOD0(
+ pruneImages,
+ process::Future<Nothing>());
+
// Additional destroy method for testing because we won't know the
// ContainerID created for each container.
process::Future<bool> destroy(
@@ -163,10 +167,13 @@ private:
process::Future<bool> _destroy(
const ContainerID& containerId);
+
process::Future<bool> _kill(
const ContainerID& containerId,
int status);
+ process::Future<Nothing> _pruneImages();
+
process::Owned<TestContainerizerProcess> process;
};
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/tests/containerizer/mock_containerizer.hpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer/mock_containerizer.hpp b/src/tests/containerizer/mock_containerizer.hpp
index 5befccc..bbfa768 100644
--- a/src/tests/containerizer/mock_containerizer.hpp
+++ b/src/tests/containerizer/mock_containerizer.hpp
@@ -81,6 +81,8 @@ public:
MOCK_METHOD0(
containers,
process::Future<hashset<ContainerID>>());
+
+ MOCK_METHOD0(pruneImages, process::Future<Nothing>());
};
} // namespace tests {