You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ji...@apache.org on 2016/01/26 21:32:00 UTC
mesos git commit: Handled quota when volumes are bind mounted into
the sandbox.
Repository: mesos
Updated Branches:
refs/heads/master f8176feee -> 7ba659cdc
Handled quota when volumes are bind mounted into the sandbox.
Review: https://reviews.apache.org/r/41818/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/7ba659cd
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/7ba659cd
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/7ba659cd
Branch: refs/heads/master
Commit: 7ba659cdccb29365b79e7ed62ce435fd1cb91920
Parents: f8176fe
Author: Artem Harutyunyan <ar...@mesosphere.io>
Authored: Tue Jan 26 11:04:09 2016 -0800
Committer: Jie Yu <yu...@gmail.com>
Committed: Tue Jan 26 12:20:45 2016 -0800
----------------------------------------------------------------------
.../mesos/isolators/posix/disk.cpp | 81 ++++++++++++++----
.../mesos/isolators/posix/disk.hpp | 4 +-
.../containerizer/filesystem_isolator_tests.cpp | 89 ++++++++++++++++++++
src/tests/disk_quota_tests.cpp | 35 +++++++-
4 files changed, 189 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/7ba659cd/src/slave/containerizer/mesos/isolators/posix/disk.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/posix/disk.cpp b/src/slave/containerizer/mesos/isolators/posix/disk.cpp
index 23e4dcd..5ed9353 100644
--- a/src/slave/containerizer/mesos/isolators/posix/disk.cpp
+++ b/src/slave/containerizer/mesos/isolators/posix/disk.cpp
@@ -157,6 +157,9 @@ Future<Nothing> PosixDiskIsolatorProcess::update(
// This stores the updated quotas.
hashmap<string, Resources> quotas;
+ // Volume paths to exclude from sandbox disk usage calculation.
+ vector<string> excludes;
+
foreach (const Resource& resource, resources) {
if (resource.name() != "disk") {
continue;
@@ -176,6 +179,8 @@ Future<Nothing> PosixDiskIsolatorProcess::update(
// we extract the path from the protobuf.
path = resource.disk().volume().container_path();
+ excludes.push_back(path);
+
// In case the path in the protobuf is not an absolute path it is
// relative to the working directory of the executor. We always store
// the absolute path.
@@ -189,11 +194,13 @@ Future<Nothing> PosixDiskIsolatorProcess::update(
quotas[path] += resource;
}
- // Update the quota for paths. For each new path, we also initiate
+ // Update the quota for paths. For each new path we also initiate
// the disk usage collection.
foreachpair (const string& path, const Resources& quota, quotas) {
if (!info->paths.contains(path)) {
- info->paths[path].usage = collector.usage(path)
+ info->paths[path].usage = collector.usage(
+ path,
+ (path == info->directory) ? excludes : vector<string>())
.onAny(defer(
PID<PosixDiskIsolatorProcess>(this),
&PosixDiskIsolatorProcess::_collect,
@@ -265,13 +272,27 @@ void PosixDiskIsolatorProcess::_collect(
}
}
- info->paths[path].usage = collector.usage(path)
- .onAny(defer(
- PID<PosixDiskIsolatorProcess>(this),
- &PosixDiskIsolatorProcess::_collect,
- containerId,
- path,
- lambda::_1));
+ // Build excludes array if the current path is the sandbox.
+ vector<string> excludes;
+ if (path == info->directory) {
+ foreachkey (const string& exclude, info->paths) {
+ if (exclude != path) {
+ // `du --exclude` uses pattern matching so we strip both
+ // prefix (sandbox path) and suffix ('/') from volume path.
+ string relative = strings::remove(exclude, path + "/", strings::PREFIX);
+ relative = strings::remove(relative, "/", strings::SUFFIX);
+ excludes.push_back(relative);
+ }
+ }
+ }
+
+ info->paths[path].usage = collector.usage(path, excludes)
+ .onAny(defer(
+ PID<PosixDiskIsolatorProcess>(this),
+ &PosixDiskIsolatorProcess::_collect,
+ containerId,
+ path,
+ lambda::_1));
}
@@ -325,15 +346,21 @@ public:
DiskUsageCollectorProcess(const Duration& _interval) : interval(_interval) {}
virtual ~DiskUsageCollectorProcess() {}
- Future<Bytes> usage(const string& path)
+ Future<Bytes> usage(
+ const string& path,
+ const vector<string>& excludes)
{
+ // TODO(jieyu): 'excludes' is not supported on OSX. We should
+ // either return a Failure here, or does not allow 'excludes' to
+ // be specifed on OSX.
+
foreach (const Owned<Entry>& entry, entries) {
if (entry->path == path) {
return entry->promise.future();
}
}
- entries.push_back(Owned<Entry>(new Entry(path)));
+ entries.push_back(Owned<Entry>(new Entry(path, excludes)));
// Install onDiscard callback.
Future<Bytes> future = entries.back()->promise.future();
@@ -363,9 +390,12 @@ private:
// Describe a single pending check.
struct Entry
{
- explicit Entry(const string& _path) : path(_path) {}
+ explicit Entry(const string& _path, const vector<string>& _excludes)
+ : path(_path),
+ excludes(_excludes) {}
string path;
+ vector<string> excludes;
Option<Subprocess> du;
Promise<Bytes> promise;
};
@@ -417,9 +447,28 @@ private:
// will be that cgroup that is charged for (a) memory to cache the
// fs data structures, (b) disk I/O to read those structures, and
// (c) the cpu time to traverse.
+
+ // Construct the 'du' command.
+ vector<string> command = {
+ "du",
+ "-k", // Use 1K size blocks for consistent results across platforms.
+ "-s", // Use 'silent' output mode.
+ };
+
+#ifdef __linux__
+ // Add paths that need to be excluded.
+ foreach (const string& exclude, entry->excludes) {
+ command.push_back("--exclude");
+ command.push_back(exclude);
+ }
+#endif
+
+ // Add path on which 'du' must be run.
+ command.push_back(entry->path);
+
Try<Subprocess> s = subprocess(
"du",
- vector<string>({"du", "-k", "-s", entry->path}),
+ command,
Subprocess::PATH("/dev/null"),
Subprocess::PIPE(),
Subprocess::PIPE(),
@@ -524,9 +573,11 @@ DiskUsageCollector::~DiskUsageCollector()
}
-Future<Bytes> DiskUsageCollector::usage(const string& path)
+Future<Bytes> DiskUsageCollector::usage(
+ const string& path,
+ const vector<string>& excludes)
{
- return dispatch(process, &DiskUsageCollectorProcess::usage, path);
+ return dispatch(process, &DiskUsageCollectorProcess::usage, path, excludes);
}
} // namespace slave {
http://git-wip-us.apache.org/repos/asf/mesos/blob/7ba659cd/src/slave/containerizer/mesos/isolators/posix/disk.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/posix/disk.hpp b/src/slave/containerizer/mesos/isolators/posix/disk.hpp
index 8e87432..5607c0f 100644
--- a/src/slave/containerizer/mesos/isolators/posix/disk.hpp
+++ b/src/slave/containerizer/mesos/isolators/posix/disk.hpp
@@ -48,7 +48,9 @@ public:
// Returns the disk usage rooted at 'path'. The user can discard the
// returned future to cancel the check.
- process::Future<Bytes> usage(const std::string& path);
+ process::Future<Bytes> usage(
+ const std::string& path,
+ const std::vector<std::string>& excludes);
private:
DiskUsageCollectorProcess* process;
http://git-wip-us.apache.org/repos/asf/mesos/blob/7ba659cd/src/tests/containerizer/filesystem_isolator_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer/filesystem_isolator_tests.cpp b/src/tests/containerizer/filesystem_isolator_tests.cpp
index 496275a..8bc2e1d 100644
--- a/src/tests/containerizer/filesystem_isolator_tests.cpp
+++ b/src/tests/containerizer/filesystem_isolator_tests.cpp
@@ -1070,6 +1070,95 @@ TEST_F(LinuxFilesystemIsolatorTest, ROOT_WorkDirMountPreExists)
delete isolator.get();
}
+
+
+// This test verifies that the volume usage accounting for sandboxes with
+// bind-mounted volumes works correctly by creating a file within the volume
+// the size of which exceeds the sandbox quota.
+TEST_F(LinuxFilesystemIsolatorTest, ROOT_VolumeUsageExceedsSandboxQuota)
+{
+ FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
+ frameworkInfo.set_role("role1");
+
+ Try<PID<Master>> master = StartMaster();
+ ASSERT_SOME(master);
+
+ slave::Flags flags = CreateSlaveFlags();
+ flags.isolation = "posix/disk,filesystem/linux";
+
+ // NOTE: We can't pause the clock because we need the reaper to reap
+ // the 'du' subprocess.
+ flags.container_disk_watch_interval = Milliseconds(1);
+ flags.enforce_container_disk_quota = true;
+ flags.resources = "cpus:2;mem:128;disk(role1):128";
+
+ Try<PID<Slave>> slave = StartSlave(flags);
+ ASSERT_SOME(slave);
+
+ MockScheduler sched;
+ MesosSchedulerDriver driver(
+ &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);
+
+ Future<FrameworkID> frameworkId;
+ EXPECT_CALL(sched, registered(&driver, _, _))
+ .WillOnce(FutureArg<1>(&frameworkId));
+
+ Future<vector<Offer>> offers;
+ EXPECT_CALL(sched, resourceOffers(&driver, _))
+ .WillOnce(FutureArg<1>(&offers))
+ .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+ driver.start();
+
+ AWAIT_READY(frameworkId);
+
+ AWAIT_READY(offers);
+ ASSERT_NE(0u, offers.get().size());
+
+ // We request a sandbox (1MB) that is smaller than the persistent
+ // volume (4MB) and attempt to create a file in that volume that is
+ // twice the size of the sanbox (2MB).
+ Resources volume = createPersistentVolume(
+ Megabytes(4),
+ "role1",
+ "id1",
+ "volume_path");
+
+ Resources taskResources =
+ Resources::parse("cpus:1;mem:64;disk(role1):1").get() + volume;
+
+ // We sleep to give quota enforcement (du) a chance to kick in.
+ TaskInfo task = createTask(
+ offers.get()[0].slave_id(),
+ taskResources,
+ "dd if=/dev/zero of=volume_path/file bs=1048576 count=2 && sleep 1");
+
+ Future<TaskStatus> statusRunning;
+ Future<TaskStatus> statusFinished;
+
+ EXPECT_CALL(sched, statusUpdate(&driver, _))
+ .WillOnce(FutureArg<1>(&statusRunning))
+ .WillOnce(FutureArg<1>(&statusFinished));
+
+ driver.acceptOffers(
+ {offers.get()[0].id()},
+ {CREATE(volume),
+ LAUNCH({task})});
+
+ AWAIT_READY(statusRunning);
+ EXPECT_EQ(task.task_id(), statusRunning.get().task_id());
+ EXPECT_EQ(TASK_RUNNING, statusRunning.get().state());
+
+ AWAIT_READY(statusFinished);
+ EXPECT_EQ(task.task_id(), statusFinished.get().task_id());
+ EXPECT_EQ(TASK_FINISHED, statusFinished.get().state());
+
+ driver.stop();
+ driver.join();
+
+ Shutdown();
+}
+
#endif // __linux__
} // namespace tests {
http://git-wip-us.apache.org/repos/asf/mesos/blob/7ba659cd/src/tests/disk_quota_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/disk_quota_tests.cpp b/src/tests/disk_quota_tests.cpp
index 1577cf7..eb93c2f 100644
--- a/src/tests/disk_quota_tests.cpp
+++ b/src/tests/disk_quota_tests.cpp
@@ -81,7 +81,7 @@ TEST_F(DiskUsageCollectorTest, File)
DiskUsageCollector collector(Milliseconds(1));
- Future<Bytes> usage = collector.usage(path);
+ Future<Bytes> usage = collector.usage(path, {});
AWAIT_READY(usage);
// NOTE: A typical file system needs more disk space to keep meta
@@ -110,7 +110,7 @@ TEST_F(DiskUsageCollectorTest, Directory)
DiskUsageCollector collector(Milliseconds(1));
- Future<Bytes> usage = collector.usage(os::getcwd());
+ Future<Bytes> usage = collector.usage(os::getcwd(), {});
AWAIT_READY(usage);
EXPECT_GE(usage.get(), Kilobytes(15));
@@ -129,8 +129,8 @@ TEST_F(DiskUsageCollectorTest, SymbolicLink)
DiskUsageCollector collector(Milliseconds(1));
- Future<Bytes> usage1 = collector.usage(os::getcwd());
- Future<Bytes> usage2 = collector.usage(link);
+ Future<Bytes> usage1 = collector.usage(os::getcwd(), {});
+ Future<Bytes> usage2 = collector.usage(link, {});
AWAIT_READY(usage1);
EXPECT_GE(usage1.get(), Kilobytes(64));
@@ -141,6 +141,33 @@ TEST_F(DiskUsageCollectorTest, SymbolicLink)
}
+#ifdef __linux__
+// This test verifies that relative exclude paths work and that
+// absolute ones don't (in cases when the directory path itself
+// is relative).
+TEST_F(DiskUsageCollectorTest, ExcludeRelativePath)
+{
+ string file = path::join(os::getcwd(), "file");
+
+ // Create a 128k file.
+ ASSERT_SOME(os::write(file, string(Kilobytes(128).bytes(), 'x')));
+
+ DiskUsageCollector collector(Milliseconds(1));
+
+ // Exclude 'file' and make sure the usage is way below 128k.
+ Future<Bytes> usage1 = collector.usage(os::getcwd(), {"file"});
+ AWAIT_READY(usage1);
+ EXPECT_LT(usage1.get(), Kilobytes(64));
+
+ // Exclude absolute path of 'file' with the relative directory
+ // path. Pattern matching is expected to fail causing exclude
+ // to have no effect.
+ Future<Bytes> usage2 = collector.usage(".", {file});
+ EXPECT_GT(usage2.get(), Kilobytes(128));
+}
+#endif
+
+
class DiskQuotaTest : public MesosTest {};