You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ji...@apache.org on 2016/01/26 21:32:00 UTC

mesos git commit: Handled quota when volumes are bind mounted into the sandbox.

Repository: mesos
Updated Branches:
  refs/heads/master f8176feee -> 7ba659cdc


Handled quota when volumes are bind mounted into the sandbox.

Review: https://reviews.apache.org/r/41818/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/7ba659cd
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/7ba659cd
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/7ba659cd

Branch: refs/heads/master
Commit: 7ba659cdccb29365b79e7ed62ce435fd1cb91920
Parents: f8176fe
Author: Artem Harutyunyan <ar...@mesosphere.io>
Authored: Tue Jan 26 11:04:09 2016 -0800
Committer: Jie Yu <yu...@gmail.com>
Committed: Tue Jan 26 12:20:45 2016 -0800

----------------------------------------------------------------------
 .../mesos/isolators/posix/disk.cpp              | 81 ++++++++++++++----
 .../mesos/isolators/posix/disk.hpp              |  4 +-
 .../containerizer/filesystem_isolator_tests.cpp | 89 ++++++++++++++++++++
 src/tests/disk_quota_tests.cpp                  | 35 +++++++-
 4 files changed, 189 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/7ba659cd/src/slave/containerizer/mesos/isolators/posix/disk.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/posix/disk.cpp b/src/slave/containerizer/mesos/isolators/posix/disk.cpp
index 23e4dcd..5ed9353 100644
--- a/src/slave/containerizer/mesos/isolators/posix/disk.cpp
+++ b/src/slave/containerizer/mesos/isolators/posix/disk.cpp
@@ -157,6 +157,9 @@ Future<Nothing> PosixDiskIsolatorProcess::update(
   // This stores the updated quotas.
   hashmap<string, Resources> quotas;
 
+  // Volume paths to exclude from sandbox disk usage calculation.
+  vector<string> excludes;
+
   foreach (const Resource& resource, resources) {
     if (resource.name() != "disk") {
       continue;
@@ -176,6 +179,8 @@ Future<Nothing> PosixDiskIsolatorProcess::update(
       // we extract the path from the protobuf.
       path = resource.disk().volume().container_path();
 
+      excludes.push_back(path);
+
       // In case the path in the protobuf is not an absolute path it is
       // relative to the working directory of the executor. We always store
       // the absolute path.
@@ -189,11 +194,13 @@ Future<Nothing> PosixDiskIsolatorProcess::update(
     quotas[path] += resource;
   }
 
-  // Update the quota for paths. For each new path, we also initiate
+  // Update the quota for paths. For each new path we also initiate
   // the disk usage collection.
   foreachpair (const string& path, const Resources& quota, quotas) {
     if (!info->paths.contains(path)) {
-      info->paths[path].usage = collector.usage(path)
+      info->paths[path].usage = collector.usage(
+          path,
+          (path == info->directory) ? excludes : vector<string>())
         .onAny(defer(
             PID<PosixDiskIsolatorProcess>(this),
             &PosixDiskIsolatorProcess::_collect,
@@ -265,13 +272,27 @@ void PosixDiskIsolatorProcess::_collect(
     }
   }
 
-  info->paths[path].usage = collector.usage(path)
-    .onAny(defer(
-        PID<PosixDiskIsolatorProcess>(this),
-        &PosixDiskIsolatorProcess::_collect,
-        containerId,
-        path,
-        lambda::_1));
+  // Build excludes array if the current path is the sandbox.
+  vector<string> excludes;
+  if (path == info->directory) {
+    foreachkey (const string& exclude, info->paths) {
+      if (exclude != path) {
+        // `du --exclude` uses pattern matching so we strip both
+        // prefix (sandbox path) and suffix ('/') from volume path.
+        string relative = strings::remove(exclude, path + "/", strings::PREFIX);
+        relative = strings::remove(relative, "/", strings::SUFFIX);
+        excludes.push_back(relative);
+      }
+    }
+  }
+
+  info->paths[path].usage = collector.usage(path, excludes)
+      .onAny(defer(
+          PID<PosixDiskIsolatorProcess>(this),
+          &PosixDiskIsolatorProcess::_collect,
+          containerId,
+          path,
+          lambda::_1));
 }
 
 
@@ -325,15 +346,21 @@ public:
   DiskUsageCollectorProcess(const Duration& _interval) : interval(_interval) {}
   virtual ~DiskUsageCollectorProcess() {}
 
-  Future<Bytes> usage(const string& path)
+  Future<Bytes> usage(
+      const string& path,
+      const vector<string>& excludes)
   {
+    // TODO(jieyu): 'excludes' is not supported on OSX. We should
+    // either return a Failure here, or does not allow 'excludes' to
+    // be specifed on OSX.
+
     foreach (const Owned<Entry>& entry, entries) {
       if (entry->path == path) {
         return entry->promise.future();
       }
     }
 
-    entries.push_back(Owned<Entry>(new Entry(path)));
+    entries.push_back(Owned<Entry>(new Entry(path, excludes)));
 
     // Install onDiscard callback.
     Future<Bytes> future = entries.back()->promise.future();
@@ -363,9 +390,12 @@ private:
   // Describe a single pending check.
   struct Entry
   {
-    explicit Entry(const string& _path) : path(_path) {}
+    explicit Entry(const string& _path, const vector<string>& _excludes)
+      : path(_path),
+        excludes(_excludes) {}
 
     string path;
+    vector<string> excludes;
     Option<Subprocess> du;
     Promise<Bytes> promise;
   };
@@ -417,9 +447,28 @@ private:
     // will be that cgroup that is charged for (a) memory to cache the
     // fs data structures, (b) disk I/O to read those structures, and
     // (c) the cpu time to traverse.
+
+    // Construct the 'du' command.
+    vector<string> command = {
+      "du",
+      "-k", // Use 1K size blocks for consistent results across platforms.
+      "-s", // Use 'silent' output mode.
+    };
+
+#ifdef __linux__
+    // Add paths that need to be excluded.
+    foreach (const string& exclude, entry->excludes) {
+      command.push_back("--exclude");
+      command.push_back(exclude);
+    }
+#endif
+
+    // Add path on which 'du' must be run.
+    command.push_back(entry->path);
+
     Try<Subprocess> s = subprocess(
         "du",
-        vector<string>({"du", "-k", "-s", entry->path}),
+        command,
         Subprocess::PATH("/dev/null"),
         Subprocess::PIPE(),
         Subprocess::PIPE(),
@@ -524,9 +573,11 @@ DiskUsageCollector::~DiskUsageCollector()
 }
 
 
-Future<Bytes> DiskUsageCollector::usage(const string& path)
+Future<Bytes> DiskUsageCollector::usage(
+    const string& path,
+    const vector<string>& excludes)
 {
-  return dispatch(process, &DiskUsageCollectorProcess::usage, path);
+  return dispatch(process, &DiskUsageCollectorProcess::usage, path, excludes);
 }
 
 } // namespace slave {

http://git-wip-us.apache.org/repos/asf/mesos/blob/7ba659cd/src/slave/containerizer/mesos/isolators/posix/disk.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/posix/disk.hpp b/src/slave/containerizer/mesos/isolators/posix/disk.hpp
index 8e87432..5607c0f 100644
--- a/src/slave/containerizer/mesos/isolators/posix/disk.hpp
+++ b/src/slave/containerizer/mesos/isolators/posix/disk.hpp
@@ -48,7 +48,9 @@ public:
 
   // Returns the disk usage rooted at 'path'. The user can discard the
   // returned future to cancel the check.
-  process::Future<Bytes> usage(const std::string& path);
+  process::Future<Bytes> usage(
+      const std::string& path,
+      const std::vector<std::string>& excludes);
 
 private:
   DiskUsageCollectorProcess* process;

http://git-wip-us.apache.org/repos/asf/mesos/blob/7ba659cd/src/tests/containerizer/filesystem_isolator_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer/filesystem_isolator_tests.cpp b/src/tests/containerizer/filesystem_isolator_tests.cpp
index 496275a..8bc2e1d 100644
--- a/src/tests/containerizer/filesystem_isolator_tests.cpp
+++ b/src/tests/containerizer/filesystem_isolator_tests.cpp
@@ -1070,6 +1070,95 @@ TEST_F(LinuxFilesystemIsolatorTest, ROOT_WorkDirMountPreExists)
 
   delete isolator.get();
 }
+
+
+// This test verifies that the volume usage accounting for sandboxes with
+// bind-mounted volumes works correctly by creating a file within the volume
+// the size of which exceeds the sandbox quota.
+TEST_F(LinuxFilesystemIsolatorTest, ROOT_VolumeUsageExceedsSandboxQuota)
+{
+  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
+  frameworkInfo.set_role("role1");
+
+  Try<PID<Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  slave::Flags flags = CreateSlaveFlags();
+  flags.isolation = "posix/disk,filesystem/linux";
+
+  // NOTE: We can't pause the clock because we need the reaper to reap
+  // the 'du' subprocess.
+  flags.container_disk_watch_interval = Milliseconds(1);
+  flags.enforce_container_disk_quota = true;
+  flags.resources = "cpus:2;mem:128;disk(role1):128";
+
+  Try<PID<Slave>> slave = StartSlave(flags);
+  ASSERT_SOME(slave);
+
+  MockScheduler sched;
+  MesosSchedulerDriver driver(
+      &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);
+
+  Future<FrameworkID> frameworkId;
+  EXPECT_CALL(sched, registered(&driver, _, _))
+    .WillOnce(FutureArg<1>(&frameworkId));
+
+  Future<vector<Offer>> offers;
+  EXPECT_CALL(sched, resourceOffers(&driver, _))
+    .WillOnce(FutureArg<1>(&offers))
+    .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+  driver.start();
+
+  AWAIT_READY(frameworkId);
+
+  AWAIT_READY(offers);
+  ASSERT_NE(0u, offers.get().size());
+
+  // We request a sandbox (1MB) that is smaller than the persistent
+  // volume (4MB) and attempt to create a file in that volume that is
+  // twice the size of the sanbox (2MB).
+  Resources volume = createPersistentVolume(
+      Megabytes(4),
+      "role1",
+      "id1",
+      "volume_path");
+
+  Resources taskResources =
+      Resources::parse("cpus:1;mem:64;disk(role1):1").get() + volume;
+
+  // We sleep to give quota enforcement (du) a chance to kick in.
+  TaskInfo task = createTask(
+      offers.get()[0].slave_id(),
+      taskResources,
+      "dd if=/dev/zero of=volume_path/file bs=1048576 count=2 && sleep 1");
+
+  Future<TaskStatus> statusRunning;
+  Future<TaskStatus> statusFinished;
+
+  EXPECT_CALL(sched, statusUpdate(&driver, _))
+    .WillOnce(FutureArg<1>(&statusRunning))
+    .WillOnce(FutureArg<1>(&statusFinished));
+
+  driver.acceptOffers(
+      {offers.get()[0].id()},
+      {CREATE(volume),
+      LAUNCH({task})});
+
+  AWAIT_READY(statusRunning);
+  EXPECT_EQ(task.task_id(), statusRunning.get().task_id());
+  EXPECT_EQ(TASK_RUNNING, statusRunning.get().state());
+
+  AWAIT_READY(statusFinished);
+  EXPECT_EQ(task.task_id(), statusFinished.get().task_id());
+  EXPECT_EQ(TASK_FINISHED, statusFinished.get().state());
+
+  driver.stop();
+  driver.join();
+
+  Shutdown();
+}
+
 #endif // __linux__
 
 } // namespace tests {

http://git-wip-us.apache.org/repos/asf/mesos/blob/7ba659cd/src/tests/disk_quota_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/disk_quota_tests.cpp b/src/tests/disk_quota_tests.cpp
index 1577cf7..eb93c2f 100644
--- a/src/tests/disk_quota_tests.cpp
+++ b/src/tests/disk_quota_tests.cpp
@@ -81,7 +81,7 @@ TEST_F(DiskUsageCollectorTest, File)
 
   DiskUsageCollector collector(Milliseconds(1));
 
-  Future<Bytes> usage = collector.usage(path);
+  Future<Bytes> usage = collector.usage(path, {});
   AWAIT_READY(usage);
 
   // NOTE: A typical file system needs more disk space to keep meta
@@ -110,7 +110,7 @@ TEST_F(DiskUsageCollectorTest, Directory)
 
   DiskUsageCollector collector(Milliseconds(1));
 
-  Future<Bytes> usage = collector.usage(os::getcwd());
+  Future<Bytes> usage = collector.usage(os::getcwd(), {});
   AWAIT_READY(usage);
 
   EXPECT_GE(usage.get(), Kilobytes(15));
@@ -129,8 +129,8 @@ TEST_F(DiskUsageCollectorTest, SymbolicLink)
 
   DiskUsageCollector collector(Milliseconds(1));
 
-  Future<Bytes> usage1 = collector.usage(os::getcwd());
-  Future<Bytes> usage2 = collector.usage(link);
+  Future<Bytes> usage1 = collector.usage(os::getcwd(), {});
+  Future<Bytes> usage2 = collector.usage(link, {});
 
   AWAIT_READY(usage1);
   EXPECT_GE(usage1.get(), Kilobytes(64));
@@ -141,6 +141,33 @@ TEST_F(DiskUsageCollectorTest, SymbolicLink)
 }
 
 
+#ifdef __linux__
+// This test verifies that relative exclude paths work and that
+// absolute ones don't (in cases when the directory path itself
+// is relative).
+TEST_F(DiskUsageCollectorTest, ExcludeRelativePath)
+{
+  string file = path::join(os::getcwd(), "file");
+
+  // Create a 128k file.
+  ASSERT_SOME(os::write(file, string(Kilobytes(128).bytes(), 'x')));
+
+  DiskUsageCollector collector(Milliseconds(1));
+
+  // Exclude 'file' and make sure the usage is way below 128k.
+  Future<Bytes> usage1 = collector.usage(os::getcwd(), {"file"});
+  AWAIT_READY(usage1);
+  EXPECT_LT(usage1.get(), Kilobytes(64));
+
+  // Exclude absolute path of 'file' with the relative directory
+  // path. Pattern matching is expected to fail causing exclude
+  // to have no effect.
+  Future<Bytes> usage2 = collector.usage(".", {file});
+  EXPECT_GT(usage2.get(), Kilobytes(128));
+}
+#endif
+
+
 class DiskQuotaTest : public MesosTest {};