Posted to commits@mesos.apache.org by jp...@apache.org on 2019/08/08 04:54:51 UTC

[mesos] 03/06: Add `disk/xfs` isolator support for ephemeral volumes.

This is an automated email from the ASF dual-hosted git repository.

jpeach pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 3446ca6f05152fb34f607bfe69ad3175e6a5e806
Author: James Peach <jp...@apache.org>
AuthorDate: Wed Aug 7 20:22:03 2019 -0700

    Add `disk/xfs` isolator support for ephemeral volumes.
    
    Add support for labeling ephemeral volumes with the sandbox XFS
    project ID so that changes to the container rootfs share the same
    disk quota as the sandbox.
    
    Review: https://reviews.apache.org/r/71194/
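    
    For reference, a minimal, self-contained sketch (not the Mesos helper
    itself) of how a directory is labeled with an XFS project ID so that
    its contents count against that project's quota. It assumes a kernel
    that exposes `struct fsxattr` and the FS_IOC_FSGETXATTR /
    FS_IOC_FSSETXATTR ioctls through <linux/fs.h>; this is roughly what
    the isolator's xfs::setProjectId() helper does with the XFS-specific
    ioctls, with the quota limit for the project configured separately.
    
        #include <fcntl.h>
        #include <sys/ioctl.h>
        #include <unistd.h>
        
        #include <linux/fs.h> // fsxattr, FS_IOC_FS{GET,SET}XATTR, FS_XFLAG_PROJINHERIT
        
        #include <cstdint>
        #include <string>
        
        // Label `directory` with `projectId` and mark it so that new files
        // and subdirectories inherit the same project (and its quota).
        // Returns false on any failure; a real helper would surface errno.
        static bool setProjectId(const std::string& directory, uint32_t projectId)
        {
          int fd = ::open(directory.c_str(), O_RDONLY | O_DIRECTORY);
          if (fd < 0) {
            return false;
          }
        
          struct fsxattr attr;
          if (::ioctl(fd, FS_IOC_FSGETXATTR, &attr) < 0) {
            ::close(fd);
            return false;
          }
        
          attr.fsx_projid = projectId;
          attr.fsx_xflags |= FS_XFLAG_PROJINHERIT;
        
          bool ok = ::ioctl(fd, FS_IOC_FSSETXATTR, &attr) == 0;
          ::close(fd);
          return ok;
        }
    
    Applying the same project ID to both the sandbox and each ephemeral
    volume is what makes their combined usage count against one quota.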
---
 .../containerizer/mesos/isolators/xfs/disk.cpp     | 152 +++++++++++++++++++--
 .../mesos/provisioner/backends/overlay.cpp         |  21 ++-
 .../mesos/provisioner/backends/overlay.hpp         |   6 +
 3 files changed, 166 insertions(+), 13 deletions(-)

diff --git a/src/slave/containerizer/mesos/isolators/xfs/disk.cpp b/src/slave/containerizer/mesos/isolators/xfs/disk.cpp
index 5454432..1680a59 100644
--- a/src/slave/containerizer/mesos/isolators/xfs/disk.cpp
+++ b/src/slave/containerizer/mesos/isolators/xfs/disk.cpp
@@ -38,6 +38,8 @@
 
 #include "slave/paths.hpp"
 
+#include "slave/containerizer/mesos/provisioner/backends/overlay.hpp"
+
 using std::list;
 using std::make_pair;
 using std::pair;
@@ -317,6 +319,64 @@ Future<Nothing> XfsDiskIsolatorProcess::recover(
     }
   }
 
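+  // Recover the ephemeral volumes recorded in each container state. These
+  // share the sandbox project ID, so label any volume that was missed
+  // during launch and schedule each one so its project ID can be reclaimed.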
+  foreach (const ContainerState& state, states) {
+    foreach (const string& directory, state.ephemeral_volumes()) {
+      Result<prid_t> projectId = xfs::getProjectId(directory);
+      if (projectId.isError()) {
+        return Failure(projectId.error());
+      }
+
+      // Ephemeral volumes should have been assigned a project ID, but
+      // that's not atomic, so if we were killed during a container
+      // launch, we can't guarantee that we labeled all the ephemeral
+      // volumes.
+      if (projectId.isNone()) {
+        if (!infos.contains(state.container_id())) {
+          LOG(WARNING) << "Missing project ID for ephemeral volume at '"
+                       << directory << "'";
+          continue;
+        }
+
+        // We have an unlabeled ephemeral volume for a known container. Use
+        // the project ID of the corresponding sandbox path to label it.
+        const Owned<Info>& info = infos.at(state.container_id());
+
+        foreachvalue (const Info::PathInfo& pathInfo, info->paths) {
+          // Skip persistent volumes.
+          if (pathInfo.disk.isSome()) {
+            continue;
+          }
+
+          Try<Nothing> status =
+            xfs::setProjectId(directory, pathInfo.projectId);
+          if (status.isError()) {
+            return Failure(
+                "Failed to assign project " +
+                stringify(pathInfo.projectId) + ": " + status.error());
+          }
+
+          projectId = pathInfo.projectId;
+          break;
+        }
+      }
+
+      Try<Nothing> scheduled = scheduleProjectRoot(projectId.get(), directory);
+      if (scheduled.isError()) {
+        return Failure(
+            "Unable to schedule project ID " + stringify(projectId.get()) +
+            " for reclaimation: " + scheduled.error());
+      }
+
+      // If we are still managing this project ID, we should have
+      // tracked it when we added sandboxes above.
+      if (totalProjectIds.contains(projectId.get())) {
+        CHECK_NOT_CONTAINS(freeProjectIds, projectId.get());
+      }
+    }
+  }
+
   Try<list<string>> volumes = paths::getPersistentVolumePaths(workDir);
 
   if (volumes.isError()) {
@@ -355,8 +415,52 @@ Future<Nothing> XfsDiskIsolatorProcess::recover(
 
     Try<Nothing> scheduled = scheduleProjectRoot(projectId.get(), directory);
     if (scheduled.isError()) {
-      LOG(ERROR) << "Unable to schedule project ID " << projectId.get()
-                 << " for reclaimation: " << scheduled.error();
+      return Failure(
+          "Unable to schedule project ID " + stringify(projectId.get()) +
+          " for reclaimation: " + scheduled.error());
+    }
+  }
+
+  // Scan ephemeral provisioner directories to pick up any project IDs that
+  // aren't already captured by the recovered container states. Admittedly,
+  // it's a bit hacky to specifically check the overlay backend here,
+  // but it's not really worse than the sandbox scanning we do above.
+  Try<list<string>> provisionerDirs =
+    OverlayBackend::listEphemeralVolumes(workDir);
+
+  if (provisionerDirs.isError()) {
+    return Failure("Failed to scan overlay provisioner directories: " +
+                   provisionerDirs.error());
+  }
+
+  foreach (const string& directory, provisionerDirs.get()) {
+    if (!os::stat::isdir(directory)) {
+      continue;
+    }
+
+    Result<prid_t> projectId = xfs::getProjectId(directory);
+    if (projectId.isError()) {
+      return Failure(projectId.error());
+    }
+
+    // Not all provisioner directories will have a project ID.
+    if (projectId.isNone()) {
+      continue;
+    }
+
+    // We most likely already accounted for this project ID when we
+    // recovered the containers above, so only claim it if it is still
+    // marked free; otherwise we would count it twice.
+    if (totalProjectIds.contains(projectId.get()) &&
+        freeProjectIds.contains(projectId.get())) {
+      --metrics.project_ids_free;
+      freeProjectIds -= projectId.get();
+    }
+
+    Try<Nothing> scheduled = scheduleProjectRoot(projectId.get(), directory);
+    if (scheduled.isError()) {
+      return Failure(
+          "Unable to schedule project ID " + stringify(projectId.get()) +
+          " for reclaimation: " + scheduled.error());
     }
   }
 
@@ -395,8 +499,30 @@ Future<Option<ContainerLaunchInfo>> XfsDiskIsolatorProcess::prepare(
         status.error());
   }
 
-  LOG(INFO) << "Assigned project " << stringify(projectId.get()) << " to '"
-            << containerConfig.directory() << "'";
+  LOG(INFO) << "Assigned project " << stringify(projectId.get())
+            << " to '" << containerConfig.directory() << "'";
+
+  // The ephemeral volumes share the same quota as the sandbox, so label
+  // them with the project ID now.
+  foreach (const string& directory, containerConfig.ephemeral_volumes()) {
+    Try<Nothing> status = xfs::setProjectId(directory, projectId.get());
+
+    if (status.isError()) {
+      return Failure(
+          "Failed to assign project " + stringify(projectId.get()) + ": " +
+          status.error());
+    }
+
+    LOG(INFO) << "Assigned project " << stringify(projectId.get())
+              << " to '" << directory << "'";
+
+    Try<Nothing> scheduled = scheduleProjectRoot(projectId.get(), directory);
+    if (scheduled.isError()) {
+      return Failure(
+          "Unable to schedule project ID " + stringify(projectId.get()) +
+          " for reclaimation: " + scheduled.error());
+    }
+  }
 
   return update(containerId, containerConfig.resources())
     .then([]() -> Future<Option<ContainerLaunchInfo>> {
@@ -409,7 +535,7 @@ Future<ContainerLimitation> XfsDiskIsolatorProcess::watch(
     const ContainerID& containerId)
 {
   if (infos.contains(containerId)) {
-    return infos[containerId]->limitation.future();
+    return infos.at(containerId)->limitation.future();
   }
 
   // Any container that did not have a project ID assigned when
@@ -477,7 +603,7 @@ Future<Nothing> XfsDiskIsolatorProcess::update(
     return Nothing();
   }
 
-  const Owned<Info>& info = infos[containerId];
+  const Owned<Info>& info = infos.at(containerId);
 
   // First, apply the disk quota to the sandbox.
   Option<Bytes> sandboxQuota = getSandboxDisk(resources);
@@ -565,8 +691,9 @@ Future<Nothing> XfsDiskIsolatorProcess::update(
 
     Try<Nothing> scheduled = scheduleProjectRoot(projectId.get(), directory);
     if (scheduled.isError()) {
-      LOG(ERROR) << "Unable to schedule project " << projectId.get()
-                  << " for reclaimation: " << scheduled.error();
+      return Failure(
+          "Unable to schedule project " + stringify(projectId.get()) +
+          " for reclaimation: " + scheduled.error());
     }
   }
 
@@ -637,7 +764,7 @@ Future<ResourceStatistics> XfsDiskIsolatorProcess::usage(
     return ResourceStatistics();
   }
 
-  const Owned<Info>& info = infos[containerId];
+  const Owned<Info>& info = infos.at(containerId);
   ResourceStatistics statistics;
 
   foreachpair(
@@ -706,7 +833,7 @@ Future<Nothing> XfsDiskIsolatorProcess::cleanup(const ContainerID& containerId)
     return Nothing();
   }
 
-  const Owned<Info>& info = infos[containerId];
+  const Owned<Info>& info = infos.at(containerId);
 
   // Schedule the directory for project ID reclaimation.
   //
@@ -724,8 +851,9 @@ Future<Nothing> XfsDiskIsolatorProcess::cleanup(const ContainerID& containerId)
       const string& directory, const Info::PathInfo& pathInfo, info->paths) {
     Try<Nothing> scheduled = scheduleProjectRoot(pathInfo.projectId, directory);
     if (scheduled.isError()) {
-      LOG(ERROR) << "Unable to schedule project " << pathInfo.projectId
-                 << " for reclaimation: " << scheduled.error();
+      return Failure(
+          "Unable to schedule project " + stringify(pathInfo.projectId) +
+          " for reclaimation: " + scheduled.error());
     }
   }
 
diff --git a/src/slave/containerizer/mesos/provisioner/backends/overlay.cpp b/src/slave/containerizer/mesos/provisioner/backends/overlay.cpp
index 77d6711..cf261d1 100644
--- a/src/slave/containerizer/mesos/provisioner/backends/overlay.cpp
+++ b/src/slave/containerizer/mesos/provisioner/backends/overlay.cpp
@@ -14,6 +14,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include "slave/containerizer/mesos/provisioner/backends/overlay.hpp"
+
 #include <process/dispatch.hpp>
 #include <process/id.hpp>
 #include <process/process.hpp>
@@ -27,7 +29,9 @@
 
 #include "linux/fs.hpp"
 
-#include "slave/containerizer/mesos/provisioner/backends/overlay.hpp"
+#include "slave/paths.hpp"
+
+#include "slave/containerizer/mesos/provisioner/constants.hpp"
 
 using process::Failure;
 using process::Future;
@@ -63,6 +67,21 @@ public:
 };
 
 
+Try<std::list<std::string>> OverlayBackend::listEphemeralVolumes(
+    const string& workDir)
+{
+  return os::glob(path::join(
+    paths::getProvisionerDir(workDir),
+    "containers",
+    "*", /* ContainerID */
+    "backends",
+    OVERLAY_BACKEND, /* backendDir */
+    "scratch",
+    "*", /* rootfs ID */
+    "*"));
+}
+
+
 Try<Owned<Backend>> OverlayBackend::create(const Flags&)
 {
   if (geteuid() != 0) {
diff --git a/src/slave/containerizer/mesos/provisioner/backends/overlay.hpp b/src/slave/containerizer/mesos/provisioner/backends/overlay.hpp
index 78896b6..e116064 100644
--- a/src/slave/containerizer/mesos/provisioner/backends/overlay.hpp
+++ b/src/slave/containerizer/mesos/provisioner/backends/overlay.hpp
@@ -17,6 +17,9 @@
 #ifndef __MESOS_PROVISIONER_OVERLAY_HPP__
 #define __MESOS_PROVISIONER_OVERLAY_HPP__
 
+#include <list>
+#include <string>
+
 #include "slave/containerizer/mesos/provisioner/backend.hpp"
 
 namespace mesos {
@@ -49,6 +52,9 @@ public:
 
   static Try<process::Owned<Backend>> create(const Flags&);
 
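+  // Returns the ephemeral volume directories (the per-rootfs scratch
+  // subdirectories) that the overlay backend provisions under the
+  // given agent work directory.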
+  static Try<std::list<std::string>> listEphemeralVolumes(
+      const std::string& workDir);
+
   process::Future<Option<std::vector<Path>>> provision(
       const std::vector<std::string>& layers,
       const std::string& rootfs,