You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by qi...@apache.org on 2018/01/17 02:58:08 UTC

[2/4] mesos git commit: Made task's volume directory visible in the /files endpoints.

Made task's volume directory visible in the /files endpoints.

In MESOS-7225, we made it possible for a task to access any volumes
specified in its disk resources from its own sandbox by introducing a
workaround to the default executor, i.e., adding a `SANDBOX_PATH` volume
with type `PARENT` to the corresponding nested container. That volume is
translated into a bind mount in the nested container's mount namespace
and is thus not visible in the host mount namespace. This means the
task's volume directory is not visible in the Mesos UI, since the UI
operates in the host mount namespace.

In this patch, to make the task's volume directory visible in the Mesos
UI, we attach the executor's volume directory to it, so when users
browse a task's volume directory in the Mesos UI, what they actually
browse is the executor's volume directory. Note that when calling
`Files::attach()`, the third argument `authorized` is not specified;
that is because it is already specified when we do the attach for the
executor's sandbox, and it also applies to the executor's tasks.

Review: https://reviews.apache.org/r/64978


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/e126254e
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/e126254e
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/e126254e

Branch: refs/heads/master
Commit: e126254edd6abdad7f765dfa97ac8f695c88aca7
Parents: 5225a49
Author: Qian Zhang <zh...@gmail.com>
Authored: Fri Jan 5 23:33:44 2018 +0800
Committer: Qian Zhang <zh...@gmail.com>
Committed: Wed Jan 17 10:04:30 2018 +0800

----------------------------------------------------------------------
 src/slave/slave.cpp | 129 +++++++++++++++++++++++++++++++++++++++++++++--
 src/slave/slave.hpp |  23 +++++++++
 2 files changed, 147 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/e126254e/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 155d9f0..45e6f9b 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -4966,7 +4966,6 @@ void Slave::_statusUpdate(
     }
   }
 
-
   const TaskStatus& status = update.status();
 
   Executor* executor = getExecutor(update.framework_id(), executorId);
@@ -5873,7 +5872,24 @@ void Slave::removeExecutor(Framework* framework, Executor* executor)
 
   os::utime(path); // Update the modification time.
   garbageCollect(path)
-    .onAny(defer(self(), &Self::detachFile, path));
+    .onAny(defer(self(), [=](const Future<Nothing>& future) {
+      detachFile(path);
+
+      if (executor->info.has_type() &&
+          executor->info.type() == ExecutorInfo::DEFAULT) {
+        foreachvalue (const Task* task, executor->launchedTasks) {
+          executor->detachTaskVolumeDirectory(*task);
+        }
+
+        foreachvalue (const Task* task, executor->terminatedTasks) {
+          executor->detachTaskVolumeDirectory(*task);
+        }
+
+        foreach (const shared_ptr<Task>& task, executor->completedTasks) {
+          executor->detachTaskVolumeDirectory(*task);
+        }
+      }
+    }));
 
   // Schedule the top level executor work directory, only if the
   // framework doesn't have any 'pending' tasks for this executor.
@@ -8333,7 +8349,7 @@ Executor* Framework::addExecutor(const ExecutorInfo& executorInfo)
           executorId);
     };
 
-  // We expose the executor's sandbox in the /files endpoints
+  // We expose the executor's sandbox in the /files endpoint
   // via the following paths:
   //
   //  (1) /agent_workdir/frameworks/FID/executors/EID/runs/CID
@@ -8541,7 +8557,7 @@ void Framework::recoverExecutor(
           executorId);
     };
 
-  // We expose the executor's sandbox in the /files endpoints
+  // We expose the executor's sandbox in the /files endpoint
   // via the following paths:
   //
   //  (1) /agent_workdir/frameworks/FID/executors/EID/runs/CID
@@ -8614,7 +8630,24 @@ void Framework::recoverExecutor(
         slave->flags.work_dir, slave->info.id(), id(), state.id, runId);
 
     slave->garbageCollect(path)
-       .onAny(defer(slave, &Slave::detachFile, path));
+      .onAny(defer(slave->self(), [=](const Future<Nothing>& future) {
+        slave->detachFile(path);
+
+        if (executor->info.has_type() &&
+            executor->info.type() == ExecutorInfo::DEFAULT) {
+          foreachvalue (const Task* task, executor->launchedTasks) {
+            executor->detachTaskVolumeDirectory(*task);
+          }
+
+          foreachvalue (const Task* task, executor->terminatedTasks) {
+            executor->detachTaskVolumeDirectory(*task);
+          }
+
+          foreach (const shared_ptr<Task>& task, executor->completedTasks) {
+            executor->detachTaskVolumeDirectory(*task);
+          }
+        }
+      }));
 
     // GC the executor run's meta directory.
     slave->garbageCollect(paths::getExecutorRunPath(
@@ -8917,6 +8950,10 @@ Task* Executor::addLaunchedTask(const TaskInfo& task)
 
   launchedTasks[task.task_id()] = t;
 
+  if (info.has_type() && info.type() == ExecutorInfo::DEFAULT) {
+    attachTaskVolumeDirectory(*t);
+  }
+
   return t;
 }
 
@@ -8928,6 +8965,17 @@ void Executor::completeTask(const TaskID& taskId)
   CHECK(terminatedTasks.contains(taskId))
     << "Failed to find terminated task " << taskId;
 
+  // If `completedTasks` is full and this is a default executor, we need
+  // to detach the volume directory for the first task in `completedTasks`
+  // before pushing a task into it, otherwise, we will never have chance
+  // to do the detach for that task which would be a leak.
+  if (info.has_type() &&
+      info.type() == ExecutorInfo::DEFAULT &&
+      completedTasks.full()) {
+    const shared_ptr<Task>& firstTask = completedTasks.front();
+    detachTaskVolumeDirectory(*firstTask);
+  }
+
   Task* task = terminatedTasks[taskId];
   completedTasks.push_back(shared_ptr<Task>(task));
   terminatedTasks.erase(taskId);
@@ -8998,6 +9046,10 @@ void Executor::recoverTask(const TaskState& state, bool recheckpointTask)
 
   launchedTasks[state.id] = task;
 
+  if (info.has_type() && info.type() == ExecutorInfo::DEFAULT) {
+    attachTaskVolumeDirectory(*task);
+  }
+
   // Read updates to get the latest state of the task.
   foreach (const StatusUpdate& update, state.updates) {
     Try<Nothing> updated = updateTaskState(update.status());
@@ -9103,6 +9155,73 @@ bool Executor::incompleteTasks()
 }
 
 
+void Executor::attachTaskVolumeDirectory(const Task& task)
+{
+  CHECK(info.has_type() && info.type() == ExecutorInfo::DEFAULT);
+
+  foreach (const Resource& resource, task.resources()) {
+    // Skip any resource that is not a disk resource or whose
+    // disk does not carry a volume; only volumes are attached.
+    if (!resource.has_disk() || !resource.disk().has_volume()) {
+      continue;
+    }
+
+    const Volume& volume = resource.disk().volume();
+
+    const string executorVolumePath =
+      path::join(directory, volume.container_path());
+
+    const string taskPath = paths::getTaskPath(
+        slave->flags.work_dir,
+        slave->info.id(),
+        frameworkId,
+        id,
+        containerId,
+        task.task_id());
+
+    const string taskVolumePath =
+      path::join(taskPath, volume.container_path());
+
+    slave->files->attach(executorVolumePath, taskVolumePath)
+      .onAny(defer(
+          slave,
+          &Slave::fileAttached,
+          lambda::_1,
+          executorVolumePath,
+          taskVolumePath));
+  }
+}
+
+
+void Executor::detachTaskVolumeDirectory(const Task& task)
+{
+  CHECK(info.has_type() && info.type() == ExecutorInfo::DEFAULT);
+
+  foreach (const Resource& resource, task.resources()) {
+    // Skip any resource that is not a disk resource or whose
+    // disk does not carry a volume; it has nothing to detach.
+    if (!resource.has_disk() || !resource.disk().has_volume()) {
+      continue;
+    }
+
+    const Volume& volume = resource.disk().volume();
+
+    const string taskPath = paths::getTaskPath(
+        slave->flags.work_dir,
+        slave->info.id(),
+        frameworkId,
+        id,
+        containerId,
+        task.task_id());
+
+    const string taskVolumePath =
+      path::join(taskPath, volume.container_path());
+
+    slave->files->detach(taskVolumePath);
+  }
+}
+
+
 bool Executor::isGeneratedForCommandTask() const
 {
   return isGeneratedForCommandTask_;

http://git-wip-us.apache.org/repos/asf/mesos/blob/e126254e/src/slave/slave.hpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.hpp b/src/slave/slave.hpp
index ef0eae2..a07f046 100644
--- a/src/slave/slave.hpp
+++ b/src/slave/slave.hpp
@@ -833,6 +833,29 @@ public:
   // Returns true if there are any queued/launched/terminated tasks.
   bool incompleteTasks();
 
+  // TODO(qianzhang): This is a workaround to make the default executor
+  // task's volume directory visible in Mesos UI. In MESOS-7225, we made
+  // sure a task can access any volumes specified in its disk resources
+  // from its sandbox by introducing a workaround to the default executor,
+  // i.e., adding a `SANDBOX_PATH` volume with type `PARENT` to the
+  // corresponding nested container. This volume gets translated into a
+  // bind mount in the nested container's mount namespace, which is not
+  // visible in Mesos UI because the UI operates in the host namespace.
+  // See MESOS-8279 for details.
+  //
+  // To make the task's volume directory visible in Mesos UI, here we
+  // attach the executor's volume directory to it, so when users browse
+  // task's volume directory in Mesos UI, what they actually browse is the
+  // executor's volume directory. Note when calling `Files::attach()`, the
+  // third argument `authorized` is not specified because it is already
+  // specified when we do the attach for the executor's sandbox and it also
+  // applies to the executor's tasks.
+  void attachTaskVolumeDirectory(const Task& task);
+
+  // TODO(qianzhang): Remove the task's volume directory from the /files
+  // endpoint. This is a workaround for MESOS-8279.
+  void detachTaskVolumeDirectory(const Task& task);
+
   // Sends a message to the connected executor.
   template <typename Message>
   void send(const Message& message)