You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by qi...@apache.org on 2018/01/17 04:13:06 UTC

[1/4] mesos git commit: Made task's volume directory visible in the /files endpoints.

Repository: mesos
Updated Branches:
  refs/heads/1.5.x af64bcb38 -> 1d9b5de7d


Made task's volume directory visible in the /files endpoints.

In MESOS-7225, we made a task can access any volumes specified in its
disk resources from its own sandbox by introducing a workaround to the
default executor, i.e., add a `SANDBOX_PATH` volume with type `PARENT`
to the corresponding nested container. It will be translated into a bind
mount in the nested container's mount namespace, thus not visible in the
host mount namespace, that means the task's volume directory can not be
visible in Mesos UI since it operates in the host mount namespace.

In this patch, to make the task's volume directory visible in Mesos UI,
we attached the executor's volume directory to it, so when users browse
task's volume directory in Mesos UI, what they actually browse is the
executor's volume directory. Note when calling `Files::attach()`, the
third argument `authorized` is not specified, that is because it is
already specified when we do the attach for the executor's sandbox and
it is also applied to the executor's tasks.

Review: https://reviews.apache.org/r/64978


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/f2466a76
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/f2466a76
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/f2466a76

Branch: refs/heads/1.5.x
Commit: f2466a76e2250b64f54879bdb70d358d06762754
Parents: a198637
Author: Qian Zhang <zh...@gmail.com>
Authored: Fri Jan 5 23:33:44 2018 +0800
Committer: Qian Zhang <zh...@gmail.com>
Committed: Wed Jan 17 11:55:36 2018 +0800

----------------------------------------------------------------------
 src/slave/slave.cpp | 129 +++++++++++++++++++++++++++++++++++++++++++++--
 src/slave/slave.hpp |  23 +++++++++
 2 files changed, 147 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/f2466a76/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index aefea9c..4a37041 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -4964,7 +4964,6 @@ void Slave::_statusUpdate(
     }
   }
 
-
   const TaskStatus& status = update.status();
 
   Executor* executor = getExecutor(update.framework_id(), executorId);
@@ -5871,7 +5870,24 @@ void Slave::removeExecutor(Framework* framework, Executor* executor)
 
   os::utime(path); // Update the modification time.
   garbageCollect(path)
-    .onAny(defer(self(), &Self::detachFile, path));
+    .onAny(defer(self(), [=](const Future<Nothing>& future) {
+      detachFile(path);
+
+      if (executor->info.has_type() &&
+          executor->info.type() == ExecutorInfo::DEFAULT) {
+        foreachvalue (const Task* task, executor->launchedTasks) {
+          executor->detachTaskVolumeDirectory(*task);
+        }
+
+        foreachvalue (const Task* task, executor->terminatedTasks) {
+          executor->detachTaskVolumeDirectory(*task);
+        }
+
+        foreach (const shared_ptr<Task>& task, executor->completedTasks) {
+          executor->detachTaskVolumeDirectory(*task);
+        }
+      }
+    }));
 
   // Schedule the top level executor work directory, only if the
   // framework doesn't have any 'pending' tasks for this executor.
@@ -8317,7 +8333,7 @@ Executor* Framework::addExecutor(const ExecutorInfo& executorInfo)
           executorId);
     };
 
-  // We expose the executor's sandbox in the /files endpoints
+  // We expose the executor's sandbox in the /files endpoint
   // via the following paths:
   //
   //  (1) /agent_workdir/frameworks/FID/executors/EID/runs/CID
@@ -8525,7 +8541,7 @@ void Framework::recoverExecutor(
           executorId);
     };
 
-  // We expose the executor's sandbox in the /files endpoints
+  // We expose the executor's sandbox in the /files endpoint
   // via the following paths:
   //
   //  (1) /agent_workdir/frameworks/FID/executors/EID/runs/CID
@@ -8598,7 +8614,24 @@ void Framework::recoverExecutor(
         slave->flags.work_dir, slave->info.id(), id(), state.id, runId);
 
     slave->garbageCollect(path)
-       .onAny(defer(slave, &Slave::detachFile, path));
+      .onAny(defer(slave->self(), [=](const Future<Nothing>& future) {
+        slave->detachFile(path);
+
+        if (executor->info.has_type() &&
+            executor->info.type() == ExecutorInfo::DEFAULT) {
+          foreachvalue (const Task* task, executor->launchedTasks) {
+            executor->detachTaskVolumeDirectory(*task);
+          }
+
+          foreachvalue (const Task* task, executor->terminatedTasks) {
+            executor->detachTaskVolumeDirectory(*task);
+          }
+
+          foreach (const shared_ptr<Task>& task, executor->completedTasks) {
+            executor->detachTaskVolumeDirectory(*task);
+          }
+        }
+      }));
 
     // GC the executor run's meta directory.
     slave->garbageCollect(paths::getExecutorRunPath(
@@ -8901,6 +8934,10 @@ Task* Executor::addLaunchedTask(const TaskInfo& task)
 
   launchedTasks[task.task_id()] = t;
 
+  if (info.has_type() && info.type() == ExecutorInfo::DEFAULT) {
+    attachTaskVolumeDirectory(*t);
+  }
+
   return t;
 }
 
@@ -8912,6 +8949,17 @@ void Executor::completeTask(const TaskID& taskId)
   CHECK(terminatedTasks.contains(taskId))
     << "Failed to find terminated task " << taskId;
 
+  // If `completedTasks` is full and this is a default executor, we need
+  // to detach the volume directory for the first task in `completedTasks`
+  // before pushing a task into it, otherwise, we will never have chance
+  // to do the detach for that task which would be a leak.
+  if (info.has_type() &&
+      info.type() == ExecutorInfo::DEFAULT &&
+      completedTasks.full()) {
+    const shared_ptr<Task>& firstTask = completedTasks.front();
+    detachTaskVolumeDirectory(*firstTask);
+  }
+
   Task* task = terminatedTasks[taskId];
   completedTasks.push_back(shared_ptr<Task>(task));
   terminatedTasks.erase(taskId);
@@ -8982,6 +9030,10 @@ void Executor::recoverTask(const TaskState& state, bool recheckpointTask)
 
   launchedTasks[state.id] = task;
 
+  if (info.has_type() && info.type() == ExecutorInfo::DEFAULT) {
+    attachTaskVolumeDirectory(*task);
+  }
+
   // Read updates to get the latest state of the task.
   foreach (const StatusUpdate& update, state.updates) {
     Try<Nothing> updated = updateTaskState(update.status());
@@ -9087,6 +9139,73 @@ bool Executor::incompleteTasks()
 }
 
 
+void Executor::attachTaskVolumeDirectory(const Task& task)
+{
+  CHECK(info.has_type() && info.type() == ExecutorInfo::DEFAULT);
+
+  foreach (const Resource& resource, task.resources()) {
+    // Ignore if there are no disk resources or if the
+    // disk resources did not specify a volume mapping.
+    if (!resource.has_disk() || !resource.disk().has_volume()) {
+      continue;
+    }
+
+    const Volume& volume = resource.disk().volume();
+
+    const string executorVolumePath =
+      path::join(directory, volume.container_path());
+
+    const string taskPath = paths::getTaskPath(
+        slave->flags.work_dir,
+        slave->info.id(),
+        frameworkId,
+        id,
+        containerId,
+        task.task_id());
+
+    const string taskVolumePath =
+      path::join(taskPath, volume.container_path());
+
+    slave->files->attach(executorVolumePath, taskVolumePath)
+      .onAny(defer(
+          slave,
+          &Slave::fileAttached,
+          lambda::_1,
+          executorVolumePath,
+          taskVolumePath));
+  }
+}
+
+
+void Executor::detachTaskVolumeDirectory(const Task& task)
+{
+  CHECK(info.has_type() && info.type() == ExecutorInfo::DEFAULT);
+
+  foreach (const Resource& resource, task.resources()) {
+    // Ignore if there are no disk resources or if the
+    // disk resources did not specify a volume mapping.
+    if (!resource.has_disk() || !resource.disk().has_volume()) {
+      continue;
+    }
+
+    const Volume& volume = resource.disk().volume();
+
+    const string taskPath = paths::getTaskPath(
+        slave->flags.work_dir,
+        slave->info.id(),
+        frameworkId,
+        id,
+        containerId,
+        task.task_id());
+
+    const string taskVolumePath =
+      path::join(taskPath, volume.container_path());
+
+    slave->files->detach(taskVolumePath);
+  }
+}
+
+
 bool Executor::isGeneratedForCommandTask() const
 {
   return isGeneratedForCommandTask_;

http://git-wip-us.apache.org/repos/asf/mesos/blob/f2466a76/src/slave/slave.hpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.hpp b/src/slave/slave.hpp
index ef0eae2..a07f046 100644
--- a/src/slave/slave.hpp
+++ b/src/slave/slave.hpp
@@ -833,6 +833,29 @@ public:
   // Returns true if there are any queued/launched/terminated tasks.
   bool incompleteTasks();
 
+  // TODO(qianzhang): This is a workaround to make the default executor
+  // task's volume directory visible in MESOS UI. In MESOS-7225, we made
+  // sure a task can access any volumes specified in its disk resources
+  // from its sandbox by introducing a workaround to the default executor,
+  // i.e., adding a `SANDBOX_PATH` volume with type `PARENT` to the
+  // corresponding nested container. This volume gets translated into a
+  // bind mount in the nested container's mount namespace, which is is not
+  // visible in Mesos UI because it operates in the host namespace. See
+  // Mesos-8279 for details.
+  //
+  // To make the task's volume directory visible in Mesos UI, here we
+  // attach the executor's volume directory to it, so when users browse
+  // task's volume directory in Mesos UI, what they actually browse is the
+  // executor's volume directory. Note when calling `Files::attach()`, the
+  // third argument `authorized` is not specified because it is already
+  // specified when we do the attach for the executor's sandbox and it also
+  // applies to the executor's tasks.
+  void attachTaskVolumeDirectory(const Task& task);
+
+  // TODO(qianzhang): Remove the task's volume directory from the /files
+  // endpoint. This is a workaround for MESOS-8279.
+  void detachTaskVolumeDirectory(const Task& task);
+
   // Sends a message to the connected executor.
   template <typename Message>
   void send(const Message& message)


[2/4] mesos git commit: Detached the virtual paths regardless of the result of gc.

Posted by qi...@apache.org.
Detached the virtual paths regardless of the result of gc.

Previously we only detach the following paths when the gc for the
executor's sandbox succeeds.
  1. /agent_workdir/frameworks/FID/executors/EID/runs/CID
  2. /agent_workdir/frameworks/FID/executors/EID/runs/latest
  3. /frameworks/FID/executors/EID/runs/latest

But the problem is, such gc may not always succeed, e.g., it may fail
due to the parent directory of the executor's sandbox already gc'ed.

Now in this patch, we will detach those paths regardless of the result
of gc.

Review: https://reviews.apache.org/r/65156


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/a198637d
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/a198637d
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/a198637d

Branch: refs/heads/1.5.x
Commit: a198637d3805765d11a2474b3a66097592c9b065
Parents: af64bcb
Author: Qian Zhang <zh...@gmail.com>
Authored: Sun Jan 14 22:02:33 2018 +0800
Committer: Qian Zhang <zh...@gmail.com>
Committed: Wed Jan 17 11:55:36 2018 +0800

----------------------------------------------------------------------
 src/slave/slave.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/a198637d/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 3e7281f..aefea9c 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -5871,7 +5871,7 @@ void Slave::removeExecutor(Framework* framework, Executor* executor)
 
   os::utime(path); // Update the modification time.
   garbageCollect(path)
-    .then(defer(self(), &Self::detachFile, path));
+    .onAny(defer(self(), &Self::detachFile, path));
 
   // Schedule the top level executor work directory, only if the
   // framework doesn't have any 'pending' tasks for this executor.
@@ -5895,10 +5895,9 @@ void Slave::removeExecutor(Framework* framework, Executor* executor)
 
     os::utime(path); // Update the modification time.
     garbageCollect(path)
-      .then(defer(self(), [=]() {
+      .onAny(defer(self(), [=](const Future<Nothing>& future) {
         detachFile(latestPath);
         detachFile(virtualLatestPath);
-        return Nothing();
       }));
   }
 
@@ -8599,7 +8598,7 @@ void Framework::recoverExecutor(
         slave->flags.work_dir, slave->info.id(), id(), state.id, runId);
 
     slave->garbageCollect(path)
-       .then(defer(slave, &Slave::detachFile, path));
+       .onAny(defer(slave, &Slave::detachFile, path));
 
     // GC the executor run's meta directory.
     slave->garbageCollect(paths::getExecutorRunPath(
@@ -8608,7 +8607,7 @@ void Framework::recoverExecutor(
     // GC the top level executor work directory.
     slave->garbageCollect(paths::getExecutorPath(
         slave->flags.work_dir, slave->info.id(), id(), state.id))
-        .then(defer(slave, &Slave::detachFile, latestPath));
+        .onAny(defer(slave, &Slave::detachFile, latestPath));
 
     // GC the top level executor meta directory.
     slave->garbageCollect(paths::getExecutorPath(


[4/4] mesos git commit: Added MESOS-8279, MESOS-8444 and MESOS-8446 to the 1.5.0 CHANGELOG.

Posted by qi...@apache.org.
Added MESOS-8279, MESOS-8444 and MESOS-8446 to the 1.5.0 CHANGELOG.


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/1d9b5de7
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/1d9b5de7
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/1d9b5de7

Branch: refs/heads/1.5.x
Commit: 1d9b5de7d6ad796f5b0ea8e9c4e5541dda6984a4
Parents: f25c9b3
Author: Qian Zhang <zh...@gmail.com>
Authored: Wed Jan 17 12:06:32 2018 +0800
Committer: Qian Zhang <zh...@gmail.com>
Committed: Wed Jan 17 12:08:49 2018 +0800

----------------------------------------------------------------------
 CHANGELOG | 8 ++++++++
 1 file changed, 8 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/1d9b5de7/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index fa0d03a..47e1150 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -61,6 +61,14 @@ Feature Graduations:
 All Experimental Features:
 
 
+All Resolved Issues:
+
+** Bug
+  * [MESOS-8279] - Persistent volumes are not visible in Mesos UI using default executor on Linux.
+  * [MESOS-8444] - GC failure causes agent miss to detach virtual paths for the executor's sandbox.
+  * [MESOS-8446] - Agent miss to detach `virtualLatestPath` for the executor's sandbox during recovery.
+
+
 Release Notes - Mesos - Version 1.4.2 (WIP)
 -------------------------------------------
 * This is a bug fix release.


[3/4] mesos git commit: Detached `virtualLatestPath` when recovering the executor.

Posted by qi...@apache.org.
Detached `virtualLatestPath` when recovering the executor.

Previously we miss to detach `/frameworks/FID/executors/EID/runs/latest`
when we find the latest run of the executor was completed in the method
`Framework::recoverExecutor`, that is a leak.

Review: https://reviews.apache.org/r/65167


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/f25c9b3f
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/f25c9b3f
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/f25c9b3f

Branch: refs/heads/1.5.x
Commit: f25c9b3f77df9f4edc250e18be7392391d19d024
Parents: f2466a7
Author: Qian Zhang <zh...@gmail.com>
Authored: Mon Jan 15 16:40:00 2018 +0800
Committer: Qian Zhang <zh...@gmail.com>
Committed: Wed Jan 17 12:08:49 2018 +0800

----------------------------------------------------------------------
 src/slave/slave.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/f25c9b3f/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 4a37041..956f79d 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -8640,7 +8640,10 @@ void Framework::recoverExecutor(
     // GC the top level executor work directory.
     slave->garbageCollect(paths::getExecutorPath(
         slave->flags.work_dir, slave->info.id(), id(), state.id))
-        .onAny(defer(slave, &Slave::detachFile, latestPath));
+        .onAny(defer(slave->self(), [=](const Future<Nothing>& future) {
+          slave->detachFile(latestPath);
+          slave->detachFile(virtualLatestPath);
+        }));
 
     // GC the top level executor meta directory.
     slave->garbageCollect(paths::getExecutorPath(