You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by qi...@apache.org on 2018/01/17 02:58:07 UTC

[1/4] mesos git commit: Detached `virtualLatestPath` when recovering the executor.

Repository: mesos
Updated Branches:
  refs/heads/master 9e2f9a240 -> 2c5da1b66


Detached `virtualLatestPath` when recovering the executor.

Previously we miss to detach `/frameworks/FID/executors/EID/runs/latest`
when we find the latest run of the executor was completed in the method
`Framework::recoverExecutor`, that is a leak.

Review: https://reviews.apache.org/r/65167


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/2c5da1b6
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/2c5da1b6
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/2c5da1b6

Branch: refs/heads/master
Commit: 2c5da1b668de91e33831caafb18a3b4d71b26c69
Parents: 9585a21
Author: Qian Zhang <zh...@gmail.com>
Authored: Mon Jan 15 16:40:00 2018 +0800
Committer: Qian Zhang <zh...@gmail.com>
Committed: Wed Jan 17 10:04:30 2018 +0800

----------------------------------------------------------------------
 src/slave/slave.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/2c5da1b6/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 45e6f9b..1672c06 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -8656,7 +8656,10 @@ void Framework::recoverExecutor(
     // GC the top level executor work directory.
     slave->garbageCollect(paths::getExecutorPath(
         slave->flags.work_dir, slave->info.id(), id(), state.id))
-        .onAny(defer(slave, &Slave::detachFile, latestPath));
+        .onAny(defer(slave->self(), [=](const Future<Nothing>& future) {
+          slave->detachFile(latestPath);
+          slave->detachFile(virtualLatestPath);
+        }));
 
     // GC the top level executor meta directory.
     slave->garbageCollect(paths::getExecutorPath(


[2/4] mesos git commit: Made task's volume directory visible in the /files endpoints.

Posted by qi...@apache.org.
Made task's volume directory visible in the /files endpoints.

In MESOS-7225, we made a task can access any volumes specified in its
disk resources from its own sandbox by introducing a workaround to the
default executor, i.e., add a `SANDBOX_PATH` volume with type `PARENT`
to the corresponding nested container. It will be translated into a bind
mount in the nested container's mount namespace, thus not visible in the
host mount namespace, that means the task's volume directory can not be
visible in Mesos UI since it operates in the host mount namespace.

In this patch, to make the task's volume directory visible in Mesos UI,
we attached the executor's volume directory to it, so when users browse
task's volume directory in Mesos UI, what they actually browse is the
executor's volume directory. Note when calling `Files::attach()`, the
third argument `authorized` is not specified, that is because it is
already specified when we do the attach for the executor's sandbox and
it is also applied to the executor's tasks.

Review: https://reviews.apache.org/r/64978


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/e126254e
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/e126254e
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/e126254e

Branch: refs/heads/master
Commit: e126254edd6abdad7f765dfa97ac8f695c88aca7
Parents: 5225a49
Author: Qian Zhang <zh...@gmail.com>
Authored: Fri Jan 5 23:33:44 2018 +0800
Committer: Qian Zhang <zh...@gmail.com>
Committed: Wed Jan 17 10:04:30 2018 +0800

----------------------------------------------------------------------
 src/slave/slave.cpp | 129 +++++++++++++++++++++++++++++++++++++++++++++--
 src/slave/slave.hpp |  23 +++++++++
 2 files changed, 147 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/e126254e/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 155d9f0..45e6f9b 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -4966,7 +4966,6 @@ void Slave::_statusUpdate(
     }
   }
 
-
   const TaskStatus& status = update.status();
 
   Executor* executor = getExecutor(update.framework_id(), executorId);
@@ -5873,7 +5872,24 @@ void Slave::removeExecutor(Framework* framework, Executor* executor)
 
   os::utime(path); // Update the modification time.
   garbageCollect(path)
-    .onAny(defer(self(), &Self::detachFile, path));
+    .onAny(defer(self(), [=](const Future<Nothing>& future) {
+      detachFile(path);
+
+      if (executor->info.has_type() &&
+          executor->info.type() == ExecutorInfo::DEFAULT) {
+        foreachvalue (const Task* task, executor->launchedTasks) {
+          executor->detachTaskVolumeDirectory(*task);
+        }
+
+        foreachvalue (const Task* task, executor->terminatedTasks) {
+          executor->detachTaskVolumeDirectory(*task);
+        }
+
+        foreach (const shared_ptr<Task>& task, executor->completedTasks) {
+          executor->detachTaskVolumeDirectory(*task);
+        }
+      }
+    }));
 
   // Schedule the top level executor work directory, only if the
   // framework doesn't have any 'pending' tasks for this executor.
@@ -8333,7 +8349,7 @@ Executor* Framework::addExecutor(const ExecutorInfo& executorInfo)
           executorId);
     };
 
-  // We expose the executor's sandbox in the /files endpoints
+  // We expose the executor's sandbox in the /files endpoint
   // via the following paths:
   //
   //  (1) /agent_workdir/frameworks/FID/executors/EID/runs/CID
@@ -8541,7 +8557,7 @@ void Framework::recoverExecutor(
           executorId);
     };
 
-  // We expose the executor's sandbox in the /files endpoints
+  // We expose the executor's sandbox in the /files endpoint
   // via the following paths:
   //
   //  (1) /agent_workdir/frameworks/FID/executors/EID/runs/CID
@@ -8614,7 +8630,24 @@ void Framework::recoverExecutor(
         slave->flags.work_dir, slave->info.id(), id(), state.id, runId);
 
     slave->garbageCollect(path)
-       .onAny(defer(slave, &Slave::detachFile, path));
+      .onAny(defer(slave->self(), [=](const Future<Nothing>& future) {
+        slave->detachFile(path);
+
+        if (executor->info.has_type() &&
+            executor->info.type() == ExecutorInfo::DEFAULT) {
+          foreachvalue (const Task* task, executor->launchedTasks) {
+            executor->detachTaskVolumeDirectory(*task);
+          }
+
+          foreachvalue (const Task* task, executor->terminatedTasks) {
+            executor->detachTaskVolumeDirectory(*task);
+          }
+
+          foreach (const shared_ptr<Task>& task, executor->completedTasks) {
+            executor->detachTaskVolumeDirectory(*task);
+          }
+        }
+      }));
 
     // GC the executor run's meta directory.
     slave->garbageCollect(paths::getExecutorRunPath(
@@ -8917,6 +8950,10 @@ Task* Executor::addLaunchedTask(const TaskInfo& task)
 
   launchedTasks[task.task_id()] = t;
 
+  if (info.has_type() && info.type() == ExecutorInfo::DEFAULT) {
+    attachTaskVolumeDirectory(*t);
+  }
+
   return t;
 }
 
@@ -8928,6 +8965,17 @@ void Executor::completeTask(const TaskID& taskId)
   CHECK(terminatedTasks.contains(taskId))
     << "Failed to find terminated task " << taskId;
 
+  // If `completedTasks` is full and this is a default executor, we need
+  // to detach the volume directory for the first task in `completedTasks`
+  // before pushing a task into it, otherwise, we will never have chance
+  // to do the detach for that task which would be a leak.
+  if (info.has_type() &&
+      info.type() == ExecutorInfo::DEFAULT &&
+      completedTasks.full()) {
+    const shared_ptr<Task>& firstTask = completedTasks.front();
+    detachTaskVolumeDirectory(*firstTask);
+  }
+
   Task* task = terminatedTasks[taskId];
   completedTasks.push_back(shared_ptr<Task>(task));
   terminatedTasks.erase(taskId);
@@ -8998,6 +9046,10 @@ void Executor::recoverTask(const TaskState& state, bool recheckpointTask)
 
   launchedTasks[state.id] = task;
 
+  if (info.has_type() && info.type() == ExecutorInfo::DEFAULT) {
+    attachTaskVolumeDirectory(*task);
+  }
+
   // Read updates to get the latest state of the task.
   foreach (const StatusUpdate& update, state.updates) {
     Try<Nothing> updated = updateTaskState(update.status());
@@ -9103,6 +9155,73 @@ bool Executor::incompleteTasks()
 }
 
 
+void Executor::attachTaskVolumeDirectory(const Task& task)
+{
+  CHECK(info.has_type() && info.type() == ExecutorInfo::DEFAULT);
+
+  foreach (const Resource& resource, task.resources()) {
+    // Ignore if there are no disk resources or if the
+    // disk resources did not specify a volume mapping.
+    if (!resource.has_disk() || !resource.disk().has_volume()) {
+      continue;
+    }
+
+    const Volume& volume = resource.disk().volume();
+
+    const string executorVolumePath =
+      path::join(directory, volume.container_path());
+
+    const string taskPath = paths::getTaskPath(
+        slave->flags.work_dir,
+        slave->info.id(),
+        frameworkId,
+        id,
+        containerId,
+        task.task_id());
+
+    const string taskVolumePath =
+      path::join(taskPath, volume.container_path());
+
+    slave->files->attach(executorVolumePath, taskVolumePath)
+      .onAny(defer(
+          slave,
+          &Slave::fileAttached,
+          lambda::_1,
+          executorVolumePath,
+          taskVolumePath));
+  }
+}
+
+
+void Executor::detachTaskVolumeDirectory(const Task& task)
+{
+  CHECK(info.has_type() && info.type() == ExecutorInfo::DEFAULT);
+
+  foreach (const Resource& resource, task.resources()) {
+    // Ignore if there are no disk resources or if the
+    // disk resources did not specify a volume mapping.
+    if (!resource.has_disk() || !resource.disk().has_volume()) {
+      continue;
+    }
+
+    const Volume& volume = resource.disk().volume();
+
+    const string taskPath = paths::getTaskPath(
+        slave->flags.work_dir,
+        slave->info.id(),
+        frameworkId,
+        id,
+        containerId,
+        task.task_id());
+
+    const string taskVolumePath =
+      path::join(taskPath, volume.container_path());
+
+    slave->files->detach(taskVolumePath);
+  }
+}
+
+
 bool Executor::isGeneratedForCommandTask() const
 {
   return isGeneratedForCommandTask_;

http://git-wip-us.apache.org/repos/asf/mesos/blob/e126254e/src/slave/slave.hpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.hpp b/src/slave/slave.hpp
index ef0eae2..a07f046 100644
--- a/src/slave/slave.hpp
+++ b/src/slave/slave.hpp
@@ -833,6 +833,29 @@ public:
   // Returns true if there are any queued/launched/terminated tasks.
   bool incompleteTasks();
 
+  // TODO(qianzhang): This is a workaround to make the default executor
+  // task's volume directory visible in MESOS UI. In MESOS-7225, we made
+  // sure a task can access any volumes specified in its disk resources
+  // from its sandbox by introducing a workaround to the default executor,
+  // i.e., adding a `SANDBOX_PATH` volume with type `PARENT` to the
+  // corresponding nested container. This volume gets translated into a
+  // bind mount in the nested container's mount namespace, which is is not
+  // visible in Mesos UI because it operates in the host namespace. See
+  // Mesos-8279 for details.
+  //
+  // To make the task's volume directory visible in Mesos UI, here we
+  // attach the executor's volume directory to it, so when users browse
+  // task's volume directory in Mesos UI, what they actually browse is the
+  // executor's volume directory. Note when calling `Files::attach()`, the
+  // third argument `authorized` is not specified because it is already
+  // specified when we do the attach for the executor's sandbox and it also
+  // applies to the executor's tasks.
+  void attachTaskVolumeDirectory(const Task& task);
+
+  // TODO(qianzhang): Remove the task's volume directory from the /files
+  // endpoint. This is a workaround for MESOS-8279.
+  void detachTaskVolumeDirectory(const Task& task);
+
   // Sends a message to the connected executor.
   template <typename Message>
   void send(const Message& message)


[3/4] mesos git commit: Updated `ROOT_TaskSandboxPersistentVolume` to check `/files` endpoint.

Posted by qi...@apache.org.
Updated `ROOT_TaskSandboxPersistentVolume` to check `/files` endpoint.

Review: https://reviews.apache.org/r/65070


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/9585a217
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/9585a217
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/9585a217

Branch: refs/heads/master
Commit: 9585a2173970589f91858301c66479827c1370a9
Parents: e126254
Author: Qian Zhang <zh...@gmail.com>
Authored: Wed Jan 10 21:26:45 2018 +0800
Committer: Qian Zhang <zh...@gmail.com>
Committed: Wed Jan 17 10:04:30 2018 +0800

----------------------------------------------------------------------
 src/tests/default_executor_tests.cpp | 82 +++++++++++++++++++++++++++----
 1 file changed, 73 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/9585a217/src/tests/default_executor_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/default_executor_tests.cpp b/src/tests/default_executor_tests.cpp
index d2cf5fd..065eae6 100644
--- a/src/tests/default_executor_tests.cpp
+++ b/src/tests/default_executor_tests.cpp
@@ -2413,7 +2413,8 @@ TEST_P_TEMP_DISABLED_ON_WINDOWS(
 
 
 // This test verifies that the default executor mounts the persistent volume
-// in the task container when it is set on a task in the task group.
+// in the task container when it is set on a task in the task group, and the
+// task's volume directory can be accessed from the `/files` endpoint.
 TEST_P_TEMP_DISABLED_ON_WINDOWS(
     PersistentVolumeDefaultExecutor, ROOT_TaskSandboxPersistentVolume)
 {
@@ -2489,7 +2490,7 @@ TEST_P_TEMP_DISABLED_ON_WINDOWS(
   v1::TaskInfo taskInfo = v1::createTask(
       offer.agent_id(),
       reserved.apply(v1::CREATE(volume)).get(),
-      "echo abc > task_volume_path/file");
+      "echo abc > task_volume_path/file && sleep 1000");
 
   v1::Offer::Operation reserve = v1::RESERVE(reserved);
   v1::Offer::Operation create = v1::CREATE(volume);
@@ -2499,7 +2500,6 @@ TEST_P_TEMP_DISABLED_ON_WINDOWS(
 
   Future<Event::Update> updateStarting;
   Future<Event::Update> updateRunning;
-  Future<Event::Update> updateFinished;
   EXPECT_CALL(*scheduler, update(_, _))
     .WillOnce(DoAll(FutureArg<1>(&updateStarting),
                     v1::scheduler::SendAcknowledge(
@@ -2508,8 +2508,7 @@ TEST_P_TEMP_DISABLED_ON_WINDOWS(
     .WillOnce(DoAll(FutureArg<1>(&updateRunning),
                     v1::scheduler::SendAcknowledge(
                         frameworkId,
-                        offer.agent_id())))
-    .WillOnce(FutureArg<1>(&updateFinished));
+                        offer.agent_id())));
 
   mesos.send(v1::createCallAccept(
       frameworkId,
@@ -2524,18 +2523,83 @@ TEST_P_TEMP_DISABLED_ON_WINDOWS(
   ASSERT_EQ(TASK_RUNNING, updateRunning->status().state());
   ASSERT_EQ(taskInfo.task_id(), updateRunning->status().task_id());
 
-  AWAIT_READY(updateFinished);
-  ASSERT_EQ(TASK_FINISHED, updateFinished->status().state());
-  ASSERT_EQ(taskInfo.task_id(), updateFinished->status().task_id());
-
   string volumePath = slave::paths::getPersistentVolumePath(
       flags.work_dir,
       devolve(volume));
 
   string filePath = path::join(volumePath, "file");
 
+  // Wait up to 10 seconds for the task to write a file into the volume.
+  Duration waited = Duration::zero();
+  do {
+    if (os::exists(filePath)) {
+      break;
+    }
+
+    os::sleep(Seconds(1));
+    waited += Seconds(1);
+  } while (waited < Seconds(10));
+
   // Ensure that the task was able to write to the persistent volume.
+  EXPECT_TRUE(os::exists(filePath));
   EXPECT_SOME_EQ("abc\n", os::read(filePath));
+
+  v1::ContainerStatus status = updateRunning->status().container_status();
+
+  ASSERT_TRUE(status.has_container_id());
+  EXPECT_TRUE(status.container_id().has_parent());
+
+  v1::ContainerID executorContainerId = status.container_id().parent();
+
+  string taskPath = slave::paths::getTaskPath(
+      flags.work_dir,
+      devolve(offer.agent_id()),
+      devolve(frameworkId),
+      devolve(executorInfo.executor_id()),
+      devolve(executorContainerId),
+      devolve(taskInfo.task_id()));
+
+  string taskVolumePath =
+    path::join(taskPath, volume.disk().volume().container_path());
+
+  // Ensure the task's volume directory can be accessed from
+  // the `/files` endpoint.
+  process::UPID files("files", slave.get()->pid.address);
+
+  {
+    string query = string("path=") + taskVolumePath;
+    Future<Response> response = process::http::get(
+        files,
+        "browse",
+        query,
+        createBasicAuthHeaders(DEFAULT_CREDENTIAL));
+
+    AWAIT_ASSERT_RESPONSE_STATUS_EQ(OK().status, response);
+    AWAIT_ASSERT_RESPONSE_HEADER_EQ(APPLICATION_JSON, "Content-Type", response);
+
+    Try<JSON::Array> parse = JSON::parse<JSON::Array>(response->body);
+    ASSERT_SOME(parse);
+    EXPECT_NE(0, parse->values.size());
+  }
+
+  {
+    string query =
+      string("path=") + path::join(taskVolumePath, "file") + "&offset=0";
+
+    Future<Response> response = process::http::get(
+        files,
+        "read",
+        query,
+        createBasicAuthHeaders(DEFAULT_CREDENTIAL));
+
+    AWAIT_ASSERT_RESPONSE_STATUS_EQ(OK().status, response);
+
+    JSON::Object expected;
+    expected.values["offset"] = 0;
+    expected.values["data"] = "abc\n";
+
+    AWAIT_EXPECT_RESPONSE_BODY_EQ(stringify(expected), response);
+  }
 }
 
 


[4/4] mesos git commit: Detached the virtual paths regardless of the result of gc.

Posted by qi...@apache.org.
Detached the virtual paths regardless of the result of gc.

Previously we only detach the following paths when the gc for the
executor's sandbox succeeds.
  1. /agent_workdir/frameworks/FID/executors/EID/runs/CID
  2. /agent_workdir/frameworks/FID/executors/EID/runs/latest
  3. /frameworks/FID/executors/EID/runs/latest

But the problem is, such gc may not always succeed, e.g., it may fail
due to the parent directory of the executor's sandbox already gc'ed.

Now in this patch, we will detach those paths regardless of the result
of gc.

Review: https://reviews.apache.org/r/65156


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/5225a49c
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/5225a49c
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/5225a49c

Branch: refs/heads/master
Commit: 5225a49c495bc7e3362bcee2d460d8c99111c7f4
Parents: 9e2f9a2
Author: Qian Zhang <zh...@gmail.com>
Authored: Sun Jan 14 22:02:33 2018 +0800
Committer: Qian Zhang <zh...@gmail.com>
Committed: Wed Jan 17 10:04:30 2018 +0800

----------------------------------------------------------------------
 src/slave/slave.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/5225a49c/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 5ad6410..155d9f0 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -5873,7 +5873,7 @@ void Slave::removeExecutor(Framework* framework, Executor* executor)
 
   os::utime(path); // Update the modification time.
   garbageCollect(path)
-    .then(defer(self(), &Self::detachFile, path));
+    .onAny(defer(self(), &Self::detachFile, path));
 
   // Schedule the top level executor work directory, only if the
   // framework doesn't have any 'pending' tasks for this executor.
@@ -5897,10 +5897,9 @@ void Slave::removeExecutor(Framework* framework, Executor* executor)
 
     os::utime(path); // Update the modification time.
     garbageCollect(path)
-      .then(defer(self(), [=]() {
+      .onAny(defer(self(), [=](const Future<Nothing>& future) {
         detachFile(latestPath);
         detachFile(virtualLatestPath);
-        return Nothing();
       }));
   }
 
@@ -8615,7 +8614,7 @@ void Framework::recoverExecutor(
         slave->flags.work_dir, slave->info.id(), id(), state.id, runId);
 
     slave->garbageCollect(path)
-       .then(defer(slave, &Slave::detachFile, path));
+       .onAny(defer(slave, &Slave::detachFile, path));
 
     // GC the executor run's meta directory.
     slave->garbageCollect(paths::getExecutorRunPath(
@@ -8624,7 +8623,7 @@ void Framework::recoverExecutor(
     // GC the top level executor work directory.
     slave->garbageCollect(paths::getExecutorPath(
         slave->flags.work_dir, slave->info.id(), id(), state.id))
-        .then(defer(slave, &Slave::detachFile, latestPath));
+        .onAny(defer(slave, &Slave::detachFile, latestPath));
 
     // GC the top level executor meta directory.
     slave->garbageCollect(paths::getExecutorPath(