You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by al...@apache.org on 2017/10/18 19:41:53 UTC
[3/6] mesos git commit: Send TASK_STARTING from the built-in
executors.
Send TASK_STARTING from the built-in executors.
This gives schedulers more information about a tasks status,
in particular it gives a better estimate of a tasks start time
and helps differentiating between tasks stuck in TASK_STAGING
and tasks stuck in TASK_STARTING.
Review: https://reviews.apache.org/r/62212/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/de11f0ff
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/de11f0ff
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/de11f0ff
Branch: refs/heads/master
Commit: de11f0ffed36fbabc8fb4167859fd23b75f43f10
Parents: 4b6d848
Author: Benno Evers <be...@mesosphere.com>
Authored: Wed Oct 18 12:15:13 2017 -0700
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Wed Oct 18 12:15:13 2017 -0700
----------------------------------------------------------------------
docs/high-availability-framework-guide.md | 9 +++--
src/docker/executor.cpp | 8 ++++
src/launcher/default_executor.cpp | 52 +++++++++++++++++---------
src/launcher/executor.cpp | 5 ++-
4 files changed, 51 insertions(+), 23 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/de11f0ff/docs/high-availability-framework-guide.md
----------------------------------------------------------------------
diff --git a/docs/high-availability-framework-guide.md b/docs/high-availability-framework-guide.md
index 73743ab..f246773 100644
--- a/docs/high-availability-framework-guide.md
+++ b/docs/high-availability-framework-guide.md
@@ -189,10 +189,11 @@ initial state and several possible terminal states:
has not yet started to run. In this state, the task's dependencies are
fetched---for example, using the [Mesos fetcher cache](fetcher.md).
-* The `TASK_STARTING` state is optional and intended primarily for use by
- custom executors. It can be used to describe the fact that a custom executor
- has learned about the task (and maybe started fetching its dependencies) but has
- not yet started to run it.
+* The `TASK_STARTING` state is optional. It can be used to describe the fact
+ that an executor has learned about the task (and maybe started fetching its
+ dependencies) but has not yet started to run it. Custom executors are
+ encouraged to send it, to provide a more detailed description of the current
+ task state to outside observers.
* A task transitions to the `TASK_RUNNING` state after it has begun running
successfully (if the task fails to start, it transitions to one of the
http://git-wip-us.apache.org/repos/asf/mesos/blob/de11f0ff/src/docker/executor.cpp
----------------------------------------------------------------------
diff --git a/src/docker/executor.cpp b/src/docker/executor.cpp
index 3b0767f..5f6a0d0 100644
--- a/src/docker/executor.cpp
+++ b/src/docker/executor.cpp
@@ -159,6 +159,14 @@ public:
LOG(INFO) << "Starting task " << taskId.get();
+ // Send initial TASK_STARTING update.
+ // TODO(alexr): Use `protobuf::createTaskStatus()`
+ // instead of manually setting fields.
+ TaskStatus starting;
+ starting.mutable_task_id()->CopyFrom(task.task_id());
+ starting.set_state(TASK_STARTING);
+ driver->sendStatusUpdate(starting);
+
CHECK(task.has_container());
CHECK(task.has_command());
http://git-wip-us.apache.org/repos/asf/mesos/blob/de11f0ff/src/launcher/default_executor.cpp
----------------------------------------------------------------------
diff --git a/src/launcher/default_executor.cpp b/src/launcher/default_executor.cpp
index e58766f..cdb3c3e 100644
--- a/src/launcher/default_executor.cpp
+++ b/src/launcher/default_executor.cpp
@@ -108,6 +108,12 @@ private:
// `WAIT_NESTED_CONTAINER` call has not been established yet.
Option<Connection> waiting;
+ // TODO(bennoe): Create a real state machine instead of adding
+ // more and more ad-hoc boolean values.
+
+ // Indicates whether a container has been launched.
+ bool launched;
+
// Indicates whether a status update acknowledgement
// has been received for any status update.
bool acknowledged;
@@ -318,12 +324,14 @@ protected:
subscribe->add_unacknowledged_updates()->MergeFrom(update);
}
- // Send all unacknowledged tasks. We don't send unacknowledged terminated
- // (and hence already removed from `containers`) tasks, because for such
- // tasks `WAIT_NESTED_CONTAINER` call has already succeeded, meaning the
- // agent knows about the tasks and corresponding containers.
+ // Send all unacknowledged tasks. We don't send tasks whose container
+ // didn't launch yet, because the agent will learn about once it launches.
+ // We also don't send unacknowledged terminated (and hence already removed
+ // from `containers`) tasks, because for such tasks `WAIT_NESTED_CONTAINER`
+ // call has already succeeded, meaning the agent knows about the tasks and
+ // corresponding containers.
foreachvalue (const Owned<Container>& container, containers) {
- if (!container->acknowledged) {
+ if (container->launched && !container->acknowledged) {
subscribe->add_unacknowledged_tasks()->MergeFrom(container->taskInfo);
}
}
@@ -403,6 +411,23 @@ protected:
containerIds.push_back(containerId);
+ containers[task.task_id()] = Owned<Container>(new Container{
+ containerId,
+ task,
+ taskGroup,
+ None(),
+ None(),
+ None(),
+ None(),
+ false,
+ false,
+ false,
+ false});
+
+ // Send out the initial TASK_STARTING update.
+ const TaskStatus status = createTaskStatus(task.task_id(), TASK_STARTING);
+ forward(status);
+
agent::Call call;
call.set_type(agent::Call::LAUNCH_NESTED_CONTAINER);
@@ -526,17 +551,8 @@ protected:
const TaskInfo& task = taskGroup.tasks().Get(index++);
const TaskID& taskId = task.task_id();
- containers[taskId] = Owned<Container>(new Container{
- containerId,
- task,
- taskGroup,
- None(),
- None(),
- None(),
- None(),
- false,
- false,
- false});
+ CHECK(containers.contains(taskId));
+ containers.at(taskId)->launched = true;
if (task.has_check()) {
Try<Owned<checks::Checker>> checker =
@@ -1410,7 +1426,7 @@ private:
CHECK_EQ(SUBSCRIBED, state);
CHECK_SOME(connectionId);
- CHECK(containers.contains(taskId));
+ CHECK(containers.contains(taskId) && containers.at(taskId)->launched);
const Owned<Container>& container = containers.at(taskId);
@@ -1469,7 +1485,7 @@ private:
LinkedHashMap<UUID, Call::Update> unacknowledgedUpdates;
- // Active child containers.
+ // Child containers.
LinkedHashMap<TaskID, Owned<Container>> containers;
// There can be multiple simulataneous ongoing (re-)connection attempts
http://git-wip-us.apache.org/repos/asf/mesos/blob/de11f0ff/src/launcher/executor.cpp
----------------------------------------------------------------------
diff --git a/src/launcher/executor.cpp b/src/launcher/executor.cpp
index 0131577..34f6f7a 100644
--- a/src/launcher/executor.cpp
+++ b/src/launcher/executor.cpp
@@ -525,6 +525,10 @@ protected:
taskData = TaskData(task);
taskId = task.task_id();
+ // Send initial TASK_STARTING update.
+ TaskStatus starting = createTaskStatus(taskId.get(), TASK_STARTING);
+ forward(starting);
+
// Capture the kill policy.
if (task.has_kill_policy()) {
killPolicy = task.kill_policy();
@@ -1016,7 +1020,6 @@ private:
// If a check for the task has been defined, `check_status` field in each
// task status must be set to a valid `CheckStatusInfo` message even if
// there is no check status available yet.
- CHECK(taskData.isSome());
if (taskData->taskInfo.has_check()) {
CheckStatusInfo checkStatusInfo;
checkStatusInfo.set_type(taskData->taskInfo.check().type());