You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by al...@apache.org on 2017/12/22 11:11:16 UTC
[1/4] mesos git commit: Promoted log level to warning for
disconnected events in exec.cpp.
Repository: mesos
Updated Branches:
refs/heads/master 8714e97be -> 769108e94
Promoted log level to warning for disconnected events in exec.cpp.
When the executor library receives messages while being disconnected,
it might indicate an out-of-order message delivery or lost messages.
This should be logged at the warning level to simplify triaging.
Review: https://reviews.apache.org/r/64032/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/44a702a1
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/44a702a1
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/44a702a1
Branch: refs/heads/master
Commit: 44a702a1b26963040e6cb6c362b7f01e5b4ef097
Parents: 8714e97
Author: Alexander Rukletsov <ru...@gmail.com>
Authored: Fri Dec 22 12:09:58 2017 +0100
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Fri Dec 22 12:09:58 2017 +0100
----------------------------------------------------------------------
src/exec/exec.cpp | 19 +++++++++----------
1 file changed, 9 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/44a702a1/src/exec/exec.cpp
----------------------------------------------------------------------
diff --git a/src/exec/exec.cpp b/src/exec/exec.cpp
index 7fc46da..07163a5 100644
--- a/src/exec/exec.cpp
+++ b/src/exec/exec.cpp
@@ -209,8 +209,7 @@ public:
protected:
virtual void initialize()
{
- VLOG(1) << "Executor started at: " << self()
- << " with pid " << getpid();
+ VLOG(1) << "Executor started at: " << self() << " with pid " << getpid();
link(slave);
@@ -318,8 +317,8 @@ protected:
}
if (!connected) {
- VLOG(1) << "Ignoring run task message for task " << task.task_id()
- << " because the driver is disconnected!";
+ LOG(WARNING) << "Ignoring run task message for task " << task.task_id()
+ << " because the driver is disconnected!";
return;
}
@@ -378,10 +377,10 @@ protected:
}
if (!connected) {
- VLOG(1) << "Ignoring status update acknowledgement "
- << uuid_.get() << " for task " << taskId
- << " of framework " << frameworkId
- << " because the driver is disconnected!";
+ LOG(WARNING) << "Ignoring status update acknowledgement "
+ << uuid_.get() << " for task " << taskId
+ << " of framework " << frameworkId
+ << " because the driver is disconnected!";
return;
}
@@ -408,8 +407,8 @@ protected:
}
if (!connected) {
- VLOG(1) << "Ignoring framework message because "
- << "the driver is disconnected!";
+ LOG(WARNING) << "Ignoring framework message because"
+ << " the driver is disconnected!";
return;
}
[2/4] mesos git commit: Ensured command executor always honors
shutdown request.
Posted by al...@apache.org.
Ensured command executor always honors shutdown request.
Review: https://reviews.apache.org/r/64069/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/47392cf9
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/47392cf9
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/47392cf9
Branch: refs/heads/master
Commit: 47392cf9f9024718550c69bcef9319560b47d5c7
Parents: 44a702a
Author: Alexander Rukletsov <ru...@gmail.com>
Authored: Fri Dec 22 12:10:15 2017 +0100
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Fri Dec 22 12:10:15 2017 +0100
----------------------------------------------------------------------
src/launcher/executor.cpp | 2 ++
1 file changed, 2 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/47392cf9/src/launcher/executor.cpp
----------------------------------------------------------------------
diff --git a/src/launcher/executor.cpp b/src/launcher/executor.cpp
index 31a4710..dd76c9f 100644
--- a/src/launcher/executor.cpp
+++ b/src/launcher/executor.cpp
@@ -765,6 +765,8 @@ protected:
if (launched) {
CHECK_SOME(taskId);
kill(taskId.get(), gracePeriod);
+ } else {
+ terminate(self());
}
}
[3/4] mesos git commit: Ensured executor adapter propagates error and
shutdown messages.
Posted by al...@apache.org.
Ensured executor adapter propagates error and shutdown messages.
Prior to this patch, if an error, kill, or shutdown occurred during
subscription / registration with the agent, it was not propagated back
to the executor if the v0_v1 executor adapter was used. This happened
because the adapter did not call the `connected` callback until after
successful registration and hence the executor did not even try to
send the `SUBSCRIBE` call, without which the adapter did not send any
events to the executor.
A fix is to call the `connected` callback if an error occurred or
shutdown / kill event arrived before the executor had subscribed.
Review: https://reviews.apache.org/r/64070/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/b2eddcfe
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/b2eddcfe
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/b2eddcfe
Branch: refs/heads/master
Commit: b2eddcfe0ede4725208ae33c8c7f56563ff10514
Parents: 47392cf
Author: Alexander Rukletsov <ru...@gmail.com>
Authored: Fri Dec 22 12:10:28 2017 +0100
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Fri Dec 22 12:10:28 2017 +0100
----------------------------------------------------------------------
src/executor/v0_v1executor.cpp | 41 ++++++++++++++++++++++++++++++++++++-
1 file changed, 40 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/b2eddcfe/src/executor/v0_v1executor.cpp
----------------------------------------------------------------------
diff --git a/src/executor/v0_v1executor.cpp b/src/executor/v0_v1executor.cpp
index 61d5919..086cfc7 100644
--- a/src/executor/v0_v1executor.cpp
+++ b/src/executor/v0_v1executor.cpp
@@ -52,6 +52,7 @@ public:
const function<void(const queue<Event>&)>& received)
: ProcessBase(process::ID::generate("v0-to-v1-adapter")),
callbacks {connected, disconnected, received},
+ connected(false),
subscribeCall(false) {}
virtual ~V0ToV1AdapterProcess() = default;
@@ -61,7 +62,10 @@ public:
const mesos::FrameworkInfo& _frameworkInfo,
const mesos::SlaveInfo& slaveInfo)
{
- callbacks.connected();
+ if (!connected) {
+ callbacks.connected();
+ connected = true;
+ }
// We need these copies to populate the fields in `Event::Subscribed` upon
// receiving a `reregistered()` callback later.
@@ -92,6 +96,7 @@ public:
// disconnection from the agent.
callbacks.disconnected();
callbacks.connected();
+ connected = true;
Event event;
event.set_type(Event::SUBSCRIBED);
@@ -111,6 +116,17 @@ public:
void killTask(const mesos::TaskID& taskId)
{
+ // Logically an executor cannot receive any response from an agent if it
+ // is not connected. Since we have received `killTask`, we assume we are
+ // connected and trigger the `connected` callback to enable event delivery.
+ // This satisfies the invariant of the v1 interface that an executor can
+ // receive an event only after successfully connecting with the agent.
+ if (!connected) {
+ LOG(INFO) << "Implicitly connecting the executor to kill a task";
+ callbacks.connected();
+ connected = true;
+ }
+
Event event;
event.set_type(Event::KILL);
@@ -147,6 +163,17 @@ public:
void shutdown()
{
+ // Logically an executor cannot receive any response from an agent if it
+ // is not connected. Since we have received `shutdown`, we assume we are
+ // connected and trigger the `connected` callback to enable event delivery.
+ // This satisfies the invariant of the v1 interface that an executor can
+ // receive an event only after successfully connecting with the agent.
+ if (!connected) {
+ LOG(INFO) << "Implicitly connecting the executor to shut it down";
+ callbacks.connected();
+ connected = true;
+ }
+
Event event;
event.set_type(Event::SHUTDOWN);
@@ -155,6 +182,17 @@ public:
void error(const string& message)
{
+ // Logically an executor cannot receive any response from an agent if it
+ // is not connected. Since we have received `error`, we assume we are
+ // connected and trigger the `connected` callback to enable event delivery.
+ // This satisfies the invariant of the v1 interface that an executor can
+ // receive an event only after successfully connecting with the agent.
+ if (!connected) {
+ LOG(INFO) << "Implicitly connecting the executor to send an error";
+ callbacks.connected();
+ connected = true;
+ }
+
Event event;
event.set_type(Event::ERROR);
@@ -232,6 +270,7 @@ private:
};
Callbacks callbacks;
+ bool connected;
bool subscribeCall;
queue<Event> pending;
Option<mesos::ExecutorInfo> executorInfo;
[4/4] mesos git commit: Terminated driver-based executors if kill
arrives before launch task.
Posted by al...@apache.org.
Terminated driver-based executors if kill arrives before launch task.
`ExecutorRegisteredMessage` or `RunTaskMessage` may not be delivered
to a driver-based executor. Since these messages are not retried,
without this patch an executor never starts a task and remains idle,
ignoring kill task request. This patch ensures all built-in driver-
based executors eventually shut down if kill task arrives before
the task has been started.
Review: https://reviews.apache.org/r/64033/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/769108e9
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/769108e9
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/769108e9
Branch: refs/heads/master
Commit: 769108e94a7c7834c44e01091a9940354eb3f6e4
Parents: b2eddcf
Author: Alexander Rukletsov <ru...@gmail.com>
Authored: Fri Dec 22 12:10:35 2017 +0100
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Fri Dec 22 12:10:35 2017 +0100
----------------------------------------------------------------------
src/docker/executor.cpp | 6 ++++++
src/exec/exec.cpp | 11 +++++++++++
src/launcher/executor.cpp | 6 ++++++
3 files changed, 23 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/769108e9/src/docker/executor.cpp
----------------------------------------------------------------------
diff --git a/src/docker/executor.cpp b/src/docker/executor.cpp
index 3974f20..e4c53d5 100644
--- a/src/docker/executor.cpp
+++ b/src/docker/executor.cpp
@@ -383,6 +383,12 @@ private:
return;
}
+ // Terminate if a kill task request is received before the task is launched.
+ // This can happen, for example, if `RunTaskMessage` has not been delivered.
+ // See MESOS-8297.
+ CHECK_SOME(run) << "Terminating because kill task message has been"
+ << " received before the task has been launched";
+
// TODO(alexr): If a kill is in progress, consider adjusting
// the grace period if a new one is provided.
http://git-wip-us.apache.org/repos/asf/mesos/blob/769108e9/src/exec/exec.cpp
----------------------------------------------------------------------
diff --git a/src/exec/exec.cpp b/src/exec/exec.cpp
index 07163a5..0c76a3f 100644
--- a/src/exec/exec.cpp
+++ b/src/exec/exec.cpp
@@ -347,6 +347,17 @@ protected:
return;
}
+ // A kill task request is received when the driver is not connected. This
+ // can happen, for example, if `ExecutorRegisteredMessage` has not been
+ // delivered. We do not shutdown the driver because there might be other
+ // still running tasks and the executor might eventually reconnect, e.g.,
+ // after the agent failover. We do not drop ignore the message because the
+ // actual executor may still want to react, e.g., commit suicide.
+ if (!connected) {
+ LOG(WARNING) << "Executor received kill task message for task " << taskId
+ << " while disconnected from the agent!";
+ }
+
VLOG(1) << "Executor asked to kill task '" << taskId << "'";
Stopwatch stopwatch;
http://git-wip-us.apache.org/repos/asf/mesos/blob/769108e9/src/launcher/executor.cpp
----------------------------------------------------------------------
diff --git a/src/launcher/executor.cpp b/src/launcher/executor.cpp
index dd76c9f..794bf7c 100644
--- a/src/launcher/executor.cpp
+++ b/src/launcher/executor.cpp
@@ -777,6 +777,12 @@ private:
return;
}
+ // Terminate if a kill task request is received before the task is launched.
+ // This can happen, for example, if `RunTaskMessage` has not been delivered.
+ // See MESOS-8297.
+ CHECK(launched) << "Terminating because kill task message has been"
+ << " received before the task has been launched";
+
// If the task is being killed but has not terminated yet and
// we receive another kill request. Check if we need to adjust
// the remaining grace period.