You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by gr...@apache.org on 2019/09/13 02:17:16 UTC
[mesos] branch master updated: Fixed a bug for non-partition-aware
schedulers.
This is an automated email from the ASF dual-hosted git repository.
grag pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git
The following commit(s) were added to refs/heads/master by this push:
new 8e1a512 Fixed a bug for non-partition-aware schedulers.
8e1a512 is described below
commit 8e1a51207304589a6521cff3540e0705fe1533ff
Author: Greg Mann <gr...@mesosphere.io>
AuthorDate: Thu Sep 12 16:33:20 2019 -0700
Fixed a bug for non-partition-aware schedulers.
Previously, the agent would send task status updates with the state
TASK_GONE_BY_OPERATOR to all schedulers when an agent was drained
with the `mark_gone` parameter set to `true`.
This patch updates this code to ensure that TASK_GONE_BY_OPERATOR
is only sent to partition-aware schedulers.
Review: https://reviews.apache.org/r/71480/
---
src/slave/slave.cpp | 69 ++++++++++++++++++++++++-----------------------
src/tests/slave_tests.cpp | 20 +++++++++++---
2 files changed, 52 insertions(+), 37 deletions(-)
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 4e93656..96890d3 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -5773,40 +5773,6 @@ void Slave::statusUpdate(StatusUpdate update, const Option<UPID>& pid)
update.mutable_status()->set_source(
pid == UPID() ? TaskStatus::SOURCE_SLAVE : TaskStatus::SOURCE_EXECUTOR);
- // If the agent is draining we provide additional
- // information for KILLING or KILLED states.
- if (drainConfig.isSome()) {
- switch (update.status().state()) {
- case TASK_STAGING:
- case TASK_STARTING:
- case TASK_RUNNING:
- case TASK_FAILED:
- case TASK_FINISHED:
- case TASK_ERROR:
- case TASK_LOST:
- case TASK_DROPPED:
- case TASK_UNREACHABLE:
- case TASK_GONE:
- case TASK_GONE_BY_OPERATOR:
- case TASK_UNKNOWN: {
- break;
- }
- case TASK_KILLING:
- case TASK_KILLED: {
- // We unconditionally overwrite any previous reason to provide a
- // consistent signal that this task went away during draining.
- update.mutable_status()->set_reason(TaskStatus::REASON_SLAVE_DRAINING);
-
- // If the draining marks the agent as gone report tasks as
- // gone by operator.
- if (drainConfig->mark_gone()) {
- update.mutable_status()->set_state(TASK_GONE_BY_OPERATOR);
- }
- break;
- }
- }
- }
-
// Set TaskStatus.executor_id if not already set; overwrite existing
// value if already set.
if (update.has_executor_id()) {
@@ -5843,6 +5809,41 @@ void Slave::statusUpdate(StatusUpdate update, const Option<UPID>& pid)
return;
}
+ // If the agent is draining we provide additional
+ // information for KILLING or KILLED states.
+ if (drainConfig.isSome()) {
+ switch (update.status().state()) {
+ case TASK_STAGING:
+ case TASK_STARTING:
+ case TASK_RUNNING:
+ case TASK_FAILED:
+ case TASK_FINISHED:
+ case TASK_ERROR:
+ case TASK_LOST:
+ case TASK_DROPPED:
+ case TASK_UNREACHABLE:
+ case TASK_GONE:
+ case TASK_GONE_BY_OPERATOR:
+ case TASK_UNKNOWN: {
+ break;
+ }
+ case TASK_KILLING:
+ case TASK_KILLED: {
+ // We unconditionally overwrite any previous reason to provide a
+ // consistent signal that this task went away during draining.
+ update.mutable_status()->set_reason(TaskStatus::REASON_SLAVE_DRAINING);
+
+ // If the draining marks the agent as gone report tasks as
+ // gone by operator.
+ if (drainConfig->mark_gone() &&
+ framework->capabilities.partitionAware) {
+ update.mutable_status()->set_state(TASK_GONE_BY_OPERATOR);
+ }
+ break;
+ }
+ }
+ }
+
if (HookManager::hooksAvailable()) {
// Even though the hook(s) return a TaskStatus, we only use two fields:
// container_status and labels. Remaining fields are discarded.
diff --git a/src/tests/slave_tests.cpp b/src/tests/slave_tests.cpp
index 02b65a9..c147bfc 100644
--- a/src/tests/slave_tests.cpp
+++ b/src/tests/slave_tests.cpp
@@ -12040,10 +12040,16 @@ TEST_F(SlaveTest, DrainAgentKillsRunningTask)
AWAIT_READY(updateSlaveMessage);
+ // Set the partition-aware capability to ensure that the terminal update state
+ // is TASK_GONE_BY_OPERATOR, since we will set `mark_gone = true`.
+ v1::FrameworkInfo frameworkInfo = v1::DEFAULT_FRAMEWORK_INFO;
+ frameworkInfo.add_capabilities()->set_type(
+ v1::FrameworkInfo::Capability::PARTITION_AWARE);
+
auto scheduler = std::make_shared<v1::MockHTTPScheduler>();
EXPECT_CALL(*scheduler, connected(_))
- .WillOnce(v1::scheduler::SendSubscribe(v1::DEFAULT_FRAMEWORK_INFO));
+ .WillOnce(v1::scheduler::SendSubscribe(frameworkInfo));
Future<v1::scheduler::Event::Subscribed> subscribed;
EXPECT_CALL(*scheduler, subscribed(_, _))
@@ -12160,10 +12166,16 @@ TEST_F(SlaveTest, DrainAgentKillsQueuedTask)
AWAIT_READY(updateSlaveMessage);
+ // Set the partition-aware capability to ensure that the terminal update state
+ // is TASK_GONE_BY_OPERATOR, since we will set `mark_gone = true`.
+ v1::FrameworkInfo frameworkInfo = v1::DEFAULT_FRAMEWORK_INFO;
+ frameworkInfo.add_capabilities()->set_type(
+ v1::FrameworkInfo::Capability::PARTITION_AWARE);
+
auto scheduler = std::make_shared<v1::MockHTTPScheduler>();
EXPECT_CALL(*scheduler, connected(_))
- .WillOnce(v1::scheduler::SendSubscribe(v1::DEFAULT_FRAMEWORK_INFO));
+ .WillOnce(v1::scheduler::SendSubscribe(frameworkInfo));
Future<v1::scheduler::Event::Subscribed> subscribed;
EXPECT_CALL(*scheduler, subscribed(_, _))
@@ -12351,7 +12363,9 @@ TEST_F(SlaveTest, DrainAgentKillsPendingTask)
AWAIT_READY(killedUpdate);
- EXPECT_EQ(v1::TASK_GONE_BY_OPERATOR, killedUpdate->status().state());
+ // The terminal update state in this case should be TASK_KILLED because the
+ // scheduler is not partition-aware.
+ EXPECT_EQ(v1::TASK_KILLED, killedUpdate->status().state());
EXPECT_EQ(
v1::TaskStatus::REASON_AGENT_DRAINING, killedUpdate->status().reason());
}