You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2017/08/24 00:25:39 UTC
[6/8] mesos git commit: Improved the reason and message for tasks
killed before delivery.
Improved the reason and message for tasks killed before delivery.
Previously, if a task was killed before delivery to a registering
executor, we sent REASON_EXECUTOR_UNREGISTERED with a message of
"Unregistered executor" in the agent, and the master sent no reason.
This introduces a REASON_TASK_KILLED_BEFORE_DELIVERY to clarify
this case to frameworks.
Review: https://reviews.apache.org/r/61640
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/623df7de
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/623df7de
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/623df7de
Branch: refs/heads/1.4.x
Commit: 623df7de2a09550fec04ac59f145ed316870aa7e
Parents: 20e32f6
Author: Benjamin Mahler <bm...@apache.org>
Authored: Fri Aug 11 13:38:08 2017 -0700
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed Aug 23 16:57:11 2017 -0700
----------------------------------------------------------------------
include/mesos/mesos.proto | 3 ++-
include/mesos/v1/mesos.proto | 3 ++-
src/master/master.cpp | 6 ++++--
src/slave/slave.cpp | 23 +++++++++++++----------
src/tests/master_authorization_tests.cpp | 7 ++++++-
src/tests/slave_recovery_tests.cpp | 11 ++++++-----
src/tests/slave_tests.cpp | 10 ++++++----
7 files changed, 39 insertions(+), 24 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/623df7de/include/mesos/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto
index 2cef6fc..3449c2d 100644
--- a/include/mesos/mesos.proto
+++ b/include/mesos/mesos.proto
@@ -2163,7 +2163,7 @@ message TaskStatus {
REASON_EXECUTOR_REGISTRATION_TIMEOUT = 23;
REASON_EXECUTOR_REREGISTRATION_TIMEOUT = 24;
REASON_EXECUTOR_TERMINATED = 1;
- REASON_EXECUTOR_UNREGISTERED = 2;
+ REASON_EXECUTOR_UNREGISTERED = 2; // No longer used.
REASON_FRAMEWORK_REMOVED = 3;
REASON_GC_ERROR = 4;
REASON_INVALID_FRAMEWORKID = 5;
@@ -2176,6 +2176,7 @@ message TaskStatus {
REASON_SLAVE_REMOVED = 11;
REASON_SLAVE_RESTARTED = 12;
REASON_SLAVE_UNKNOWN = 13;
+ REASON_TASK_KILLED_DURING_LAUNCH = 30;
REASON_TASK_CHECK_STATUS_UPDATED = 28;
REASON_TASK_HEALTH_CHECK_STATUS_UPDATED = 29;
REASON_TASK_GROUP_INVALID = 25;
http://git-wip-us.apache.org/repos/asf/mesos/blob/623df7de/include/mesos/v1/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/v1/mesos.proto b/include/mesos/v1/mesos.proto
index 4f8e0a0..4d905d3 100644
--- a/include/mesos/v1/mesos.proto
+++ b/include/mesos/v1/mesos.proto
@@ -2146,7 +2146,7 @@ message TaskStatus {
REASON_EXECUTOR_REGISTRATION_TIMEOUT = 23;
REASON_EXECUTOR_REREGISTRATION_TIMEOUT = 24;
REASON_EXECUTOR_TERMINATED = 1;
- REASON_EXECUTOR_UNREGISTERED = 2;
+ REASON_EXECUTOR_UNREGISTERED = 2; // No longer used.
REASON_FRAMEWORK_REMOVED = 3;
REASON_GC_ERROR = 4;
REASON_INVALID_FRAMEWORKID = 5;
@@ -2159,6 +2159,7 @@ message TaskStatus {
REASON_AGENT_REMOVED = 11;
REASON_AGENT_RESTARTED = 12;
REASON_AGENT_UNKNOWN = 13;
+ REASON_TASK_KILLED_DURING_LAUNCH = 30;
REASON_TASK_CHECK_STATUS_UPDATED = 28;
REASON_TASK_HEALTH_CHECK_STATUS_UPDATED = 29;
REASON_TASK_GROUP_INVALID = 25;
http://git-wip-us.apache.org/repos/asf/mesos/blob/623df7de/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 7f38a5e..a9d2191 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -4951,7 +4951,8 @@ void Master::_accept(
TaskStatus::SOURCE_MASTER,
None(),
"A task within the task group was killed before"
- " delivery to the agent");
+ " delivery to the agent",
+ TaskStatus::REASON_TASK_KILLED_DURING_LAUNCH);
metrics->tasks_killed++;
@@ -5320,7 +5321,8 @@ void Master::kill(Framework* framework, const scheduler::Call::Kill& kill)
TASK_KILLED,
TaskStatus::SOURCE_MASTER,
None(),
- "Killed pending task");
+ "Killed before delivery to the agent",
+ TaskStatus::REASON_TASK_KILLED_DURING_LAUNCH);
forward(update, UPID(), framework);
http://git-wip-us.apache.org/repos/asf/mesos/blob/623df7de/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 6c50659..fe01cdc 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -1915,7 +1915,8 @@ void Slave::_run(
TASK_KILLED,
TaskStatus::SOURCE_SLAVE,
UUID::random(),
- "Task killed before it was launched");
+ "Killed before delivery to the executor",
+ TaskStatus::REASON_TASK_KILLED_DURING_LAUNCH);
// TODO(vinod): Ensure that the status update manager reliably
// delivers this update. Currently, we don't guarantee this
@@ -2081,7 +2082,8 @@ void Slave::__run(
TASK_KILLED,
TaskStatus::SOURCE_SLAVE,
UUID::random(),
- "Task killed before it was launched");
+ "Killed before delivery to the executor",
+ TaskStatus::REASON_TASK_KILLED_DURING_LAUNCH);
statusUpdate(update, UPID());
}
@@ -3032,8 +3034,9 @@ void Slave::killTask(
TASK_KILLED,
TaskStatus::SOURCE_SLAVE,
UUID::random(),
- "Unregistered executor",
- TaskStatus::REASON_EXECUTOR_UNREGISTERED,
+ "A task within the task group was killed before"
+ " delivery to the executor",
+ TaskStatus::REASON_TASK_KILLED_DURING_LAUNCH,
executor->id));
}
} else {
@@ -3044,8 +3047,8 @@ void Slave::killTask(
TASK_KILLED,
TaskStatus::SOURCE_SLAVE,
UUID::random(),
- "Unregistered executor",
- TaskStatus::REASON_EXECUTOR_UNREGISTERED,
+ "Killed before delivery to the executor",
+ TaskStatus::REASON_TASK_KILLED_DURING_LAUNCH,
executor->id));
}
@@ -3089,8 +3092,8 @@ void Slave::killTask(
TASK_KILLED,
TaskStatus::SOURCE_SLAVE,
UUID::random(),
- "Task killed while it was queued",
- None(),
+ "Killed before delivery to the executor",
+ TaskStatus::REASON_TASK_KILLED_DURING_LAUNCH,
executor->id));
}
} else {
@@ -3101,8 +3104,8 @@ void Slave::killTask(
TASK_KILLED,
TaskStatus::SOURCE_SLAVE,
UUID::random(),
- "Task killed while it was queued",
- None(),
+ "Killed before delivery to the executor",
+ TaskStatus::REASON_TASK_KILLED_DURING_LAUNCH,
executor->id));
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/623df7de/src/tests/master_authorization_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_authorization_tests.cpp b/src/tests/master_authorization_tests.cpp
index 8d54472..0c68a1f 100644
--- a/src/tests/master_authorization_tests.cpp
+++ b/src/tests/master_authorization_tests.cpp
@@ -408,6 +408,7 @@ TEST_F(MasterAuthorizationTest, KillTask)
// Framework should get a TASK_KILLED right away.
AWAIT_READY(status);
EXPECT_EQ(TASK_KILLED, status->state());
+ EXPECT_EQ(TaskStatus::REASON_TASK_KILLED_DURING_LAUNCH, status->reason());
Future<Nothing> recoverResources =
FUTURE_DISPATCH(_, &MesosAllocatorProcess::recoverResources);
@@ -530,7 +531,9 @@ TEST_F(MasterAuthorizationTest, KillPendingTaskInTaskGroup)
AWAIT_READY(task1Status);
EXPECT_EQ(TASK_KILLED, task1Status->state());
EXPECT_TRUE(strings::contains(
- task1Status->message(), "Killed pending task"));
+ task1Status->message(), "Killed before delivery to the agent"));
+ EXPECT_EQ(TaskStatus::REASON_TASK_KILLED_DURING_LAUNCH,
+ task1Status->reason());
Future<Nothing> recoverResources =
FUTURE_DISPATCH(_, &MesosAllocatorProcess::recoverResources);
@@ -544,6 +547,8 @@ TEST_F(MasterAuthorizationTest, KillPendingTaskInTaskGroup)
EXPECT_TRUE(strings::contains(
task2Status->message(),
"A task within the task group was killed before delivery to the agent"));
+ EXPECT_EQ(TaskStatus::REASON_TASK_KILLED_DURING_LAUNCH,
+ task2Status->reason());
// No task launch should happen resulting in all resources being
// returned to the allocator.
http://git-wip-us.apache.org/repos/asf/mesos/blob/623df7de/src/tests/slave_recovery_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_recovery_tests.cpp b/src/tests/slave_recovery_tests.cpp
index 9ba6f60..90cce07 100644
--- a/src/tests/slave_recovery_tests.cpp
+++ b/src/tests/slave_recovery_tests.cpp
@@ -1297,10 +1297,11 @@ TYPED_TEST(SlaveRecoveryTest, RecoverUnregisteredExecutor)
}
-// This test verifies that when the agent gets a `killTask` message and restarts
-// before the executor registers, a TASK_KILLED update is sent and the executor
-// shuts down.
-TYPED_TEST(SlaveRecoveryTest, KillTaskUnregisteredExecutor)
+// This test verifies that when the agent gets a `killTask`
+// message for a queued task on a registering executor, a
+// restart of the agent will generate a TASK_KILLED and
+// will shut down the executor.
+TYPED_TEST(SlaveRecoveryTest, KillQueuedTaskDuringExecutorRegistration)
{
Try<Owned<cluster::Master>> master = this->StartMaster();
ASSERT_SOME(master);
@@ -1360,7 +1361,7 @@ TYPED_TEST(SlaveRecoveryTest, KillTaskUnregisteredExecutor)
AWAIT_READY(status);
EXPECT_EQ(TASK_KILLED, status->state());
- EXPECT_EQ(TaskStatus::REASON_EXECUTOR_UNREGISTERED, status->reason());
+ EXPECT_EQ(TaskStatus::REASON_TASK_KILLED_DURING_LAUNCH, status->reason());
slave.get()->terminate();
http://git-wip-us.apache.org/repos/asf/mesos/blob/623df7de/src/tests/slave_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_tests.cpp b/src/tests/slave_tests.cpp
index 8965915..9d33d57 100644
--- a/src/tests/slave_tests.cpp
+++ b/src/tests/slave_tests.cpp
@@ -4091,9 +4091,11 @@ TEST_F(SlaveTest, KillTaskBetweenRunTaskParts)
}
-// This test ensures that if a `killTask()` for an executor is received by the
-// agent before the executor registers, the executor is properly cleaned up.
-TEST_F(SlaveTest, KillTaskUnregisteredExecutor)
+// This test verifies that when the agent gets a `killTask`
+// message for a queued task on a registering executor, a
+// the agent will generate a TASK_KILLED and will shut down
+// the executor.
+TEST_F(SlaveTest, KillQueuedTaskDuringExecutorRegistration)
{
// Start a master.
Try<Owned<cluster::Master>> master = StartMaster();
@@ -4161,7 +4163,7 @@ TEST_F(SlaveTest, KillTaskUnregisteredExecutor)
AWAIT_READY(status);
EXPECT_EQ(TASK_KILLED, status->state());
- EXPECT_EQ(TaskStatus::REASON_EXECUTOR_UNREGISTERED, status->reason());
+ EXPECT_EQ(TaskStatus::REASON_TASK_KILLED_DURING_LAUNCH, status->reason());
// Now let the executor register by spoofing the message.
RegisterExecutorMessage registerExecutor;