You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by jp...@apache.org on 2017/10/15 22:48:45 UTC
[1/9] mesos git commit: Stopped keeping multiple limitations in
MesosContainerizer.
Repository: mesos
Updated Branches:
refs/heads/master 1c51c9863 -> cc29c27ee
Stopped keeping multiple limitations in MesosContainerizer.
MesosContainerizer was keeping a vector of ContainerLimitation
objects, but in practice it was not possible to have more than one at
a time, since receiving a limitation initiates a container destroy
and new limitations are dropped once the destroy begins. We replace
the vector<ContainerLimitation> with an Option<ContainerLimitation>,
but otherwise preserve all the existing semantics.
Review: https://reviews.apache.org/r/62640/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/14117009
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/14117009
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/14117009
Branch: refs/heads/master
Commit: 1411700928e51aabbb3d38641c9d9f39815918d4
Parents: 1c51c98
Author: James Peach <jp...@apache.org>
Authored: Sun Oct 15 15:21:19 2017 -0700
Committer: James Peach <jp...@apache.org>
Committed: Sun Oct 15 15:21:19 2017 -0700
----------------------------------------------------------------------
src/slave/containerizer/mesos/containerizer.cpp | 18 +++++-------------
src/slave/containerizer/mesos/containerizer.hpp | 6 +++---
2 files changed, 8 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/14117009/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp
index 4d5dc13..4851855 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -2440,21 +2440,13 @@ void MesosContainerizerProcess::______destroy(
// registered. This could occur if the limitation (e.g., an OOM)
// killed the executor and we triggered destroy() off the executor
// exit.
- if (!container->limitations.empty()) {
+ if (container->limitation.isSome()) {
termination.set_state(TaskState::TASK_FAILED);
+ termination.set_message(container->limitation->message());
- // We concatenate the messages if there are multiple limitations.
- vector<string> messages;
-
- foreach (const ContainerLimitation& limitation, container->limitations) {
- messages.push_back(limitation.message());
-
- if (limitation.has_reason()) {
- termination.add_reasons(limitation.reason());
- }
+ if (container->limitation->has_reason()) {
+ termination.add_reasons(container->limitation->reason());
}
-
- termination.set_message(strings::join("; ", messages));
}
// Now that we are done destroying the container we need to cleanup
@@ -2669,7 +2661,7 @@ void MesosContainerizerProcess::limited(
<< " resource " << future.get().resources()
<< " and will be terminated";
- containers_.at(containerId)->limitations.push_back(future.get());
+ containers_.at(containerId)->limitation = future.get();
} else {
// TODO(idownes): A discarded future will not be an error when
// isolators discard their promises after cleanup.
http://git-wip-us.apache.org/repos/asf/mesos/blob/14117009/src/slave/containerizer/mesos/containerizer.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.hpp b/src/slave/containerizer/mesos/containerizer.hpp
index cc23b4d..ad01a97 100644
--- a/src/slave/containerizer/mesos/containerizer.hpp
+++ b/src/slave/containerizer/mesos/containerizer.hpp
@@ -329,9 +329,9 @@ private:
// calling cleanup after all isolators have finished isolating.
process::Future<std::list<Nothing>> isolation;
- // We keep track of any limitations received from each isolator so
- // we can determine the cause of a container termination.
- std::vector<mesos::slave::ContainerLimitation> limitations;
+ // We keep track of any limitation received from an isolator
+ // so we can determine the cause of a container termination.
+ Option<mesos::slave::ContainerLimitation> limitation;
// We keep track of the resources for each container so we can set
// the ResourceStatistics limits in usage().
[5/9] mesos git commit: Propagated the container termination to the
agent API.
Posted by jp...@apache.org.
Propagated the container termination to the agent API.
Updated the agent API so that we can propagate information
from the container termination up to the `WaitNestedContainer`
response. We now propagate resources all the way from the
container limitation to the `WaitNestedContainer` response so
that an executor can know specifically which resource limit
violation caused the container termination.
Review: https://reviews.apache.org/r/62643/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/98d96ca9
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/98d96ca9
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/98d96ca9
Branch: refs/heads/master
Commit: 98d96ca96570eb4d0d1604ba738c24ecc7e71f7f
Parents: f3ee923
Author: James Peach <jp...@apache.org>
Authored: Sun Oct 15 15:21:41 2017 -0700
Committer: James Peach <jp...@apache.org>
Committed: Sun Oct 15 15:21:41 2017 -0700
----------------------------------------------------------------------
include/mesos/agent/agent.proto | 13 +++++++++++++
include/mesos/slave/containerizer.proto | 7 +++++++
include/mesos/v1/agent/agent.proto | 9 +++++++++
src/slave/containerizer/mesos/containerizer.cpp | 5 +++++
src/slave/http.cpp | 17 +++++++++++++++++
5 files changed, 51 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/98d96ca9/include/mesos/agent/agent.proto
----------------------------------------------------------------------
diff --git a/include/mesos/agent/agent.proto b/include/mesos/agent/agent.proto
index 527bcf2..4df3dce 100644
--- a/include/mesos/agent/agent.proto
+++ b/include/mesos/agent/agent.proto
@@ -338,6 +338,19 @@ message Response {
// family of macros to extract whether the process exited cleanly and
// what the exit code was.
optional int32 exit_status = 1;
+
+ // The `state` and `reason` fields may be populated if the Mesos agent
+ // terminates the container. In the absence of any special knowledge,
+ // executors should propagate this information via the `status` field
+ // of an `Update` call for the corresponding TaskID.
+ optional TaskState state = 2;
+ optional TaskStatus.Reason reason = 3;
+
+ // This field will be populated if the task was terminated due to
+ // a resource limitation.
+ optional TaskResourceLimitation limitation = 4;
+
+ optional string message = 5;
}
optional Type type = 1;
http://git-wip-us.apache.org/repos/asf/mesos/blob/98d96ca9/include/mesos/slave/containerizer.proto
----------------------------------------------------------------------
diff --git a/include/mesos/slave/containerizer.proto b/include/mesos/slave/containerizer.proto
index 4375a38..689acfc 100644
--- a/include/mesos/slave/containerizer.proto
+++ b/include/mesos/slave/containerizer.proto
@@ -247,4 +247,11 @@ message ContainerTermination {
optional TaskState state = 4;
optional TaskStatus.Reason reason = 5;
optional string message = 2;
+
+ // If the container was terminated due to a resource limitation,
+ // this is the resource that caused the termination.
+ //
+ // NOTE: 'Resources' is used here because the resource may span
+ // multiple roles (e.g. `"mem(*):1;mem(role):2"`).
+ repeated Resource limited_resources = 6;
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/98d96ca9/include/mesos/v1/agent/agent.proto
----------------------------------------------------------------------
diff --git a/include/mesos/v1/agent/agent.proto b/include/mesos/v1/agent/agent.proto
index be2a2f7..e99d23d 100644
--- a/include/mesos/v1/agent/agent.proto
+++ b/include/mesos/v1/agent/agent.proto
@@ -338,6 +338,15 @@ message Response {
// family of macros to extract whether the process exited cleanly and
// what the exit code was.
optional int32 exit_status = 1;
+
+ optional TaskState state = 2;
+ optional TaskStatus.Reason reason = 3;
+
+ // This field will be populated if the task was terminated due to
+ // a resource limitation.
+ optional TaskResourceLimitation limitation = 4;
+
+ optional string message = 5;
}
optional Type type = 1;
http://git-wip-us.apache.org/repos/asf/mesos/blob/98d96ca9/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp
index 23caba2..78fdd21 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -2685,6 +2685,11 @@ void MesosContainerizerProcess::limited(
if (future->has_reason()) {
termination->set_reason(future->reason());
}
+
+ if (!future->resources().empty()) {
+ termination->mutable_limited_resources()->CopyFrom(
+ future->resources());
+ }
} else {
// TODO(idownes): A discarded future will not be an error when
// isolators discard their promises after cleanup.
http://git-wip-us.apache.org/repos/asf/mesos/blob/98d96ca9/src/slave/http.cpp
----------------------------------------------------------------------
diff --git a/src/slave/http.cpp b/src/slave/http.cpp
index f4c3e6b..f2e06af 100644
--- a/src/slave/http.cpp
+++ b/src/slave/http.cpp
@@ -2512,6 +2512,23 @@ Future<Response> Http::waitNestedContainer(
waitNestedContainer->set_exit_status(termination->status());
}
+ if (termination->has_state()) {
+ waitNestedContainer->set_state(termination->state());
+ }
+
+ if (termination->has_reason()) {
+ waitNestedContainer->set_reason(termination->reason());
+ }
+
+ if (!termination->limited_resources().empty()) {
+ waitNestedContainer->mutable_limitation()->mutable_resources()
+ ->CopyFrom(termination->limited_resources());
+ }
+
+ if (termination->has_message()) {
+ waitNestedContainer->set_message(termination->message());
+ }
+
return OK(serialize(acceptType, evolve(response)),
stringify(acceptType));
});
[2/9] mesos git commit: Removed multiple reasons from
ContainerTermination.
Posted by jp...@apache.org.
Removed multiple reasons from ContainerTermination.
Since the ContainerTermination is no longer ever populated with more
than one TaskStatus.Reason, make it optional rather than repeated.
Review: https://reviews.apache.org/r/62641/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/0f5328ad
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/0f5328ad
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/0f5328ad
Branch: refs/heads/master
Commit: 0f5328ad08849694cd0ca897f957852337bba48c
Parents: 1411700
Author: James Peach <jp...@apache.org>
Authored: Sun Oct 15 15:21:24 2017 -0700
Committer: James Peach <jp...@apache.org>
Committed: Sun Oct 15 15:21:24 2017 -0700
----------------------------------------------------------------------
include/mesos/slave/containerizer.proto | 2 +-
src/slave/containerizer/mesos/containerizer.cpp | 2 +-
src/slave/slave.cpp | 27 ++++++++++----------
.../containerizer/io_switchboard_tests.cpp | 4 +--
4 files changed, 17 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/0f5328ad/include/mesos/slave/containerizer.proto
----------------------------------------------------------------------
diff --git a/include/mesos/slave/containerizer.proto b/include/mesos/slave/containerizer.proto
index 84f9ca7..4375a38 100644
--- a/include/mesos/slave/containerizer.proto
+++ b/include/mesos/slave/containerizer.proto
@@ -245,6 +245,6 @@ message ContainerTermination {
// The 'state', 'reasons' and 'message' of a status update for
// non-terminal tasks when the executor is terminated.
optional TaskState state = 4;
- repeated TaskStatus.Reason reasons = 5;
+ optional TaskStatus.Reason reason = 5;
optional string message = 2;
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/0f5328ad/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp
index 4851855..fd7b71e 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -2445,7 +2445,7 @@ void MesosContainerizerProcess::______destroy(
termination.set_message(container->limitation->message());
if (container->limitation->has_reason()) {
- termination.add_reasons(container->limitation->reason());
+ termination.set_reason(container->limitation->reason());
}
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/0f5328ad/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index c35cf7d..df0e894 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -2493,7 +2493,7 @@ void Slave::___run(
ContainerTermination termination;
termination.set_state(taskState);
- termination.add_reasons(TaskStatus::REASON_CONTAINER_UPDATE_FAILED);
+ termination.set_reason(TaskStatus::REASON_CONTAINER_UPDATE_FAILED);
termination.set_message(
"Failed to update resources for container: " +
(future.isFailed() ? future.failure() : "discarded"));
@@ -2753,7 +2753,7 @@ void Slave::launchExecutor(
// and perform cleanup via `executorTerminated`.
ContainerTermination termination;
termination.set_state(TASK_FAILED);
- termination.add_reasons(TaskStatus::REASON_CONTAINER_LAUNCH_FAILED);
+ termination.set_reason(TaskStatus::REASON_CONTAINER_LAUNCH_FAILED);
termination.set_message("Executor " + executorState);
executorTerminated(frameworkId, executorId, termination);
@@ -2774,7 +2774,7 @@ void Slave::launchExecutor(
ContainerTermination termination;
termination.set_state(TASK_FAILED);
- termination.add_reasons(TaskStatus::REASON_CONTAINER_LAUNCH_FAILED);
+ termination.set_reason(TaskStatus::REASON_CONTAINER_LAUNCH_FAILED);
termination.set_message(
"Secret generation failed: " +
(future->isFailed() ? future->failure() : "discarded"));
@@ -4303,7 +4303,7 @@ void Slave::_reregisterExecutor(
ContainerTermination termination;
termination.set_state(taskState);
- termination.add_reasons(TaskStatus::REASON_CONTAINER_UPDATE_FAILED);
+ termination.set_reason(TaskStatus::REASON_CONTAINER_UPDATE_FAILED);
termination.set_message(
"Failed to update resources for container: " +
(future.isFailed() ? future.failure() : "discarded"));
@@ -4356,7 +4356,7 @@ void Slave::reregisterExecutorTimeout()
ContainerTermination termination;
termination.set_state(taskState);
- termination.add_reasons(
+ termination.set_reason(
TaskStatus::REASON_EXECUTOR_REREGISTRATION_TIMEOUT);
termination.set_message(
"Executor did not re-register within " +
@@ -4737,7 +4737,7 @@ void Slave::__statusUpdate(
ContainerTermination termination;
termination.set_state(taskState);
- termination.add_reasons(TaskStatus::REASON_CONTAINER_UPDATE_FAILED);
+ termination.set_reason(TaskStatus::REASON_CONTAINER_UPDATE_FAILED);
termination.set_message(
"Failed to update resources for container: " +
(future->isFailed() ? future->failure() : "discarded"));
@@ -5299,7 +5299,7 @@ void Slave::executorLaunched(
if (executor != nullptr) {
ContainerTermination termination;
termination.set_state(TASK_FAILED);
- termination.add_reasons(TaskStatus::REASON_CONTAINER_LAUNCH_FAILED);
+ termination.set_reason(TaskStatus::REASON_CONTAINER_LAUNCH_FAILED);
termination.set_message(
"Failed to launch container: " +
(future.isFailed() ? future.failure() : "discarded"));
@@ -5883,7 +5883,7 @@ void Slave::registerExecutorTimeout(
ContainerTermination termination;
termination.set_state(TASK_FAILED);
- termination.add_reasons(TaskStatus::REASON_EXECUTOR_REGISTRATION_TIMEOUT);
+ termination.set_reason(TaskStatus::REASON_EXECUTOR_REGISTRATION_TIMEOUT);
termination.set_message(
"Executor did not register within " +
stringify(flags.executor_registration_timeout));
@@ -6681,7 +6681,7 @@ void Slave::_qosCorrections(const Future<list<QoSCorrection>>& future)
ContainerTermination termination;
termination.set_state(taskState);
- termination.add_reasons(TaskStatus::REASON_CONTAINER_PREEMPTED);
+ termination.set_reason(TaskStatus::REASON_CONTAINER_PREEMPTED);
termination.set_message("Container preempted by QoS correction");
executor->pendingTermination = termination;
@@ -6902,13 +6902,12 @@ void Slave::sendExecutorTerminatedStatusUpdate(
}
// Determine the task reason for the status update.
- // TODO(jieyu): Handle multiple reasons (MESOS-2657).
if (termination.isReady() &&
- termination->isSome() && termination->get().reasons().size() > 0) {
- reason = termination->get().reasons(0);
+ termination->isSome() && termination->get().has_reason()) {
+ reason = termination->get().reason();
} else if (executor->pendingTermination.isSome() &&
- executor->pendingTermination->reasons().size() > 0) {
- reason = executor->pendingTermination->reasons(0);
+ executor->pendingTermination->has_reason()) {
+ reason = executor->pendingTermination->reason();
} else {
reason = TaskStatus::REASON_EXECUTOR_TERMINATED;
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/0f5328ad/src/tests/containerizer/io_switchboard_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer/io_switchboard_tests.cpp b/src/tests/containerizer/io_switchboard_tests.cpp
index bf7917a..c3410cd 100644
--- a/src/tests/containerizer/io_switchboard_tests.cpp
+++ b/src/tests/containerizer/io_switchboard_tests.cpp
@@ -900,9 +900,9 @@ TEST_F(IOSwitchboardTest, KillSwitchboardContainerDestroyed)
ASSERT_TRUE(wait.get()->has_status());
EXPECT_WTERMSIG_EQ(SIGKILL, wait.get()->status());
- ASSERT_TRUE(wait.get()->reasons().size() == 1);
+ ASSERT_TRUE(wait.get()->has_reason());
ASSERT_EQ(TaskStatus::REASON_IO_SWITCHBOARD_EXITED,
- wait.get()->reasons().Get(0));
+ wait.get()->reason());
wait = containerizer->wait(containerId);
[3/9] mesos git commit: Propagated the termination info down the
container tree.
Posted by jp...@apache.org.
Propagated the termination info down the container tree.
When the MesosContainerizer destroys a container tree, we need to
propagate the ContainerTermination down to all the child containers
so that any executor that is waiting for them can receive enough
information to send a useful status update.
Review: https://reviews.apache.org/r/62642/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/7d9781f9
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/7d9781f9
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/7d9781f9
Branch: refs/heads/master
Commit: 7d9781f93fcde7e9fb4f8dfcbc1e71514f6b2dc7
Parents: 0f5328a
Author: James Peach <jp...@apache.org>
Authored: Sun Oct 15 15:21:35 2017 -0700
Committer: James Peach <jp...@apache.org>
Committed: Sun Oct 15 15:21:35 2017 -0700
----------------------------------------------------------------------
src/slave/containerizer/mesos/containerizer.cpp | 85 +++++++++++++-------
src/slave/containerizer/mesos/containerizer.hpp | 19 +++--
2 files changed, 66 insertions(+), 38 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/7d9781f9/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp
index fd7b71e..23caba2 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -612,7 +612,8 @@ Future<bool> MesosContainerizer::destroy(const ContainerID& containerId)
{
return dispatch(process.get(),
&MesosContainerizerProcess::destroy,
- containerId);
+ containerId,
+ None());
}
@@ -1003,7 +1004,7 @@ Future<Nothing> MesosContainerizerProcess::__recover(
// Destroy all the orphan containers.
foreach (const ContainerID& containerId, orphans) {
LOG(INFO) << "Cleaning up orphan container " << containerId;
- destroy(containerId);
+ destroy(containerId, None());
}
return Nothing();
@@ -2166,7 +2167,8 @@ Future<ContainerStatus> MesosContainerizerProcess::status(
Future<bool> MesosContainerizerProcess::destroy(
- const ContainerID& containerId)
+ const ContainerID& containerId,
+ const Option<ContainerTermination>& termination)
{
if (!containers_.contains(containerId)) {
// This can happen due to the race between destroys initiated by
@@ -2213,12 +2215,12 @@ Future<bool> MesosContainerizerProcess::destroy(
list<Future<bool>> destroys;
foreach (const ContainerID& child, container->children) {
- destroys.push_back(destroy(child));
+ destroys.push_back(destroy(child, termination));
}
await(destroys)
.then(defer(self(), [=](const list<Future<bool>>& futures) {
- _destroy(containerId, previousState, futures);
+ _destroy(containerId, termination, previousState, futures);
return Nothing();
}));
@@ -2234,6 +2236,7 @@ Future<bool> MesosContainerizerProcess::destroy(
void MesosContainerizerProcess::_destroy(
const ContainerID& containerId,
+ const Option<ContainerTermination>& termination,
const State& previousState,
const list<Future<bool>>& destroys)
{
@@ -2272,6 +2275,7 @@ void MesosContainerizerProcess::_destroy(
self(),
&Self::_____destroy,
containerId,
+ termination,
list<Future<Nothing>>()));
return;
@@ -2295,7 +2299,7 @@ void MesosContainerizerProcess::_destroy(
container->status.isSome()
? container->status.get()
: None())
- .onAny(defer(self(), &Self::____destroy, containerId));
+ .onAny(defer(self(), &Self::____destroy, containerId, termination));
return;
}
@@ -2307,7 +2311,7 @@ void MesosContainerizerProcess::_destroy(
// Wait for the isolators to finish isolating before we start
// to destroy the container.
container->isolation
- .onAny(defer(self(), &Self::__destroy, containerId));
+ .onAny(defer(self(), &Self::__destroy, containerId, termination));
return;
}
@@ -2317,23 +2321,30 @@ void MesosContainerizerProcess::_destroy(
fetcher->kill(containerId);
}
- __destroy(containerId);
+ __destroy(containerId, termination);
}
void MesosContainerizerProcess::__destroy(
- const ContainerID& containerId)
+ const ContainerID& containerId,
+ const Option<ContainerTermination>& termination)
{
CHECK(containers_.contains(containerId));
// Kill all processes then continue destruction.
launcher->destroy(containerId)
- .onAny(defer(self(), &Self::___destroy, containerId, lambda::_1));
+ .onAny(defer(
+ self(),
+ &Self::___destroy,
+ containerId,
+ termination,
+ lambda::_1));
}
void MesosContainerizerProcess::___destroy(
const ContainerID& containerId,
+ const Option<ContainerTermination>& termination,
const Future<Nothing>& future)
{
CHECK(containers_.contains(containerId));
@@ -2361,22 +2372,29 @@ void MesosContainerizerProcess::___destroy(
CHECK_SOME(container->status);
container->status.get()
- .onAny(defer(self(), &Self::____destroy, containerId));
+ .onAny(defer(self(), &Self::____destroy, containerId, termination));
}
void MesosContainerizerProcess::____destroy(
- const ContainerID& containerId)
+ const ContainerID& containerId,
+ const Option<ContainerTermination>& termination)
{
CHECK(containers_.contains(containerId));
cleanupIsolators(containerId)
- .onAny(defer(self(), &Self::_____destroy, containerId, lambda::_1));
+ .onAny(defer(
+ self(),
+ &Self::_____destroy,
+ containerId,
+ termination,
+ lambda::_1));
}
void MesosContainerizerProcess::_____destroy(
const ContainerID& containerId,
+ const Option<ContainerTermination>& termination,
const Future<list<Future<Nothing>>>& cleanups)
{
// This should not occur because we only use the Future<list> to
@@ -2407,12 +2425,18 @@ void MesosContainerizerProcess::_____destroy(
}
provisioner->destroy(containerId)
- .onAny(defer(self(), &Self::______destroy, containerId, lambda::_1));
+ .onAny(defer(
+ self(),
+ &Self::______destroy,
+ containerId,
+ termination,
+ lambda::_1));
}
void MesosContainerizerProcess::______destroy(
const ContainerID& containerId,
+ const Option<ContainerTermination>& _termination,
const Future<bool>& destroy)
{
CHECK(containers_.contains(containerId));
@@ -2430,25 +2454,16 @@ void MesosContainerizerProcess::______destroy(
ContainerTermination termination;
+ if (_termination.isSome()) {
+ termination = _termination.get();
+ }
+
if (container->status.isSome() &&
container->status->isReady() &&
container->status->get().isSome()) {
termination.set_status(container->status->get().get());
}
- // NOTE: We may not see a limitation in time for it to be
- // registered. This could occur if the limitation (e.g., an OOM)
- // killed the executor and we triggered destroy() off the executor
- // exit.
- if (container->limitation.isSome()) {
- termination.set_state(TaskState::TASK_FAILED);
- termination.set_message(container->limitation->message());
-
- if (container->limitation->has_reason()) {
- termination.set_reason(container->limitation->reason());
- }
- }
-
// Now that we are done destroying the container we need to cleanup
// its runtime directory. There are two cases to consider:
//
@@ -2530,7 +2545,7 @@ Future<bool> MesosContainerizerProcess::kill(
LOG(WARNING) << "Unable to find the pid for container " << containerId
<< ", destroying it";
- destroy(containerId);
+ destroy(containerId, None());
return true;
}
@@ -2643,7 +2658,7 @@ void MesosContainerizerProcess::reaped(const ContainerID& containerId)
LOG(INFO) << "Container " << containerId << " has exited";
// The executor has exited so destroy the container.
- destroy(containerId);
+ destroy(containerId, None());
}
@@ -2656,12 +2671,20 @@ void MesosContainerizerProcess::limited(
return;
}
+ Option<ContainerTermination> termination = None();
+
if (future.isReady()) {
LOG(INFO) << "Container " << containerId << " has reached its limit for"
<< " resource " << future.get().resources()
<< " and will be terminated";
- containers_.at(containerId)->limitation = future.get();
+ termination = ContainerTermination();
+ termination->set_state(TaskState::TASK_FAILED);
+ termination->set_message(future->message());
+
+ if (future->has_reason()) {
+ termination->set_reason(future->reason());
+ }
} else {
// TODO(idownes): A discarded future will not be an error when
// isolators discard their promises after cleanup.
@@ -2671,7 +2694,7 @@ void MesosContainerizerProcess::limited(
}
// The container has been affected by the limitation so destroy it.
- destroy(containerId);
+ destroy(containerId, termination);
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/7d9781f9/src/slave/containerizer/mesos/containerizer.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.hpp b/src/slave/containerizer/mesos/containerizer.hpp
index ad01a97..6d356cc 100644
--- a/src/slave/containerizer/mesos/containerizer.hpp
+++ b/src/slave/containerizer/mesos/containerizer.hpp
@@ -170,7 +170,8 @@ public:
int_fd pipeWrite);
virtual process::Future<bool> destroy(
- const ContainerID& containerId);
+ const ContainerID& containerId,
+ const Option<mesos::slave::ContainerTermination>& termination);
virtual process::Future<bool> kill(
const ContainerID& containerId,
@@ -229,30 +230,38 @@ private:
// Continues 'destroy()' once nested containers are handled.
void _destroy(
const ContainerID& containerId,
+ const Option<mesos::slave::ContainerTermination>& termination,
const State& previousState,
const std::list<process::Future<bool>>& destroys);
// Continues '_destroy()' once isolators has completed.
- void __destroy(const ContainerID& containerId);
+ void __destroy(
+ const ContainerID& containerId,
+ const Option<mesos::slave::ContainerTermination>& termination);
// Continues '__destroy()' once all processes have been killed
// by the launcher.
void ___destroy(
const ContainerID& containerId,
+ const Option<mesos::slave::ContainerTermination>& termination,
const process::Future<Nothing>& future);
// Continues '___destroy()' once we get the exit status of the container.
- void ____destroy(const ContainerID& containerId);
+ void ____destroy(
+ const ContainerID& containerId,
+ const Option<mesos::slave::ContainerTermination>& termination);
// Continues '____destroy()' once all isolators have completed
// cleanup.
void _____destroy(
const ContainerID& containerId,
+ const Option<mesos::slave::ContainerTermination>& termination,
const process::Future<std::list<process::Future<Nothing>>>& cleanups);
// Continues '_____destroy()' once provisioner have completed destroy.
void ______destroy(
const ContainerID& containerId,
+ const Option<mesos::slave::ContainerTermination>& termination,
const process::Future<bool>& destroy);
// Call back for when an isolator limits a container and impacts the
@@ -329,10 +338,6 @@ private:
// calling cleanup after all isolators have finished isolating.
process::Future<std::list<Nothing>> isolation;
- // We keep track of any limitation received from an isolator
- // so we can determine the cause of a container termination.
- Option<mesos::slave::ContainerLimitation> limitation;
-
// We keep track of the resources for each container so we can set
// the ResourceStatistics limits in usage().
Resources resources;
[7/9] mesos git commit: Propagated new wait response fields from the
default executor.
Posted by jp...@apache.org.
Propagated new wait response fields from the default executor.
In the default executor, propagate newly added `WaitNestedContainer`
response fields to the corresponding status update. This allows
frameworks to receive more reliable information about nested
container failures.
Review: https://reviews.apache.org/r/62646/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/6b703932
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/6b703932
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/6b703932
Branch: refs/heads/master
Commit: 6b7039323ae434211ac977d14e3c21a966566f58
Parents: 4dd9ad6
Author: James Peach <jp...@apache.org>
Authored: Sun Oct 15 15:21:53 2017 -0700
Committer: James Peach <jp...@apache.org>
Committed: Sun Oct 15 15:21:53 2017 -0700
----------------------------------------------------------------------
src/launcher/default_executor.cpp | 40 +++++++++++++++++++++++++++++++---
1 file changed, 37 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/6b703932/src/launcher/default_executor.cpp
----------------------------------------------------------------------
diff --git a/src/launcher/default_executor.cpp b/src/launcher/default_executor.cpp
index a376c09..e58766f 100644
--- a/src/launcher/default_executor.cpp
+++ b/src/launcher/default_executor.cpp
@@ -799,6 +799,8 @@ protected:
TaskState taskState;
Option<string> message;
+ Option<TaskStatus::Reason> reason;
+ Option<TaskResourceLimitation> limitation;
if (!waitResponse->wait_nested_container().has_exit_status()) {
taskState = TASK_FAILED;
@@ -822,11 +824,38 @@ protected:
message = "Command " + WSTRINGIFY(status);
}
+ // Note that we always prefer the task state and reason from the
+ // agent response over what we can determine ourselves because
+ // in general, the agent has more specific information about why
+ // the container exited (e.g. this might be a container resource
+ // limitation).
+ if (waitResponse->wait_nested_container().has_state()) {
+ taskState = waitResponse->wait_nested_container().state();
+ }
+
+ if (waitResponse->wait_nested_container().has_reason()) {
+ reason = waitResponse->wait_nested_container().reason();
+ }
+
+ if (waitResponse->wait_nested_container().has_message()) {
+ if (message.isSome()) {
+ message->append(
+ ": " + waitResponse->wait_nested_container().message());
+ } else {
+ message = waitResponse->wait_nested_container().message();
+ }
+ }
+
+ if (waitResponse->wait_nested_container().has_limitation()) {
+ limitation = waitResponse->wait_nested_container().limitation();
+ }
+
TaskStatus taskStatus = createTaskStatus(
taskId,
taskState,
- None(),
- message);
+ reason,
+ message,
+ limitation);
// Indicate that a task has been unhealthy upon termination.
if (unhealthy) {
@@ -1241,7 +1270,8 @@ private:
const TaskID& taskId,
const TaskState& state,
const Option<TaskStatus::Reason>& reason = None(),
- const Option<string>& message = None())
+ const Option<string>& message = None(),
+ const Option<TaskResourceLimitation>& limitation = None())
{
TaskStatus status = protobuf::createTaskStatus(
taskId,
@@ -1260,6 +1290,10 @@ private:
status.set_message(message.get());
}
+ if (limitation.isSome()) {
+ status.mutable_limitation()->CopyFrom(limitation.get());
+ }
+
CHECK(containers.contains(taskId));
const Owned<Container>& container = containers.at(taskId);
[8/9] mesos git commit: Printed resource limitations from
mesos-execute.
Posted by jp...@apache.org.
Printed resource limitations from mesos-execute.
If mesos-execute receives a status update containing a resource
limitation, print the corresponding resources.
Review: https://reviews.apache.org/r/62647/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/08d94682
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/08d94682
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/08d94682
Branch: refs/heads/master
Commit: 08d94682028111ce016481caa859e79c991120ca
Parents: 6b70393
Author: James Peach <jp...@apache.org>
Authored: Sun Oct 15 15:21:56 2017 -0700
Committer: James Peach <jp...@apache.org>
Committed: Sun Oct 15 15:21:56 2017 -0700
----------------------------------------------------------------------
src/cli/execute.cpp | 4 ++++
1 file changed, 4 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/08d94682/src/cli/execute.cpp
----------------------------------------------------------------------
diff --git a/src/cli/execute.cpp b/src/cli/execute.cpp
index 78b62a6..f5d4bc5 100644
--- a/src/cli/execute.cpp
+++ b/src/cli/execute.cpp
@@ -710,6 +710,10 @@ protected:
if (status.has_check_status()) {
cout << " check status: " << status.check_status() << endl;
}
+ if (status.has_limitation() && !status.limitation().resources().empty()) {
+ cout << " resource limit violation: "
+ << status.limitation().resources() << endl;
+ }
if (status.has_uuid()) {
Call call;
[4/9] mesos git commit: Added the TaskResourceLimitation message.
Posted by jp...@apache.org.
Added the TaskResourceLimitation message.
Added the TaskResourceLimitation message.
Review: https://reviews.apache.org/r/62854/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/f3ee9234
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/f3ee9234
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/f3ee9234
Branch: refs/heads/master
Commit: f3ee9234f645cec540f4387f3ea7f4870b810606
Parents: 7d9781f
Author: James Peach <jp...@apache.org>
Authored: Sun Oct 15 15:21:38 2017 -0700
Committer: James Peach <jp...@apache.org>
Committed: Sun Oct 15 15:21:38 2017 -0700
----------------------------------------------------------------------
include/mesos/mesos.proto | 12 ++++++++++++
include/mesos/v1/mesos.proto | 12 ++++++++++++
2 files changed, 24 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/f3ee9234/include/mesos/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto
index 2379c2e..1d346d8 100644
--- a/include/mesos/mesos.proto
+++ b/include/mesos/mesos.proto
@@ -2125,6 +2125,18 @@ enum TaskState {
/**
+ * Describes a resource limitation that caused a task failure.
+ */
+message TaskResourceLimitation {
+ // This field contains the resource whose limits were violated.
+ //
+ // NOTE: 'Resources' is used here because the resource may span
+ // multiple roles (e.g. `"mem(*):1;mem(role):2"`).
+ repeated Resource resources = 1;
+}
+
+
+/**
* Describes the status of a check. Type and the corresponding field, i.e.,
* `command` or `http` must be set. If the result of the check is not available
* (e.g., the check timed out), these fields must contain empty messages, i.e.,
http://git-wip-us.apache.org/repos/asf/mesos/blob/f3ee9234/include/mesos/v1/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/v1/mesos.proto b/include/mesos/v1/mesos.proto
index c3581b5..5049b8f 100644
--- a/include/mesos/v1/mesos.proto
+++ b/include/mesos/v1/mesos.proto
@@ -2106,6 +2106,18 @@ enum TaskState {
/**
+ * Describes a resource limitation that caused a task failure.
+ */
+message TaskResourceLimitation {
+ // This field contains the resource whose limits were violated.
+ //
+ // NOTE: 'Resources' is used here because the resource may span
+ // multiple roles (e.g. `"mem(*):1;mem(role):2"`).
+ repeated Resource resources = 1;
+}
+
+
+/**
* Describes the status of a check. Type and the corresponding field, i.e.,
* `command` or `http` must be set. If the result of the check is not available
* (e.g., the check timed out), these fields must contain empty messages, i.e.,
[9/9] mesos git commit: Added a test for nested container resource
limitations.
Posted by jp...@apache.org.
Added a test for nested container resource limitations.
Added a test to verify that resource limitations incurred by a nested
container are propagated all the way back to the scheduler through a
`TASK_FAILED` status update.
Review: https://reviews.apache.org/r/62648/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/cc29c27e
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/cc29c27e
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/cc29c27e
Branch: refs/heads/master
Commit: cc29c27ee597fbe79206d890d695c0a2e64c46aa
Parents: 08d9468
Author: James Peach <jp...@apache.org>
Authored: Sun Oct 15 15:21:59 2017 -0700
Committer: James Peach <jp...@apache.org>
Committed: Sun Oct 15 15:21:59 2017 -0700
----------------------------------------------------------------------
src/tests/default_executor_tests.cpp | 116 ++++++++++++++++++++++++++++++
1 file changed, 116 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/cc29c27e/src/tests/default_executor_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/default_executor_tests.cpp b/src/tests/default_executor_tests.cpp
index 2b4c643..6831201 100644
--- a/src/tests/default_executor_tests.cpp
+++ b/src/tests/default_executor_tests.cpp
@@ -77,6 +77,7 @@ using testing::WithParamInterface;
using mesos::internal::slave::Containerizer;
using mesos::internal::slave::Fetcher;
using mesos::internal::slave::MesosContainerizer;
+using mesos::internal::slave::Slave;
using mesos::slave::ContainerTermination;
@@ -1362,6 +1363,121 @@ TEST_P(DefaultExecutorTest, ROOT_MultiTaskgroupSharePidNamespace)
#endif // __linux__
+// This test verifies that a resource limitation incurred on a nested
+// container is propagated all the way up to the scheduler.
+TEST_P_TEMP_DISABLED_ON_WINDOWS(
+ DefaultExecutorTest, ResourceLimitation)
+{
+ Try<Owned<cluster::Master>> master = StartMaster();
+ ASSERT_SOME(master);
+
+ slave::Flags flags = CreateSlaveFlags();
+ flags.containerizers = GetParam();
+ flags.enforce_container_disk_quota = true;
+ flags.container_disk_watch_interval = Milliseconds(1);
+ flags.isolation = "disk/du";
+
+ Owned<MasterDetector> detector = master.get()->createDetector();
+ Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
+ ASSERT_SOME(slave);
+
+ auto scheduler = std::make_shared<v1::MockHTTPScheduler>();
+
+ EXPECT_CALL(*scheduler, connected(_))
+ .WillOnce(v1::scheduler::SendSubscribe(v1::DEFAULT_FRAMEWORK_INFO));
+
+ Future<v1::scheduler::Event::Subscribed> subscribed;
+ EXPECT_CALL(*scheduler, subscribed(_, _))
+ .WillOnce(FutureArg<1>(&subscribed));
+
+ Future<v1::scheduler::Event::Offers> offers;
+ EXPECT_CALL(*scheduler, offers(_, _))
+ .WillOnce(FutureArg<1>(&offers))
+ .WillRepeatedly(Return());
+
+ EXPECT_CALL(*scheduler, heartbeat(_))
+ .WillRepeatedly(Return()); // Ignore heartbeats.
+
+ v1::scheduler::TestMesos mesos(
+ master.get()->pid,
+ ContentType::PROTOBUF,
+ scheduler);
+
+ AWAIT_READY(subscribed);
+ v1::FrameworkID frameworkId(subscribed->framework_id());
+
+ v1::Resources resources =
+ v1::Resources::parse("cpus:0.1;mem:32;disk:10").get();
+
+ v1::ExecutorInfo executorInfo = v1::createExecutorInfo(
+ v1::DEFAULT_EXECUTOR_ID,
+ None(),
+ resources,
+ v1::ExecutorInfo::DEFAULT,
+ frameworkId);
+
+ AWAIT_READY(offers);
+ ASSERT_FALSE(offers->offers().empty());
+
+ const v1::Offer& offer = offers->offers(0);
+ const v1::AgentID& agentId = offer.agent_id();
+
+ Future<v1::scheduler::Event::Update> running;
+ Future<v1::scheduler::Event::Update> failed;
+ EXPECT_CALL(*scheduler, update(_, _))
+ .WillOnce(
+ DoAll(
+ FutureArg<1>(&running),
+ v1::scheduler::SendAcknowledge(frameworkId, agentId)))
+ .WillOnce(
+ DoAll(
+ FutureArg<1>(&failed),
+ v1::scheduler::SendAcknowledge(frameworkId, agentId)));
+
+ // Since we requested 10MB each for the task and the executor,
+ // writing 30MB will violate our disk resource limit.
+ v1::TaskInfo taskInfo = v1::createTask(
+ agentId,
+ resources,
+ "dd if=/dev/zero of=dd.out bs=1048576 count=30; sleep 1000");
+
+ mesos.send(
+ v1::createCallAccept(
+ frameworkId,
+ offer,
+ {v1::LAUNCH_GROUP(
+ executorInfo, v1::createTaskGroupInfo({taskInfo}))}));
+
+ Future<Nothing> ack =
+ FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement);
+
+ AWAIT_READY(running);
+ AWAIT_READY(ack);
+
+ EXPECT_EQ(TASK_RUNNING, running->status().state());
+ EXPECT_EQ(taskInfo.task_id(), running->status().task_id());
+
+ AWAIT_READY(failed);
+
+ // We expect the failure to be a disk limitation that tells us something
+ // about the disk resources.
+ EXPECT_EQ(TASK_FAILED, failed->status().state());
+ EXPECT_EQ(
+ TaskStatus::REASON_CONTAINER_LIMITATION_DISK,
+ failed->status().reason());
+
+ EXPECT_EQ(taskInfo.task_id(), failed->status().task_id());
+ ASSERT_TRUE(failed->status().has_limitation());
+ EXPECT_GT(failed->status().limitation().resources().size(), 0);
+
+ foreach (const v1::Resource& resource,
+ failed->status().limitation().resources()) {
+ EXPECT_EQ("disk", resource.name());
+ EXPECT_EQ(mesos::v1::Value::SCALAR, resource.type());
+ }
+}
+
+
struct LauncherAndIsolationParam
{
LauncherAndIsolationParam(const string& _launcher, const string& _isolation)
[6/9] mesos git commit: Added resources to the TaskStatus message.
Posted by jp...@apache.org.
Added resources to the TaskStatus message.
Added a `TaskResourceLimitation` field to the `TaskStatus`
message to convey specific information about a resource
limit that has been violated by a container.
This field propagates the resources from the `ContainerLimitation`
message sent by isolators on the agent, and is populated for the
following reasons:
* `REASON_CONTAINER_LIMITATION`
* `REASON_CONTAINER_LIMITATION_DISK`
* `REASON_CONTAINER_LIMITATION_MEMORY`
Review: https://reviews.apache.org/r/62644/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/4dd9ad6d
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/4dd9ad6d
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/4dd9ad6d
Branch: refs/heads/master
Commit: 4dd9ad6d5eb019e82d9cb00fe749ef7f8af65492
Parents: 98d96ca
Author: James Peach <jp...@apache.org>
Authored: Sun Oct 15 15:21:46 2017 -0700
Committer: James Peach <jp...@apache.org>
Committed: Sun Oct 15 15:21:46 2017 -0700
----------------------------------------------------------------------
include/mesos/mesos.proto | 4 ++++
include/mesos/v1/mesos.proto | 4 ++++
src/common/protobuf_utils.cpp | 17 +++++++++++++++-
src/common/protobuf_utils.hpp | 3 ++-
src/slave/slave.cpp | 41 +++++++++++++++++++++++++-------------
5 files changed, 53 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/4dd9ad6d/include/mesos/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto
index 1d346d8..859fdff 100644
--- a/include/mesos/mesos.proto
+++ b/include/mesos/mesos.proto
@@ -2296,6 +2296,10 @@ message TaskStatus {
// status updates for tasks running on agents that are unreachable
// (e.g., partitioned away from the master).
optional TimeInfo unreachable_time = 14;
+
+ // If the reason field indicates a container resource limitation,
+ // this field optionally contains additional information.
+ optional TaskResourceLimitation limitation = 16;
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/4dd9ad6d/include/mesos/v1/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/v1/mesos.proto b/include/mesos/v1/mesos.proto
index 5049b8f..cfd4abd 100644
--- a/include/mesos/v1/mesos.proto
+++ b/include/mesos/v1/mesos.proto
@@ -2277,6 +2277,10 @@ message TaskStatus {
// status updates for tasks running on agents that are unreachable
// (e.g., partitioned away from the master).
optional TimeInfo unreachable_time = 14;
+
+ // If the reason field indicates a container resource limitation,
+ // this field optionally contains additional information.
+ optional TaskResourceLimitation limitation = 16;
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/4dd9ad6d/src/common/protobuf_utils.cpp
----------------------------------------------------------------------
diff --git a/src/common/protobuf_utils.cpp b/src/common/protobuf_utils.cpp
index 04f44f6..fd4858a 100644
--- a/src/common/protobuf_utils.cpp
+++ b/src/common/protobuf_utils.cpp
@@ -110,7 +110,8 @@ StatusUpdate createStatusUpdate(
const Option<CheckStatusInfo>& checkStatus,
const Option<Labels>& labels,
const Option<ContainerStatus>& containerStatus,
- const Option<TimeInfo>& unreachableTime)
+ const Option<TimeInfo>& unreachableTime,
+ const Option<Resources>& limitedResources)
{
StatusUpdate update;
@@ -168,6 +169,20 @@ StatusUpdate createStatusUpdate(
status->mutable_unreachable_time()->CopyFrom(unreachableTime.get());
}
+ if (limitedResources.isSome()) {
+ // Check that we are only sending the `Limitation` field when the
+ // reason is a container limitation.
+ CHECK_SOME(reason);
+ CHECK(
+ reason.get() == TaskStatus::REASON_CONTAINER_LIMITATION ||
+ reason.get() == TaskStatus::REASON_CONTAINER_LIMITATION_DISK ||
+ reason.get() == TaskStatus::REASON_CONTAINER_LIMITATION_MEMORY)
+ << reason.get();
+
+ status->mutable_limitation()->mutable_resources()->CopyFrom(
+ limitedResources.get());
+ }
+
return update;
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/4dd9ad6d/src/common/protobuf_utils.hpp
----------------------------------------------------------------------
diff --git a/src/common/protobuf_utils.hpp b/src/common/protobuf_utils.hpp
index ff0fd01..c43ab75 100644
--- a/src/common/protobuf_utils.hpp
+++ b/src/common/protobuf_utils.hpp
@@ -88,7 +88,8 @@ StatusUpdate createStatusUpdate(
const Option<CheckStatusInfo>& checkStatus = None(),
const Option<Labels>& labels = None(),
const Option<ContainerStatus>& containerStatus = None(),
- const Option<TimeInfo>& unreachableTime = None());
+ const Option<TimeInfo>& unreachableTime = None(),
+ const Option<Resources>& limitedResources = None());
StatusUpdate createStatusUpdate(
http://git-wip-us.apache.org/repos/asf/mesos/blob/4dd9ad6d/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index df0e894..4d7dc8e 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -6890,9 +6890,10 @@ void Slave::sendExecutorTerminatedStatusUpdate(
TaskStatus::Reason reason;
string message;
+ const bool haveTermination = termination.isReady() && termination->isSome();
+
// Determine the task state for the status update.
- if (termination.isReady() &&
- termination->isSome() && termination->get().has_state()) {
+ if (haveTermination && termination->get().has_state()) {
state = termination->get().state();
} else if (executor->pendingTermination.isSome() &&
executor->pendingTermination->has_state()) {
@@ -6902,8 +6903,7 @@ void Slave::sendExecutorTerminatedStatusUpdate(
}
// Determine the task reason for the status update.
- if (termination.isReady() &&
- termination->isSome() && termination->get().has_reason()) {
+ if (haveTermination && termination->get().has_reason()) {
reason = termination->get().reason();
} else if (executor->pendingTermination.isSome() &&
executor->pendingTermination->has_reason()) {
@@ -6936,16 +6936,29 @@ void Slave::sendExecutorTerminatedStatusUpdate(
message = strings::join("; ", messages);
}
- statusUpdate(protobuf::createStatusUpdate(
- frameworkId,
- info.id(),
- taskId,
- state,
- TaskStatus::SOURCE_SLAVE,
- UUID::random(),
- message,
- reason,
- executor->id),
+ Option<Resources> limitedResources;
+
+ if (haveTermination && !termination->get().limited_resources().empty()) {
+ limitedResources = termination->get().limited_resources();
+ }
+
+ statusUpdate(
+ protobuf::createStatusUpdate(
+ frameworkId,
+ info.id(),
+ taskId,
+ state,
+ TaskStatus::SOURCE_SLAVE,
+ UUID::random(),
+ message,
+ reason,
+ executor->id,
+ None(),
+ None(),
+ None(),
+ None(),
+ None(),
+ limitedResources),
UPID());
}