You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by jp...@apache.org on 2017/10/15 22:48:45 UTC

[1/9] mesos git commit: Stopped keeping multiple limitations in MesosContainerizer.

Repository: mesos
Updated Branches:
  refs/heads/master 1c51c9863 -> cc29c27ee


Stopped keeping multiple limitations in MesosContainerizer.

MesosContainerizer was keeping a vector of ContainerLimitation
objects, but in practice it was not possible to have more than one at
a time, since receiving a limitation initiates a container destroy
and new limitations are dropped once the destroy begins. We replace
the vector<ContainerLimitation> with an Option<ContainerLimitation>,
but otherwise preserve all the existing semantics.

Review: https://reviews.apache.org/r/62640/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/14117009
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/14117009
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/14117009

Branch: refs/heads/master
Commit: 1411700928e51aabbb3d38641c9d9f39815918d4
Parents: 1c51c98
Author: James Peach <jp...@apache.org>
Authored: Sun Oct 15 15:21:19 2017 -0700
Committer: James Peach <jp...@apache.org>
Committed: Sun Oct 15 15:21:19 2017 -0700

----------------------------------------------------------------------
 src/slave/containerizer/mesos/containerizer.cpp | 18 +++++-------------
 src/slave/containerizer/mesos/containerizer.hpp |  6 +++---
 2 files changed, 8 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/14117009/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp
index 4d5dc13..4851855 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -2440,21 +2440,13 @@ void MesosContainerizerProcess::______destroy(
   // registered. This could occur if the limitation (e.g., an OOM)
   // killed the executor and we triggered destroy() off the executor
   // exit.
-  if (!container->limitations.empty()) {
+  if (container->limitation.isSome()) {
     termination.set_state(TaskState::TASK_FAILED);
+    termination.set_message(container->limitation->message());
 
-    // We concatenate the messages if there are multiple limitations.
-    vector<string> messages;
-
-    foreach (const ContainerLimitation& limitation, container->limitations) {
-      messages.push_back(limitation.message());
-
-      if (limitation.has_reason()) {
-        termination.add_reasons(limitation.reason());
-      }
+    if (container->limitation->has_reason()) {
+      termination.add_reasons(container->limitation->reason());
     }
-
-    termination.set_message(strings::join("; ", messages));
   }
 
   // Now that we are done destroying the container we need to cleanup
@@ -2669,7 +2661,7 @@ void MesosContainerizerProcess::limited(
               << " resource " << future.get().resources()
               << " and will be terminated";
 
-    containers_.at(containerId)->limitations.push_back(future.get());
+    containers_.at(containerId)->limitation = future.get();
   } else {
     // TODO(idownes): A discarded future will not be an error when
     // isolators discard their promises after cleanup.

http://git-wip-us.apache.org/repos/asf/mesos/blob/14117009/src/slave/containerizer/mesos/containerizer.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.hpp b/src/slave/containerizer/mesos/containerizer.hpp
index cc23b4d..ad01a97 100644
--- a/src/slave/containerizer/mesos/containerizer.hpp
+++ b/src/slave/containerizer/mesos/containerizer.hpp
@@ -329,9 +329,9 @@ private:
     // calling cleanup after all isolators have finished isolating.
     process::Future<std::list<Nothing>> isolation;
 
-    // We keep track of any limitations received from each isolator so
-    // we can determine the cause of a container termination.
-    std::vector<mesos::slave::ContainerLimitation> limitations;
+    // We keep track of any limitation received from an isolator
+    // so we can determine the cause of a container termination.
+    Option<mesos::slave::ContainerLimitation> limitation;
 
     // We keep track of the resources for each container so we can set
     // the ResourceStatistics limits in usage().


[5/9] mesos git commit: Propagated the container termination to the agent API.

Posted by jp...@apache.org.
Propagated the container termination to the agent API.

Updated the agent API so that we can propagate information
from the container termination up to the `WaitNestedContainer`
response. We now propagate resources all the way from the
container limitation to the `WaitNestedContainer` response so
that an executor can know specifically which resource limit
violation caused the container termination.

Review: https://reviews.apache.org/r/62643/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/98d96ca9
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/98d96ca9
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/98d96ca9

Branch: refs/heads/master
Commit: 98d96ca96570eb4d0d1604ba738c24ecc7e71f7f
Parents: f3ee923
Author: James Peach <jp...@apache.org>
Authored: Sun Oct 15 15:21:41 2017 -0700
Committer: James Peach <jp...@apache.org>
Committed: Sun Oct 15 15:21:41 2017 -0700

----------------------------------------------------------------------
 include/mesos/agent/agent.proto                 | 13 +++++++++++++
 include/mesos/slave/containerizer.proto         |  7 +++++++
 include/mesos/v1/agent/agent.proto              |  9 +++++++++
 src/slave/containerizer/mesos/containerizer.cpp |  5 +++++
 src/slave/http.cpp                              | 17 +++++++++++++++++
 5 files changed, 51 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/98d96ca9/include/mesos/agent/agent.proto
----------------------------------------------------------------------
diff --git a/include/mesos/agent/agent.proto b/include/mesos/agent/agent.proto
index 527bcf2..4df3dce 100644
--- a/include/mesos/agent/agent.proto
+++ b/include/mesos/agent/agent.proto
@@ -338,6 +338,19 @@ message Response {
     // family of macros to extract whether the process exited cleanly and
     // what the exit code was.
     optional int32 exit_status = 1;
+
+    // The `state` and `reason` fields may be populated if the Mesos agent
+    // terminates the container. In the absence of any special knowledge,
+    // executors should propagate this information via the `status` field
+    // of an `Update` call for the corresponding TaskID.
+    optional TaskState state = 2;
+    optional TaskStatus.Reason reason = 3;
+
+    // This field will be populated if the task was terminated due to
+    // a resource limitation.
+    optional TaskResourceLimitation limitation = 4;
+
+    optional string message = 5;
   }
 
   optional Type type = 1;

http://git-wip-us.apache.org/repos/asf/mesos/blob/98d96ca9/include/mesos/slave/containerizer.proto
----------------------------------------------------------------------
diff --git a/include/mesos/slave/containerizer.proto b/include/mesos/slave/containerizer.proto
index 4375a38..689acfc 100644
--- a/include/mesos/slave/containerizer.proto
+++ b/include/mesos/slave/containerizer.proto
@@ -247,4 +247,11 @@ message ContainerTermination {
   optional TaskState state = 4;
   optional TaskStatus.Reason reason = 5;
   optional string message = 2;
+
+  // If the container was terminated due to a resource limitation,
+  // this is the resource that caused the termination.
+  //
+  // NOTE: 'Resources' is used here because the resource may span
+  // multiple roles (e.g. `"mem(*):1;mem(role):2"`).
+  repeated Resource limited_resources = 6;
 }

http://git-wip-us.apache.org/repos/asf/mesos/blob/98d96ca9/include/mesos/v1/agent/agent.proto
----------------------------------------------------------------------
diff --git a/include/mesos/v1/agent/agent.proto b/include/mesos/v1/agent/agent.proto
index be2a2f7..e99d23d 100644
--- a/include/mesos/v1/agent/agent.proto
+++ b/include/mesos/v1/agent/agent.proto
@@ -338,6 +338,15 @@ message Response {
     // family of macros to extract whether the process exited cleanly and
     // what the exit code was.
     optional int32 exit_status = 1;
+
+    optional TaskState state = 2;
+    optional TaskStatus.Reason reason = 3;
+
+    // This field will be populated if the task was terminated due to
+    // a resource limitation.
+    optional TaskResourceLimitation limitation = 4;
+
+    optional string message = 5;
   }
 
   optional Type type = 1;

http://git-wip-us.apache.org/repos/asf/mesos/blob/98d96ca9/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp
index 23caba2..78fdd21 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -2685,6 +2685,11 @@ void MesosContainerizerProcess::limited(
     if (future->has_reason()) {
       termination->set_reason(future->reason());
     }
+
+    if (!future->resources().empty()) {
+        termination->mutable_limited_resources()->CopyFrom(
+            future->resources());
+    }
   } else {
     // TODO(idownes): A discarded future will not be an error when
     // isolators discard their promises after cleanup.

http://git-wip-us.apache.org/repos/asf/mesos/blob/98d96ca9/src/slave/http.cpp
----------------------------------------------------------------------
diff --git a/src/slave/http.cpp b/src/slave/http.cpp
index f4c3e6b..f2e06af 100644
--- a/src/slave/http.cpp
+++ b/src/slave/http.cpp
@@ -2512,6 +2512,23 @@ Future<Response> Http::waitNestedContainer(
             waitNestedContainer->set_exit_status(termination->status());
           }
 
+          if (termination->has_state()) {
+            waitNestedContainer->set_state(termination->state());
+          }
+
+          if (termination->has_reason()) {
+            waitNestedContainer->set_reason(termination->reason());
+          }
+
+          if (!termination->limited_resources().empty()) {
+            waitNestedContainer->mutable_limitation()->mutable_resources()
+              ->CopyFrom(termination->limited_resources());
+          }
+
+          if (termination->has_message()) {
+            waitNestedContainer->set_message(termination->message());
+          }
+
           return OK(serialize(acceptType, evolve(response)),
                     stringify(acceptType));
         });


[2/9] mesos git commit: Removed multiple reasons from ContainerTermination.

Posted by jp...@apache.org.
Removed multiple reasons from ContainerTermination.

Since the ContainerTermination is no longer ever populated with more
than one TaskStatus.Reason, make the field optional rather than repeated.

Review: https://reviews.apache.org/r/62641/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/0f5328ad
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/0f5328ad
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/0f5328ad

Branch: refs/heads/master
Commit: 0f5328ad08849694cd0ca897f957852337bba48c
Parents: 1411700
Author: James Peach <jp...@apache.org>
Authored: Sun Oct 15 15:21:24 2017 -0700
Committer: James Peach <jp...@apache.org>
Committed: Sun Oct 15 15:21:24 2017 -0700

----------------------------------------------------------------------
 include/mesos/slave/containerizer.proto         |  2 +-
 src/slave/containerizer/mesos/containerizer.cpp |  2 +-
 src/slave/slave.cpp                             | 27 ++++++++++----------
 .../containerizer/io_switchboard_tests.cpp      |  4 +--
 4 files changed, 17 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/0f5328ad/include/mesos/slave/containerizer.proto
----------------------------------------------------------------------
diff --git a/include/mesos/slave/containerizer.proto b/include/mesos/slave/containerizer.proto
index 84f9ca7..4375a38 100644
--- a/include/mesos/slave/containerizer.proto
+++ b/include/mesos/slave/containerizer.proto
@@ -245,6 +245,6 @@ message ContainerTermination {
   // The 'state', 'reasons' and 'message' of a status update for
   // non-terminal tasks when the executor is terminated.
   optional TaskState state = 4;
-  repeated TaskStatus.Reason reasons = 5;
+  optional TaskStatus.Reason reason = 5;
   optional string message = 2;
 }

http://git-wip-us.apache.org/repos/asf/mesos/blob/0f5328ad/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp
index 4851855..fd7b71e 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -2445,7 +2445,7 @@ void MesosContainerizerProcess::______destroy(
     termination.set_message(container->limitation->message());
 
     if (container->limitation->has_reason()) {
-      termination.add_reasons(container->limitation->reason());
+      termination.set_reason(container->limitation->reason());
     }
   }
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/0f5328ad/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index c35cf7d..df0e894 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -2493,7 +2493,7 @@ void Slave::___run(
 
       ContainerTermination termination;
       termination.set_state(taskState);
-      termination.add_reasons(TaskStatus::REASON_CONTAINER_UPDATE_FAILED);
+      termination.set_reason(TaskStatus::REASON_CONTAINER_UPDATE_FAILED);
       termination.set_message(
           "Failed to update resources for container: " +
           (future.isFailed() ? future.failure() : "discarded"));
@@ -2753,7 +2753,7 @@ void Slave::launchExecutor(
     // and perform cleanup via `executorTerminated`.
     ContainerTermination termination;
     termination.set_state(TASK_FAILED);
-    termination.add_reasons(TaskStatus::REASON_CONTAINER_LAUNCH_FAILED);
+    termination.set_reason(TaskStatus::REASON_CONTAINER_LAUNCH_FAILED);
     termination.set_message("Executor " + executorState);
 
     executorTerminated(frameworkId, executorId, termination);
@@ -2774,7 +2774,7 @@ void Slave::launchExecutor(
 
       ContainerTermination termination;
       termination.set_state(TASK_FAILED);
-      termination.add_reasons(TaskStatus::REASON_CONTAINER_LAUNCH_FAILED);
+      termination.set_reason(TaskStatus::REASON_CONTAINER_LAUNCH_FAILED);
       termination.set_message(
           "Secret generation failed: " +
           (future->isFailed() ? future->failure() : "discarded"));
@@ -4303,7 +4303,7 @@ void Slave::_reregisterExecutor(
 
       ContainerTermination termination;
       termination.set_state(taskState);
-      termination.add_reasons(TaskStatus::REASON_CONTAINER_UPDATE_FAILED);
+      termination.set_reason(TaskStatus::REASON_CONTAINER_UPDATE_FAILED);
       termination.set_message(
           "Failed to update resources for container: " +
           (future.isFailed() ? future.failure() : "discarded"));
@@ -4356,7 +4356,7 @@ void Slave::reregisterExecutorTimeout()
 
           ContainerTermination termination;
           termination.set_state(taskState);
-          termination.add_reasons(
+          termination.set_reason(
               TaskStatus::REASON_EXECUTOR_REREGISTRATION_TIMEOUT);
           termination.set_message(
               "Executor did not re-register within " +
@@ -4737,7 +4737,7 @@ void Slave::__statusUpdate(
 
       ContainerTermination termination;
       termination.set_state(taskState);
-      termination.add_reasons(TaskStatus::REASON_CONTAINER_UPDATE_FAILED);
+      termination.set_reason(TaskStatus::REASON_CONTAINER_UPDATE_FAILED);
       termination.set_message(
           "Failed to update resources for container: " +
           (future->isFailed() ? future->failure() : "discarded"));
@@ -5299,7 +5299,7 @@ void Slave::executorLaunched(
     if (executor != nullptr) {
       ContainerTermination termination;
       termination.set_state(TASK_FAILED);
-      termination.add_reasons(TaskStatus::REASON_CONTAINER_LAUNCH_FAILED);
+      termination.set_reason(TaskStatus::REASON_CONTAINER_LAUNCH_FAILED);
       termination.set_message(
           "Failed to launch container: " +
           (future.isFailed() ? future.failure() : "discarded"));
@@ -5883,7 +5883,7 @@ void Slave::registerExecutorTimeout(
 
       ContainerTermination termination;
       termination.set_state(TASK_FAILED);
-      termination.add_reasons(TaskStatus::REASON_EXECUTOR_REGISTRATION_TIMEOUT);
+      termination.set_reason(TaskStatus::REASON_EXECUTOR_REGISTRATION_TIMEOUT);
       termination.set_message(
           "Executor did not register within " +
           stringify(flags.executor_registration_timeout));
@@ -6681,7 +6681,7 @@ void Slave::_qosCorrections(const Future<list<QoSCorrection>>& future)
 
           ContainerTermination termination;
           termination.set_state(taskState);
-          termination.add_reasons(TaskStatus::REASON_CONTAINER_PREEMPTED);
+          termination.set_reason(TaskStatus::REASON_CONTAINER_PREEMPTED);
           termination.set_message("Container preempted by QoS correction");
 
           executor->pendingTermination = termination;
@@ -6902,13 +6902,12 @@ void Slave::sendExecutorTerminatedStatusUpdate(
   }
 
   // Determine the task reason for the status update.
-  // TODO(jieyu): Handle multiple reasons (MESOS-2657).
   if (termination.isReady() &&
-      termination->isSome() && termination->get().reasons().size() > 0) {
-    reason = termination->get().reasons(0);
+      termination->isSome() && termination->get().has_reason()) {
+    reason = termination->get().reason();
   } else if (executor->pendingTermination.isSome() &&
-             executor->pendingTermination->reasons().size() > 0) {
-    reason = executor->pendingTermination->reasons(0);
+             executor->pendingTermination->has_reason()) {
+    reason = executor->pendingTermination->reason();
   } else {
     reason = TaskStatus::REASON_EXECUTOR_TERMINATED;
   }

http://git-wip-us.apache.org/repos/asf/mesos/blob/0f5328ad/src/tests/containerizer/io_switchboard_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer/io_switchboard_tests.cpp b/src/tests/containerizer/io_switchboard_tests.cpp
index bf7917a..c3410cd 100644
--- a/src/tests/containerizer/io_switchboard_tests.cpp
+++ b/src/tests/containerizer/io_switchboard_tests.cpp
@@ -900,9 +900,9 @@ TEST_F(IOSwitchboardTest, KillSwitchboardContainerDestroyed)
   ASSERT_TRUE(wait.get()->has_status());
   EXPECT_WTERMSIG_EQ(SIGKILL, wait.get()->status());
 
-  ASSERT_TRUE(wait.get()->reasons().size() == 1);
+  ASSERT_TRUE(wait.get()->has_reason());
   ASSERT_EQ(TaskStatus::REASON_IO_SWITCHBOARD_EXITED,
-            wait.get()->reasons().Get(0));
+            wait.get()->reason());
 
   wait = containerizer->wait(containerId);
 


[3/9] mesos git commit: Propagated the termination info down the container tree.

Posted by jp...@apache.org.
Propagated the termination info down the container tree.

When the MesosContainerizer destroys a container tree, we need to
propagate the ContainerTermination down to all the child containers
so that any executor that is waiting for them can receive enough
information to send a useful status update.

Review: https://reviews.apache.org/r/62642/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/7d9781f9
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/7d9781f9
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/7d9781f9

Branch: refs/heads/master
Commit: 7d9781f93fcde7e9fb4f8dfcbc1e71514f6b2dc7
Parents: 0f5328a
Author: James Peach <jp...@apache.org>
Authored: Sun Oct 15 15:21:35 2017 -0700
Committer: James Peach <jp...@apache.org>
Committed: Sun Oct 15 15:21:35 2017 -0700

----------------------------------------------------------------------
 src/slave/containerizer/mesos/containerizer.cpp | 85 +++++++++++++-------
 src/slave/containerizer/mesos/containerizer.hpp | 19 +++--
 2 files changed, 66 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/7d9781f9/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp
index fd7b71e..23caba2 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -612,7 +612,8 @@ Future<bool> MesosContainerizer::destroy(const ContainerID& containerId)
 {
   return dispatch(process.get(),
                   &MesosContainerizerProcess::destroy,
-                  containerId);
+                  containerId,
+                  None());
 }
 
 
@@ -1003,7 +1004,7 @@ Future<Nothing> MesosContainerizerProcess::__recover(
   // Destroy all the orphan containers.
   foreach (const ContainerID& containerId, orphans) {
     LOG(INFO) << "Cleaning up orphan container " << containerId;
-    destroy(containerId);
+    destroy(containerId, None());
   }
 
   return Nothing();
@@ -2166,7 +2167,8 @@ Future<ContainerStatus> MesosContainerizerProcess::status(
 
 
 Future<bool> MesosContainerizerProcess::destroy(
-    const ContainerID& containerId)
+    const ContainerID& containerId,
+    const Option<ContainerTermination>& termination)
 {
   if (!containers_.contains(containerId)) {
     // This can happen due to the race between destroys initiated by
@@ -2213,12 +2215,12 @@ Future<bool> MesosContainerizerProcess::destroy(
 
   list<Future<bool>> destroys;
   foreach (const ContainerID& child, container->children) {
-    destroys.push_back(destroy(child));
+    destroys.push_back(destroy(child, termination));
   }
 
   await(destroys)
     .then(defer(self(), [=](const list<Future<bool>>& futures) {
-      _destroy(containerId, previousState, futures);
+      _destroy(containerId, termination, previousState, futures);
       return Nothing();
     }));
 
@@ -2234,6 +2236,7 @@ Future<bool> MesosContainerizerProcess::destroy(
 
 void MesosContainerizerProcess::_destroy(
     const ContainerID& containerId,
+    const Option<ContainerTermination>& termination,
     const State& previousState,
     const list<Future<bool>>& destroys)
 {
@@ -2272,6 +2275,7 @@ void MesosContainerizerProcess::_destroy(
           self(),
           &Self::_____destroy,
           containerId,
+          termination,
           list<Future<Nothing>>()));
 
     return;
@@ -2295,7 +2299,7 @@ void MesosContainerizerProcess::_destroy(
           container->status.isSome()
             ? container->status.get()
             : None())
-      .onAny(defer(self(), &Self::____destroy, containerId));
+      .onAny(defer(self(), &Self::____destroy, containerId, termination));
 
     return;
   }
@@ -2307,7 +2311,7 @@ void MesosContainerizerProcess::_destroy(
     // Wait for the isolators to finish isolating before we start
     // to destroy the container.
     container->isolation
-      .onAny(defer(self(), &Self::__destroy, containerId));
+      .onAny(defer(self(), &Self::__destroy, containerId, termination));
 
     return;
   }
@@ -2317,23 +2321,30 @@ void MesosContainerizerProcess::_destroy(
     fetcher->kill(containerId);
   }
 
-  __destroy(containerId);
+  __destroy(containerId, termination);
 }
 
 
 void MesosContainerizerProcess::__destroy(
-    const ContainerID& containerId)
+    const ContainerID& containerId,
+    const Option<ContainerTermination>& termination)
 {
   CHECK(containers_.contains(containerId));
 
   // Kill all processes then continue destruction.
   launcher->destroy(containerId)
-    .onAny(defer(self(), &Self::___destroy, containerId, lambda::_1));
+    .onAny(defer(
+        self(),
+        &Self::___destroy,
+        containerId,
+        termination,
+        lambda::_1));
 }
 
 
 void MesosContainerizerProcess::___destroy(
     const ContainerID& containerId,
+    const Option<ContainerTermination>& termination,
     const Future<Nothing>& future)
 {
   CHECK(containers_.contains(containerId));
@@ -2361,22 +2372,29 @@ void MesosContainerizerProcess::___destroy(
   CHECK_SOME(container->status);
 
   container->status.get()
-    .onAny(defer(self(), &Self::____destroy, containerId));
+    .onAny(defer(self(), &Self::____destroy, containerId, termination));
 }
 
 
 void MesosContainerizerProcess::____destroy(
-    const ContainerID& containerId)
+    const ContainerID& containerId,
+    const Option<ContainerTermination>& termination)
 {
   CHECK(containers_.contains(containerId));
 
   cleanupIsolators(containerId)
-    .onAny(defer(self(), &Self::_____destroy, containerId, lambda::_1));
+    .onAny(defer(
+        self(),
+        &Self::_____destroy,
+        containerId,
+        termination,
+        lambda::_1));
 }
 
 
 void MesosContainerizerProcess::_____destroy(
     const ContainerID& containerId,
+    const Option<ContainerTermination>& termination,
     const Future<list<Future<Nothing>>>& cleanups)
 {
   // This should not occur because we only use the Future<list> to
@@ -2407,12 +2425,18 @@ void MesosContainerizerProcess::_____destroy(
   }
 
   provisioner->destroy(containerId)
-    .onAny(defer(self(), &Self::______destroy, containerId, lambda::_1));
+    .onAny(defer(
+        self(),
+        &Self::______destroy,
+        containerId,
+        termination,
+        lambda::_1));
 }
 
 
 void MesosContainerizerProcess::______destroy(
     const ContainerID& containerId,
+    const Option<ContainerTermination>& _termination,
     const Future<bool>& destroy)
 {
   CHECK(containers_.contains(containerId));
@@ -2430,25 +2454,16 @@ void MesosContainerizerProcess::______destroy(
 
   ContainerTermination termination;
 
+  if (_termination.isSome()) {
+    termination = _termination.get();
+  }
+
   if (container->status.isSome() &&
       container->status->isReady() &&
       container->status->get().isSome()) {
     termination.set_status(container->status->get().get());
   }
 
-  // NOTE: We may not see a limitation in time for it to be
-  // registered. This could occur if the limitation (e.g., an OOM)
-  // killed the executor and we triggered destroy() off the executor
-  // exit.
-  if (container->limitation.isSome()) {
-    termination.set_state(TaskState::TASK_FAILED);
-    termination.set_message(container->limitation->message());
-
-    if (container->limitation->has_reason()) {
-      termination.set_reason(container->limitation->reason());
-    }
-  }
-
   // Now that we are done destroying the container we need to cleanup
   // its runtime directory. There are two cases to consider:
   //
@@ -2530,7 +2545,7 @@ Future<bool> MesosContainerizerProcess::kill(
     LOG(WARNING) << "Unable to find the pid for container " << containerId
                  << ", destroying it";
 
-    destroy(containerId);
+    destroy(containerId, None());
     return true;
   }
 
@@ -2643,7 +2658,7 @@ void MesosContainerizerProcess::reaped(const ContainerID& containerId)
   LOG(INFO) << "Container " << containerId << " has exited";
 
   // The executor has exited so destroy the container.
-  destroy(containerId);
+  destroy(containerId, None());
 }
 
 
@@ -2656,12 +2671,20 @@ void MesosContainerizerProcess::limited(
     return;
   }
 
+  Option<ContainerTermination> termination = None();
+
   if (future.isReady()) {
     LOG(INFO) << "Container " << containerId << " has reached its limit for"
               << " resource " << future.get().resources()
               << " and will be terminated";
 
-    containers_.at(containerId)->limitation = future.get();
+    termination = ContainerTermination();
+    termination->set_state(TaskState::TASK_FAILED);
+    termination->set_message(future->message());
+
+    if (future->has_reason()) {
+      termination->set_reason(future->reason());
+    }
   } else {
     // TODO(idownes): A discarded future will not be an error when
     // isolators discard their promises after cleanup.
@@ -2671,7 +2694,7 @@ void MesosContainerizerProcess::limited(
   }
 
   // The container has been affected by the limitation so destroy it.
-  destroy(containerId);
+  destroy(containerId, termination);
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/7d9781f9/src/slave/containerizer/mesos/containerizer.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.hpp b/src/slave/containerizer/mesos/containerizer.hpp
index ad01a97..6d356cc 100644
--- a/src/slave/containerizer/mesos/containerizer.hpp
+++ b/src/slave/containerizer/mesos/containerizer.hpp
@@ -170,7 +170,8 @@ public:
       int_fd pipeWrite);
 
   virtual process::Future<bool> destroy(
-      const ContainerID& containerId);
+      const ContainerID& containerId,
+      const Option<mesos::slave::ContainerTermination>& termination);
 
   virtual process::Future<bool> kill(
       const ContainerID& containerId,
@@ -229,30 +230,38 @@ private:
   // Continues 'destroy()' once nested containers are handled.
   void _destroy(
       const ContainerID& containerId,
+      const Option<mesos::slave::ContainerTermination>& termination,
       const State& previousState,
       const std::list<process::Future<bool>>& destroys);
 
   // Continues '_destroy()' once isolators has completed.
-  void __destroy(const ContainerID& containerId);
+  void __destroy(
+      const ContainerID& containerId,
+      const Option<mesos::slave::ContainerTermination>& termination);
 
   // Continues '__destroy()' once all processes have been killed
   // by the launcher.
   void ___destroy(
       const ContainerID& containerId,
+      const Option<mesos::slave::ContainerTermination>& termination,
       const process::Future<Nothing>& future);
 
   // Continues '___destroy()' once we get the exit status of the container.
-  void ____destroy(const ContainerID& containerId);
+  void ____destroy(
+      const ContainerID& containerId,
+      const Option<mesos::slave::ContainerTermination>& termination);
 
   // Continues '____destroy()' once all isolators have completed
   // cleanup.
   void _____destroy(
       const ContainerID& containerId,
+      const Option<mesos::slave::ContainerTermination>& termination,
       const process::Future<std::list<process::Future<Nothing>>>& cleanups);
 
   // Continues '_____destroy()' once provisioner have completed destroy.
   void ______destroy(
       const ContainerID& containerId,
+      const Option<mesos::slave::ContainerTermination>& termination,
       const process::Future<bool>& destroy);
 
   // Call back for when an isolator limits a container and impacts the
@@ -329,10 +338,6 @@ private:
     // calling cleanup after all isolators have finished isolating.
     process::Future<std::list<Nothing>> isolation;
 
-    // We keep track of any limitation received from an isolator
-    // so we can determine the cause of a container termination.
-    Option<mesos::slave::ContainerLimitation> limitation;
-
     // We keep track of the resources for each container so we can set
     // the ResourceStatistics limits in usage().
     Resources resources;


[7/9] mesos git commit: Propagated new wait response fields from the default executor.

Posted by jp...@apache.org.
Propagated new wait response fields from the default executor.

In the default executor, propagate newly added `WaitNestedContainer`
response fields to the corresponding status update. This allows
frameworks to receive more reliable information about nested
container failures.

Review: https://reviews.apache.org/r/62646/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/6b703932
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/6b703932
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/6b703932

Branch: refs/heads/master
Commit: 6b7039323ae434211ac977d14e3c21a966566f58
Parents: 4dd9ad6
Author: James Peach <jp...@apache.org>
Authored: Sun Oct 15 15:21:53 2017 -0700
Committer: James Peach <jp...@apache.org>
Committed: Sun Oct 15 15:21:53 2017 -0700

----------------------------------------------------------------------
 src/launcher/default_executor.cpp | 40 +++++++++++++++++++++++++++++++---
 1 file changed, 37 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/6b703932/src/launcher/default_executor.cpp
----------------------------------------------------------------------
diff --git a/src/launcher/default_executor.cpp b/src/launcher/default_executor.cpp
index a376c09..e58766f 100644
--- a/src/launcher/default_executor.cpp
+++ b/src/launcher/default_executor.cpp
@@ -799,6 +799,8 @@ protected:
 
     TaskState taskState;
     Option<string> message;
+    Option<TaskStatus::Reason> reason;
+    Option<TaskResourceLimitation> limitation;
 
     if (!waitResponse->wait_nested_container().has_exit_status()) {
       taskState = TASK_FAILED;
@@ -822,11 +824,38 @@ protected:
       message = "Command " + WSTRINGIFY(status);
     }
 
+    // Note that we always prefer the task state and reason from the
+    // agent response over what we can determine ourselves because
+    // in general, the agent has more specific information about why
+    // the container exited (e.g. this might be a container resource
+    // limitation).
+    if (waitResponse->wait_nested_container().has_state()) {
+      taskState = waitResponse->wait_nested_container().state();
+    }
+
+    if (waitResponse->wait_nested_container().has_reason()) {
+      reason = waitResponse->wait_nested_container().reason();
+    }
+
+    if (waitResponse->wait_nested_container().has_message()) {
+      if (message.isSome()) {
+        message->append(
+            ": " +  waitResponse->wait_nested_container().message());
+      } else {
+        message = waitResponse->wait_nested_container().message();
+      }
+    }
+
+    if (waitResponse->wait_nested_container().has_limitation()) {
+      limitation = waitResponse->wait_nested_container().limitation();
+    }
+
     TaskStatus taskStatus = createTaskStatus(
         taskId,
         taskState,
-        None(),
-        message);
+        reason,
+        message,
+        limitation);
 
     // Indicate that a task has been unhealthy upon termination.
     if (unhealthy) {
@@ -1241,7 +1270,8 @@ private:
       const TaskID& taskId,
       const TaskState& state,
       const Option<TaskStatus::Reason>& reason = None(),
-      const Option<string>& message = None())
+      const Option<string>& message = None(),
+      const Option<TaskResourceLimitation>& limitation = None())
   {
     TaskStatus status = protobuf::createTaskStatus(
         taskId,
@@ -1260,6 +1290,10 @@ private:
       status.set_message(message.get());
     }
 
+    if (limitation.isSome()) {
+      status.mutable_limitation()->CopyFrom(limitation.get());
+    }
+
     CHECK(containers.contains(taskId));
     const Owned<Container>& container = containers.at(taskId);
 


[8/9] mesos git commit: Printed resource limitations from mesos-execute.

Posted by jp...@apache.org.
Printed resource limitations from mesos-execute.

If mesos-execute receives a status update containing a resource
limitation, print the resources whose limits were violated.

Review: https://reviews.apache.org/r/62647/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/08d94682
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/08d94682
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/08d94682

Branch: refs/heads/master
Commit: 08d94682028111ce016481caa859e79c991120ca
Parents: 6b70393
Author: James Peach <jp...@apache.org>
Authored: Sun Oct 15 15:21:56 2017 -0700
Committer: James Peach <jp...@apache.org>
Committed: Sun Oct 15 15:21:56 2017 -0700

----------------------------------------------------------------------
 src/cli/execute.cpp | 4 ++++
 1 file changed, 4 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/08d94682/src/cli/execute.cpp
----------------------------------------------------------------------
diff --git a/src/cli/execute.cpp b/src/cli/execute.cpp
index 78b62a6..f5d4bc5 100644
--- a/src/cli/execute.cpp
+++ b/src/cli/execute.cpp
@@ -710,6 +710,10 @@ protected:
     if (status.has_check_status()) {
       cout << "  check status: " << status.check_status() << endl;
     }
+    if (status.has_limitation() && !status.limitation().resources().empty()) {
+      cout << "  resource limit violation: "
+           << status.limitation().resources() << endl;
+    }
 
     if (status.has_uuid()) {
       Call call;


[4/9] mesos git commit: Added the TaskResourceLimitation message.

Posted by jp...@apache.org.
Added the TaskResourceLimitation message.

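Added the `TaskResourceLimitation` message, which describes a resource
limitation that caused a task failure, to both the v0 and v1
`mesos.proto` files.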

Review: https://reviews.apache.org/r/62854/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/f3ee9234
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/f3ee9234
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/f3ee9234

Branch: refs/heads/master
Commit: f3ee9234f645cec540f4387f3ea7f4870b810606
Parents: 7d9781f
Author: James Peach <jp...@apache.org>
Authored: Sun Oct 15 15:21:38 2017 -0700
Committer: James Peach <jp...@apache.org>
Committed: Sun Oct 15 15:21:38 2017 -0700

----------------------------------------------------------------------
 include/mesos/mesos.proto    | 12 ++++++++++++
 include/mesos/v1/mesos.proto | 12 ++++++++++++
 2 files changed, 24 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/f3ee9234/include/mesos/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto
index 2379c2e..1d346d8 100644
--- a/include/mesos/mesos.proto
+++ b/include/mesos/mesos.proto
@@ -2125,6 +2125,18 @@ enum TaskState {
 
 
 /**
+ * Describes a resource limitation that caused a task failure.
+ */
+message TaskResourceLimitation {
+  // This field contains the resource whose limits were violated.
+  //
+  // NOTE: 'Resources' is used here because the resource may span
+  // multiple roles (e.g. `"mem(*):1;mem(role):2"`).
+  repeated Resource resources = 1;
+}
+
+
+/**
 * Describes the status of a check. Type and the corresponding field, i.e.,
 * `command` or `http` must be set. If the result of the check is not available
 * (e.g., the check timed out), these fields must contain empty messages, i.e.,

http://git-wip-us.apache.org/repos/asf/mesos/blob/f3ee9234/include/mesos/v1/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/v1/mesos.proto b/include/mesos/v1/mesos.proto
index c3581b5..5049b8f 100644
--- a/include/mesos/v1/mesos.proto
+++ b/include/mesos/v1/mesos.proto
@@ -2106,6 +2106,18 @@ enum TaskState {
 
 
 /**
+ * Describes a resource limitation that caused a task failure.
+ */
+message TaskResourceLimitation {
+  // This field contains the resource whose limits were violated.
+  //
+  // NOTE: 'Resources' is used here because the resource may span
+  // multiple roles (e.g. `"mem(*):1;mem(role):2"`).
+  repeated Resource resources = 1;
+}
+
+
+/**
 * Describes the status of a check. Type and the corresponding field, i.e.,
 * `command` or `http` must be set. If the result of the check is not available
 * (e.g., the check timed out), these fields must contain empty messages, i.e.,


[9/9] mesos git commit: Added a test for nested container resource limitations.

Posted by jp...@apache.org.
Added a test for nested container resource limitations.

Added a test to verify that resource limitations incurred by a nested
container are propagated all the way back to the scheduler through a
`TASK_FAILED` status update.

Review: https://reviews.apache.org/r/62648/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/cc29c27e
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/cc29c27e
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/cc29c27e

Branch: refs/heads/master
Commit: cc29c27ee597fbe79206d890d695c0a2e64c46aa
Parents: 08d9468
Author: James Peach <jp...@apache.org>
Authored: Sun Oct 15 15:21:59 2017 -0700
Committer: James Peach <jp...@apache.org>
Committed: Sun Oct 15 15:21:59 2017 -0700

----------------------------------------------------------------------
 src/tests/default_executor_tests.cpp | 116 ++++++++++++++++++++++++++++++
 1 file changed, 116 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/cc29c27e/src/tests/default_executor_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/default_executor_tests.cpp b/src/tests/default_executor_tests.cpp
index 2b4c643..6831201 100644
--- a/src/tests/default_executor_tests.cpp
+++ b/src/tests/default_executor_tests.cpp
@@ -77,6 +77,7 @@ using testing::WithParamInterface;
 using mesos::internal::slave::Containerizer;
 using mesos::internal::slave::Fetcher;
 using mesos::internal::slave::MesosContainerizer;
+using mesos::internal::slave::Slave;
 
 using mesos::slave::ContainerTermination;
 
@@ -1362,6 +1363,121 @@ TEST_P(DefaultExecutorTest, ROOT_MultiTaskgroupSharePidNamespace)
 #endif // __linux__
 
 
+// This test verifies that a resource limitation incurred on a nested
+// container is propagated all the way up to the scheduler.
+TEST_P_TEMP_DISABLED_ON_WINDOWS(
+    DefaultExecutorTest, ResourceLimitation)
+{
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  slave::Flags flags = CreateSlaveFlags();
+  flags.containerizers = GetParam();
+  flags.enforce_container_disk_quota = true;
+  flags.container_disk_watch_interval = Milliseconds(1);
+  flags.isolation = "disk/du";
+
+  Owned<MasterDetector> detector = master.get()->createDetector();
+  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
+  ASSERT_SOME(slave);
+
+  auto scheduler = std::make_shared<v1::MockHTTPScheduler>();
+
+  EXPECT_CALL(*scheduler, connected(_))
+    .WillOnce(v1::scheduler::SendSubscribe(v1::DEFAULT_FRAMEWORK_INFO));
+
+  Future<v1::scheduler::Event::Subscribed> subscribed;
+  EXPECT_CALL(*scheduler, subscribed(_, _))
+    .WillOnce(FutureArg<1>(&subscribed));
+
+  Future<v1::scheduler::Event::Offers> offers;
+  EXPECT_CALL(*scheduler, offers(_, _))
+    .WillOnce(FutureArg<1>(&offers))
+    .WillRepeatedly(Return());
+
+  EXPECT_CALL(*scheduler, heartbeat(_))
+    .WillRepeatedly(Return()); // Ignore heartbeats.
+
+  v1::scheduler::TestMesos mesos(
+      master.get()->pid,
+      ContentType::PROTOBUF,
+      scheduler);
+
+  AWAIT_READY(subscribed);
+  v1::FrameworkID frameworkId(subscribed->framework_id());
+
+  v1::Resources resources =
+    v1::Resources::parse("cpus:0.1;mem:32;disk:10").get();
+
+  v1::ExecutorInfo executorInfo = v1::createExecutorInfo(
+      v1::DEFAULT_EXECUTOR_ID,
+      None(),
+      resources,
+      v1::ExecutorInfo::DEFAULT,
+      frameworkId);
+
+  AWAIT_READY(offers);
+  ASSERT_FALSE(offers->offers().empty());
+
+  const v1::Offer& offer = offers->offers(0);
+  const v1::AgentID& agentId = offer.agent_id();
+
+  Future<v1::scheduler::Event::Update> running;
+  Future<v1::scheduler::Event::Update> failed;
+  EXPECT_CALL(*scheduler, update(_, _))
+    .WillOnce(
+        DoAll(
+            FutureArg<1>(&running),
+            v1::scheduler::SendAcknowledge(frameworkId, agentId)))
+    .WillOnce(
+        DoAll(
+            FutureArg<1>(&failed),
+            v1::scheduler::SendAcknowledge(frameworkId, agentId)));
+
+  // Since we requested 10MB each for the task and the executor,
+  // writing 30MB will violate our disk resource limit.
+  v1::TaskInfo taskInfo = v1::createTask(
+      agentId,
+      resources,
+      "dd if=/dev/zero of=dd.out bs=1048576 count=30; sleep 1000");
+
+  mesos.send(
+      v1::createCallAccept(
+          frameworkId,
+          offer,
+          {v1::LAUNCH_GROUP(
+              executorInfo, v1::createTaskGroupInfo({taskInfo}))}));
+
+  Future<Nothing> ack =
+    FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement);
+
+  AWAIT_READY(running);
+  AWAIT_READY(ack);
+
+  EXPECT_EQ(TASK_RUNNING, running->status().state());
+  EXPECT_EQ(taskInfo.task_id(), running->status().task_id());
+
+  AWAIT_READY(failed);
+
+  // We expect the failure to be a disk limitation that tells us something
+  // about the disk resources.
+  EXPECT_EQ(TASK_FAILED, failed->status().state());
+  EXPECT_EQ(
+      TaskStatus::REASON_CONTAINER_LIMITATION_DISK,
+      failed->status().reason());
+
+  EXPECT_EQ(taskInfo.task_id(), failed->status().task_id());
+  ASSERT_TRUE(failed->status().has_limitation());
+  EXPECT_GT(failed->status().limitation().resources().size(), 0);
+
+  foreach (const v1::Resource& resource,
+           failed->status().limitation().resources()) {
+    EXPECT_EQ("disk", resource.name());
+    EXPECT_EQ(mesos::v1::Value::SCALAR, resource.type());
+  }
+}
+
+
 struct LauncherAndIsolationParam
 {
   LauncherAndIsolationParam(const string& _launcher, const string& _isolation)


[6/9] mesos git commit: Added resources to the TaskStatus message.

Posted by jp...@apache.org.
Added resources to the TaskStatus message.

Added a `TaskResourceLimitation` field to the `TaskStatus`
message to convey specific information about a resource
limit that has been violated by a container.

This field propagates the resources from the `ContainerLimitation`
message sent by isolators on the agent, and is populated for the
following reasons:

  * `REASON_CONTAINER_LIMITATION`
  * `REASON_CONTAINER_LIMITATION_DISK`
  * `REASON_CONTAINER_LIMITATION_MEMORY`

Review: https://reviews.apache.org/r/62644/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/4dd9ad6d
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/4dd9ad6d
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/4dd9ad6d

Branch: refs/heads/master
Commit: 4dd9ad6d5eb019e82d9cb00fe749ef7f8af65492
Parents: 98d96ca
Author: James Peach <jp...@apache.org>
Authored: Sun Oct 15 15:21:46 2017 -0700
Committer: James Peach <jp...@apache.org>
Committed: Sun Oct 15 15:21:46 2017 -0700

----------------------------------------------------------------------
 include/mesos/mesos.proto     |  4 ++++
 include/mesos/v1/mesos.proto  |  4 ++++
 src/common/protobuf_utils.cpp | 17 +++++++++++++++-
 src/common/protobuf_utils.hpp |  3 ++-
 src/slave/slave.cpp           | 41 +++++++++++++++++++++++++-------------
 5 files changed, 53 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/4dd9ad6d/include/mesos/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto
index 1d346d8..859fdff 100644
--- a/include/mesos/mesos.proto
+++ b/include/mesos/mesos.proto
@@ -2296,6 +2296,10 @@ message TaskStatus {
   // status updates for tasks running on agents that are unreachable
   // (e.g., partitioned away from the master).
   optional TimeInfo unreachable_time = 14;
+
+  // If the reason field indicates a container resource limitation,
+  // this field optionally contains additional information.
+  optional TaskResourceLimitation limitation = 16;
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/4dd9ad6d/include/mesos/v1/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/v1/mesos.proto b/include/mesos/v1/mesos.proto
index 5049b8f..cfd4abd 100644
--- a/include/mesos/v1/mesos.proto
+++ b/include/mesos/v1/mesos.proto
@@ -2277,6 +2277,10 @@ message TaskStatus {
   // status updates for tasks running on agents that are unreachable
   // (e.g., partitioned away from the master).
   optional TimeInfo unreachable_time = 14;
+
+  // If the reason field indicates a container resource limitation,
+  // this field optionally contains additional information.
+  optional TaskResourceLimitation limitation = 16;
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/4dd9ad6d/src/common/protobuf_utils.cpp
----------------------------------------------------------------------
diff --git a/src/common/protobuf_utils.cpp b/src/common/protobuf_utils.cpp
index 04f44f6..fd4858a 100644
--- a/src/common/protobuf_utils.cpp
+++ b/src/common/protobuf_utils.cpp
@@ -110,7 +110,8 @@ StatusUpdate createStatusUpdate(
     const Option<CheckStatusInfo>& checkStatus,
     const Option<Labels>& labels,
     const Option<ContainerStatus>& containerStatus,
-    const Option<TimeInfo>& unreachableTime)
+    const Option<TimeInfo>& unreachableTime,
+    const Option<Resources>& limitedResources)
 {
   StatusUpdate update;
 
@@ -168,6 +169,20 @@ StatusUpdate createStatusUpdate(
     status->mutable_unreachable_time()->CopyFrom(unreachableTime.get());
   }
 
+  if (limitedResources.isSome()) {
+    // Check that we are only sending the `Limitation` field when the
+    // reason is a container limitation.
+    CHECK_SOME(reason);
+    CHECK(
+        reason.get() == TaskStatus::REASON_CONTAINER_LIMITATION ||
+        reason.get() == TaskStatus::REASON_CONTAINER_LIMITATION_DISK ||
+        reason.get() == TaskStatus::REASON_CONTAINER_LIMITATION_MEMORY)
+      << reason.get();
+
+    status->mutable_limitation()->mutable_resources()->CopyFrom(
+        limitedResources.get());
+  }
+
   return update;
 }
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/4dd9ad6d/src/common/protobuf_utils.hpp
----------------------------------------------------------------------
diff --git a/src/common/protobuf_utils.hpp b/src/common/protobuf_utils.hpp
index ff0fd01..c43ab75 100644
--- a/src/common/protobuf_utils.hpp
+++ b/src/common/protobuf_utils.hpp
@@ -88,7 +88,8 @@ StatusUpdate createStatusUpdate(
     const Option<CheckStatusInfo>& checkStatus = None(),
     const Option<Labels>& labels = None(),
     const Option<ContainerStatus>& containerStatus = None(),
-    const Option<TimeInfo>& unreachableTime = None());
+    const Option<TimeInfo>& unreachableTime = None(),
+    const Option<Resources>& limitedResources = None());
 
 
 StatusUpdate createStatusUpdate(

http://git-wip-us.apache.org/repos/asf/mesos/blob/4dd9ad6d/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index df0e894..4d7dc8e 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -6890,9 +6890,10 @@ void Slave::sendExecutorTerminatedStatusUpdate(
   TaskStatus::Reason reason;
   string message;
 
+  const bool haveTermination = termination.isReady() && termination->isSome();
+
   // Determine the task state for the status update.
-  if (termination.isReady() &&
-      termination->isSome() && termination->get().has_state()) {
+  if (haveTermination && termination->get().has_state()) {
     state = termination->get().state();
   } else if (executor->pendingTermination.isSome() &&
              executor->pendingTermination->has_state()) {
@@ -6902,8 +6903,7 @@ void Slave::sendExecutorTerminatedStatusUpdate(
   }
 
   // Determine the task reason for the status update.
-  if (termination.isReady() &&
-      termination->isSome() && termination->get().has_reason()) {
+  if (haveTermination && termination->get().has_reason()) {
     reason = termination->get().reason();
   } else if (executor->pendingTermination.isSome() &&
              executor->pendingTermination->has_reason()) {
@@ -6936,16 +6936,29 @@ void Slave::sendExecutorTerminatedStatusUpdate(
     message = strings::join("; ", messages);
   }
 
-  statusUpdate(protobuf::createStatusUpdate(
-      frameworkId,
-      info.id(),
-      taskId,
-      state,
-      TaskStatus::SOURCE_SLAVE,
-      UUID::random(),
-      message,
-      reason,
-      executor->id),
+  Option<Resources> limitedResources;
+
+  if (haveTermination && !termination->get().limited_resources().empty()) {
+    limitedResources = termination->get().limited_resources();
+  }
+
+  statusUpdate(
+      protobuf::createStatusUpdate(
+          frameworkId,
+          info.id(),
+          taskId,
+          state,
+          TaskStatus::SOURCE_SLAVE,
+          UUID::random(),
+          message,
+          reason,
+          executor->id,
+          None(),
+          None(),
+          None(),
+          None(),
+          None(),
+          limitedResources),
       UPID());
 }