You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by nn...@apache.org on 2015/09/17 21:06:05 UTC

mesos git commit: Added containerId to ResourceUsage to enable the QoS controller to target a container.

Repository: mesos
Updated Branches:
  refs/heads/master 76ce41185 -> 3d8ea3a74


Added containerId to ResourceUsage to enable the QoS controller to target a container.

We should ensure that we are addressing the container which the QoS
controller intended to kill. Without this check, we may run into a
scenario where the executor has terminated and one with the same id has
started in the interim i.e. running in a different container than the
one the QoS controller targeted.

This most likely requires us to add containerId to the ResourceUsage
message and encode the containerID in the QoS Correction message.

Review: https://reviews.apache.org/r/38253


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/3d8ea3a7
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/3d8ea3a7
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/3d8ea3a7

Branch: refs/heads/master
Commit: 3d8ea3a747dcb4875c571c7f9df1e45e2a20c499
Parents: 76ce411
Author: Klaus Ma <kl...@cguru.net>
Authored: Thu Sep 17 10:53:53 2015 -0700
Committer: Niklas Q. Nielsen <ni...@mesosphere.io>
Committed: Thu Sep 17 10:53:55 2015 -0700

----------------------------------------------------------------------
 include/mesos/mesos.proto                  |  3 +++
 include/mesos/slave/oversubscription.proto |  6 +++++-
 src/slave/slave.cpp                        | 16 ++++++++++++++--
 src/tests/oversubscription_tests.cpp       | 21 +++++++++++++++++----
 4 files changed, 39 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/3d8ea3a7/include/mesos/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto
index 899d52f..c8bbf67 100644
--- a/include/mesos/mesos.proto
+++ b/include/mesos/mesos.proto
@@ -816,6 +816,9 @@ message ResourceUsage {
     // Current resource usage. If absent, the containerizer
     // cannot provide resource usage.
     optional ResourceStatistics statistics = 3;
+
+    // The container id for the executor specified in the executor_info field
+    required ContainerID container_id = 4;
   }
 
   repeated Executor executors = 1;

http://git-wip-us.apache.org/repos/asf/mesos/blob/3d8ea3a7/include/mesos/slave/oversubscription.proto
----------------------------------------------------------------------
diff --git a/include/mesos/slave/oversubscription.proto b/include/mesos/slave/oversubscription.proto
index fa69a95..5883167 100644
--- a/include/mesos/slave/oversubscription.proto
+++ b/include/mesos/slave/oversubscription.proto
@@ -38,12 +38,16 @@ message QoSCorrection {
   }
 
   // Kill action which will be performed on an executor.
-  // NOTE: The framework id must be set for the specified executor.
+  // NOTE: Framework ID and either executor id or executor id and container_id
+  // must be set to kill a running container. In the latter case, the caller
+  // can be assured that the kill request doesn't target a new executor reusing
+  // an old executor id.
   // NOTE: In the future we may also kill individual tasks by
   // specifying an optional 'task_id'.
   message Kill {
     optional FrameworkID framework_id = 1;
     optional ExecutorID executor_id = 2;
+    optional ContainerID container_id = 3;
   }
 
   required Type type = 1;

http://git-wip-us.apache.org/repos/asf/mesos/blob/3d8ea3a7/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 93353a1..5aaa12a 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -4392,10 +4392,21 @@ void Slave::_qosCorrections(const Future<list<QoSCorrection>>& future)
         continue;
       }
 
+      const ContainerID& containerId =
+          kill.has_container_id() ? kill.container_id() : executor->containerId;
+      if (containerId != executor->containerId) {
+        LOG(WARNING) << "Ignoring QoS correction KILL on container '"
+                     << containerId << "' in executor '"
+                     << executorId << "' of framework " << frameworkId
+                     << ": container cannot be found";
+        continue;
+      }
+
       switch (executor->state) {
         case Executor::REGISTERING:
         case Executor::RUNNING: {
-          LOG(INFO) << "Killing executor '" << executorId
+          LOG(INFO) << "Killing container '" << containerId
+                    << "' in executor '" << executorId
                     << "' of framework " << frameworkId
                     << " as QoS correction";
 
@@ -4408,7 +4419,7 @@ void Slave::_qosCorrections(const Future<list<QoSCorrection>>& future)
           // (MESOS-2875).
           executor->state = Executor::TERMINATING;
           executor->reason = TaskStatus::REASON_EXECUTOR_PREEMPTED;
-          containerizer->destroy(executor->containerId);
+          containerizer->destroy(containerId);
 
           ++metrics.executors_preempted;
           break;
@@ -4447,6 +4458,7 @@ Future<ResourceUsage> Slave::usage()
       ResourceUsage::Executor* entry = usage->add_executors();
       entry->mutable_executor_info()->CopyFrom(executor->info);
       entry->mutable_allocated()->CopyFrom(executor->resources);
+      entry->mutable_container_id()->CopyFrom(executor->containerId);
 
       futures.push_back(containerizer->usage(executor->containerId));
     }

http://git-wip-us.apache.org/repos/asf/mesos/blob/3d8ea3a7/src/tests/oversubscription_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/oversubscription_tests.cpp b/src/tests/oversubscription_tests.cpp
index 0c5edaf..561e922 100644
--- a/src/tests/oversubscription_tests.cpp
+++ b/src/tests/oversubscription_tests.cpp
@@ -802,6 +802,12 @@ TEST_F(OversubscriptionTest, QoSCorrectionKill)
         &corrections,
         &Queue<list<mesos::slave::QoSCorrection>>::get));
 
+  Future<lambda::function<Future<ResourceUsage>()>> usageCallback;
+
+  // Catching callback which is passed to the QoS Controller.
+  EXPECT_CALL(controller, initialize(_))
+    .WillOnce(DoAll(FutureArg<0>(&usageCallback), Return(Nothing())));
+
   Try<PID<Slave>> slave = StartSlave(&controller, CreateSlaveFlags());
   ASSERT_SOME(slave);
 
@@ -845,15 +851,22 @@ TEST_F(OversubscriptionTest, QoSCorrectionKill)
   AWAIT_READY(status1);
   ASSERT_EQ(TASK_RUNNING, status1.get().state());
 
+  AWAIT_READY(usageCallback);
+
+  Future<ResourceUsage> usage = usageCallback.get()();
+  AWAIT_READY(usage);
+
+  // Expecting the same statistics as these returned by mocked containerizer.
+  ASSERT_EQ(1, usage.get().executors_size());
+
+  const ResourceUsage::Executor& executor = usage.get().executors(0);
   // Carry out kill correction.
   QoSCorrection killCorrection;
 
   QoSCorrection::Kill* kill = killCorrection.mutable_kill();
   kill->mutable_framework_id()->CopyFrom(frameworkId.get());
-
-  // As we use a command executor to launch an actual sleep command,
-  // the executor id will be the task id.
-  kill->mutable_executor_id()->set_value(task.task_id().value());
+  kill->mutable_executor_id()->CopyFrom(executor.executor_info().executor_id());
+  kill->mutable_container_id()->CopyFrom(executor.container_id());
 
   corrections.put({killCorrection});