You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2017/02/09 00:36:23 UTC

[1/5] mesos git commit: Cleaned up a for loop in the agent recovery code.

Repository: mesos
Updated Branches:
  refs/heads/master 348c06bb0 -> 91944b458


Cleaned up a for loop in the agent recovery code.


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/91944b45
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/91944b45
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/91944b45

Branch: refs/heads/master
Commit: 91944b45815d44a4e27c2722f67dc4335e29b473
Parents: d1c7fe3
Author: Benjamin Mahler <bm...@apache.org>
Authored: Wed Feb 8 16:29:43 2017 -0800
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed Feb 8 16:36:02 2017 -0800

----------------------------------------------------------------------
 src/slave/slave.cpp | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/91944b45/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index d184dd3..ebba8e1 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -5295,17 +5295,15 @@ Future<Nothing> Slave::recover(const Try<state::State>& state)
       const FrameworkInfo& frameworkInfo) {
     set<string> roles = protobuf::framework::getRoles(frameworkInfo);
 
-    for (int i = 0; i < resources->size(); ++i) {
-      Resource* resource = resources->Mutable(i);
-
-      if (!resource->has_allocation_info()) {
+    foreach (Resource& resource, *resources) {
+      if (!resource.has_allocation_info()) {
         if (roles.size() != 1) {
           LOG(FATAL) << "Missing 'Resource.AllocationInfo' for resources"
                      << " allocated to MULTI_ROLE framework"
                      << " '" << frameworkInfo.name() << "'";
         }
 
-        resource->mutable_allocation_info()->set_role(*roles.begin());
+        resource.mutable_allocation_info()->set_role(*roles.begin());
       }
     }
   };


[2/5] mesos git commit: Added CHECKs to the agent to ensure allocation info is set.

Posted by bm...@apache.org.
Added CHECKs to the agent to ensure allocation info is set.

The agent has an invariant that all tasks and executors have an
allocation info. This adds CHECKs to ensure this is the case.

Review: https://reviews.apache.org/r/56423


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/dc48ed29
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/dc48ed29
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/dc48ed29

Branch: refs/heads/master
Commit: dc48ed29026bbb3f0329c2b22133ea15a49678ba
Parents: 8362e2f
Author: Benjamin Mahler <bm...@apache.org>
Authored: Tue Feb 7 18:32:35 2017 -0800
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed Feb 8 16:36:02 2017 -0800

----------------------------------------------------------------------
 src/slave/slave.cpp | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/dc48ed29/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index d9c37b0..520fdec 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -6418,6 +6418,13 @@ Executor* Framework::launchExecutor(
     const ExecutorInfo& executorInfo,
     const Option<TaskInfo>& taskInfo)
 {
+  // Verify that Resource.AllocationInfo is set, if coming
+  // from a MULTI_ROLE master this will be set, otherwise
+  // the agent will inject it when receiving the executor.
+  foreach (const Resource& resource, executorInfo.resources()) {
+    CHECK(resource.has_allocation_info());
+  }
+
   // Generate an ID for the executor's container.
   // TODO(idownes) This should be done by the containerizer but we
   // need the ContainerID to create the executor's directory. Fix
@@ -6663,6 +6670,12 @@ void Framework::recoverExecutor(const ExecutorState& state)
     return;
   }
 
+  // Verify that Resource.AllocationInfo is set, this should
+  // be injected by the agent when recovering.
+  foreach (const Resource& resource, state.info->resources()) {
+    CHECK(resource.has_allocation_info());
+  }
+
   // We are only interested in the latest run of the executor!
   // So, we GC all the old runs.
   // NOTE: We don't schedule the top level executor work and meta
@@ -6849,6 +6862,13 @@ Task* Executor::addTask(const TaskInfo& task)
   CHECK(!launchedTasks.contains(task.task_id()))
     << "Duplicate task " << task.task_id();
 
+  // Verify that Resource.AllocationInfo is set, if coming
+  // from a MULTI_ROLE master this will be set, otherwise
+  // the agent will inject it when receiving the task.
+  foreach (const Resource& resource, task.resources()) {
+    CHECK(resource.has_allocation_info());
+  }
+
   Task* t = new Task(protobuf::createTask(task, TASK_STAGING, frameworkId));
 
   launchedTasks[task.task_id()] = t;
@@ -6918,6 +6938,12 @@ void Executor::recoverTask(const TaskState& state)
     return;
   }
 
+  // Verify that Resource.AllocationInfo is set, the agent
+  // should inject it during recovery.
+  foreach (const Resource& resource, state.info->resources()) {
+    CHECK(resource.has_allocation_info());
+  }
+
   launchedTasks[state.id] = new Task(state.info.get());
 
   // NOTE: Since some tasks might have been terminated when the


[5/5] mesos git commit: Inject resource allocation info in agent for received tasks.

Posted by bm...@apache.org.
Inject resource allocation info in agent for received tasks.

Per MESOS-7077, the agent needs to inject allocation info into
tasks and executors coming from an old master to maintain the
invariant that all tasks and executors on the agent have an
allocation info.

Review: https://reviews.apache.org/r/56422


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/8362e2f7
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/8362e2f7
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/8362e2f7

Branch: refs/heads/master
Commit: 8362e2f7b22bc929fa4ebace304ec533368dc71b
Parents: 55ca107
Author: Benjamin Mahler <bm...@apache.org>
Authored: Tue Feb 7 18:28:57 2017 -0800
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed Feb 8 16:36:02 2017 -0800

----------------------------------------------------------------------
 src/slave/slave.cpp       | 70 +++++++++++++++++++++++++++++++++++++++---
 src/slave/slave.hpp       |  2 +-
 src/tests/slave_tests.cpp | 32 +++++++++++--------
 3 files changed, 86 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/8362e2f7/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 92564ff..d9c37b0 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -1561,7 +1561,7 @@ void Slave::runTask(
 
 void Slave::run(
     const FrameworkInfo& frameworkInfo,
-    const ExecutorInfo& executorInfo,
+    ExecutorInfo executorInfo,
     Option<TaskInfo> task,
     Option<TaskGroupInfo> taskGroup,
     const UPID& pid)
@@ -1569,6 +1569,48 @@ void Slave::run(
   CHECK_NE(task.isSome(), taskGroup.isSome())
     << "Either task or task group should be set but not both";
 
+  auto injectAllocationInfo = [](
+      RepeatedPtrField<Resource>* resources,
+      const FrameworkInfo& frameworkInfo) {
+    set<string> roles = protobuf::framework::getRoles(frameworkInfo);
+
+    foreach (Resource& resource, *resources) {
+      if (!resource.has_allocation_info()) {
+        if (roles.size() != 1) {
+          LOG(FATAL) << "Missing 'Resource.AllocationInfo' for resources"
+                     << " allocated to MULTI_ROLE framework"
+                     << " '" << frameworkInfo.name() << "'";
+        }
+
+        resource.mutable_allocation_info()->set_role(*roles.begin());
+      }
+    }
+  };
+
+  injectAllocationInfo(executorInfo.mutable_resources(), frameworkInfo);
+
+  if (task.isSome()) {
+    injectAllocationInfo(task->mutable_resources(), frameworkInfo);
+
+    if (task->has_executor()) {
+      injectAllocationInfo(
+          task->mutable_executor()->mutable_resources(),
+          frameworkInfo);
+    }
+  }
+
+  if (taskGroup.isSome()) {
+    foreach (TaskInfo& task, *taskGroup->mutable_tasks()) {
+      injectAllocationInfo(task.mutable_resources(), frameworkInfo);
+
+      if (task.has_executor()) {
+        injectAllocationInfo(
+            task.mutable_executor()->mutable_resources(),
+            frameworkInfo);
+      }
+    }
+  }
+
   vector<TaskInfo> tasks;
   if (task.isSome()) {
     tasks.push_back(task.get());
@@ -4548,15 +4590,33 @@ ExecutorInfo Slave::getExecutorInfo(
 
   // Add an allowance for the command executor. This does lead to a
   // small overcommit of resources.
+  //
   // TODO(vinod): If a task is using revocable resources, mark the
   // corresponding executor resource (e.g., cpus) to be also
   // revocable. Currently, it is OK because the containerizer is
   // given task + executor resources on task launch resulting in
   // the container being correctly marked as revocable.
-  executor.mutable_resources()->MergeFrom(
-      Resources::parse(
-        "cpus:" + stringify(DEFAULT_EXECUTOR_CPUS) + ";" +
-        "mem:" + stringify(DEFAULT_EXECUTOR_MEM.megabytes())).get());
+  Resources executorOverhead = Resources::parse(
+      "cpus:" + stringify(DEFAULT_EXECUTOR_CPUS) + ";" +
+      "mem:" + stringify(DEFAULT_EXECUTOR_MEM.megabytes())).get();
+
+  // Inject the task's allocation role into the executor's resources.
+  Option<string> role = None();
+  foreach (const Resource& resource, task.resources()) {
+    CHECK(resource.has_allocation_info());
+
+    if (role.isNone()) {
+      role = resource.allocation_info().role();
+    }
+
+    CHECK_EQ(role.get(), resource.allocation_info().role());
+  }
+
+  CHECK_SOME(role);
+
+  executorOverhead.allocate(role.get());
+
+  executor.mutable_resources()->CopyFrom(executorOverhead);
 
   return executor;
 }

http://git-wip-us.apache.org/repos/asf/mesos/blob/8362e2f7/src/slave/slave.hpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.hpp b/src/slave/slave.hpp
index 75dde73..3b0aea4 100644
--- a/src/slave/slave.hpp
+++ b/src/slave/slave.hpp
@@ -143,7 +143,7 @@ public:
 
   void run(
       const FrameworkInfo& frameworkInfo,
-      const ExecutorInfo& executorInfo,
+      ExecutorInfo executorInfo,
       Option<TaskInfo> task,
       Option<TaskGroupInfo> taskGroup,
       const process::UPID& pid);

http://git-wip-us.apache.org/repos/asf/mesos/blob/8362e2f7/src/tests/slave_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_tests.cpp b/src/tests/slave_tests.cpp
index 7f357a6..b6f7682 100644
--- a/src/tests/slave_tests.cpp
+++ b/src/tests/slave_tests.cpp
@@ -776,13 +776,15 @@ TEST_F_TEMP_DISABLED_ON_WINDOWS(SlaveTest, GetExecutorInfo)
   frameworkInfo.mutable_id()->CopyFrom(frameworkId);
 
   // Launch a task with the command executor.
+  Resources taskResources = Resources::parse("cpus:0.1;mem:32").get();
+  taskResources.allocate(frameworkInfo.role());
+
   TaskInfo task;
   task.set_name("task");
   task.mutable_task_id()->set_value("1");
   task.mutable_slave_id()->set_value(
       "20141010-221431-251662764-60288-32120-0001");
-  task.mutable_resources()->MergeFrom(
-      Resources::parse("cpus:0.1;mem:32").get());
+  task.mutable_resources()->MergeFrom(taskResources);
 
   CommandInfo command;
   command.set_shell(false);
@@ -828,15 +830,21 @@ TEST_F_TEMP_DISABLED_ON_WINDOWS(SlaveTest, GetExecutorInfoForTaskWithContainer)
 
   MockSlave slave(CreateSlaveFlags(), &detector, &containerizer);
 
+  FrameworkInfo frameworkInfo;
+  frameworkInfo.mutable_id()->set_value(
+      "20141010-221431-251662764-60288-12345-0000");
+
   // Launch a task with the command executor and ContainerInfo with
   // NetworkInfo.
+  Resources taskResources = Resources::parse("cpus:0.1;mem:32").get();
+  taskResources.allocate(frameworkInfo.role());
+
   TaskInfo task;
   task.set_name("task");
   task.mutable_task_id()->set_value("1");
   task.mutable_slave_id()->set_value(
       "20141010-221431-251662764-60288-12345-0001");
-  task.mutable_resources()->MergeFrom(
-      Resources::parse("cpus:0.1;mem:32").get());
+  task.mutable_resources()->MergeFrom(taskResources);
 
   CommandInfo command;
   command.set_shell(false);
@@ -853,9 +861,6 @@ TEST_F_TEMP_DISABLED_ON_WINDOWS(SlaveTest, GetExecutorInfoForTaskWithContainer)
   network->add_ip_addresses()->set_ip_address("4.3.2.1");
   network->add_groups("public");
 
-  FrameworkInfo frameworkInfo;
-  frameworkInfo.mutable_id()->set_value(
-      "20141010-221431-251662764-60288-12345-0000");
   const ExecutorInfo& executor = slave.getExecutorInfo(frameworkInfo, task);
 
   // Now assert that the executor has both the command and ContainerInfo
@@ -902,6 +907,13 @@ TEST_F(SlaveTest, ROOT_LaunchTaskInfoWithContainerInfo)
   StandaloneMasterDetector detector;
   MockSlave slave(flags, &detector, containerizer.get());
 
+  FrameworkInfo frameworkInfo;
+  frameworkInfo.mutable_id()->set_value(
+      "20141010-221431-251662764-60288-12345-0000");
+
+  Resources taskResources = Resources::parse("cpus:0.1;mem:32").get();
+  taskResources.allocate(frameworkInfo.role());
+
   // Launch a task with the command executor and ContainerInfo with
   // NetworkInfo.
   TaskInfo task;
@@ -909,8 +921,7 @@ TEST_F(SlaveTest, ROOT_LaunchTaskInfoWithContainerInfo)
   task.mutable_task_id()->set_value("1");
   task.mutable_slave_id()->set_value(
       "20141010-221431-251662764-60288-12345-0001");
-  task.mutable_resources()->MergeFrom(
-      Resources::parse("cpus:0.1;mem:32").get());
+  task.mutable_resources()->MergeFrom(taskResources);
 
   CommandInfo command;
   command.set_shell(false);
@@ -930,9 +941,6 @@ TEST_F(SlaveTest, ROOT_LaunchTaskInfoWithContainerInfo)
   network->add_ip_addresses()->set_ip_address("4.3.2.1");
   network->add_groups("public");
 
-  FrameworkInfo frameworkInfo;
-  frameworkInfo.mutable_id()->set_value(
-      "20141010-221431-251662764-60288-12345-0000");
   const ExecutorInfo& executor = slave.getExecutorInfo(frameworkInfo, task);
 
   Try<string> sandbox = environment->mkdtemp();


[4/5] mesos git commit: Removed a stale TODO in the agent code.

Posted by bm...@apache.org.
Removed a stale TODO in the agent code.

This TODO was written before the code below it handled the
optionality of the state `info` fields.

Review: https://reviews.apache.org/r/56424


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/d1c7fe31
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/d1c7fe31
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/d1c7fe31

Branch: refs/heads/master
Commit: d1c7fe31badf8910e0e7c97c6300ddbbf5c1693f
Parents: dc48ed2
Author: Benjamin Mahler <bm...@apache.org>
Authored: Tue Feb 7 18:33:13 2017 -0800
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed Feb 8 16:36:02 2017 -0800

----------------------------------------------------------------------
 src/slave/slave.cpp | 2 --
 1 file changed, 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/d1c7fe31/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 520fdec..d184dd3 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -5290,8 +5290,6 @@ Future<Nothing> Slave::recover(const Try<state::State>& state)
   // also must do this for MULTI_ROLE frameworks since they
   // may have tasks that were present before the framework
   // upgraded into MULTI_ROLE.
-  //
-  // TODO(bmahler): When can the `info` fields be None?
   auto injectAllocationInfo = [](
       RepeatedPtrField<Resource>* resources,
       const FrameworkInfo& frameworkInfo) {


[3/5] mesos git commit: Added TODOs to make the agent code safer.

Posted by bm...@apache.org.
Added TODOs to make the agent code safer.

The executor and tasks fields in the agent's structs are publicly
visible, but yet for correctness it is required that callers made
modifications through the member functions which carry additional
logic (e.g. tracking allocated resources). The TODO here is to
make these private and to provide public views that are const.

Review: https://reviews.apache.org/r/56421


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/55ca1072
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/55ca1072
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/55ca1072

Branch: refs/heads/master
Commit: 55ca1072a716b4496e795c871840e2ef942489a8
Parents: 348c06b
Author: Benjamin Mahler <bm...@apache.org>
Authored: Tue Feb 7 18:19:40 2017 -0800
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed Feb 8 16:36:02 2017 -0800

----------------------------------------------------------------------
 src/slave/slave.hpp | 11 +++++++++++
 1 file changed, 11 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/55ca1072/src/slave/slave.hpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.hpp b/src/slave/slave.hpp
index 5049eb7..75dde73 100644
--- a/src/slave/slave.hpp
+++ b/src/slave/slave.hpp
@@ -988,6 +988,10 @@ struct Executor
   Resources resources;
 
   // Tasks can be found in one of the following four data structures:
+  //
+  // TODO(bmahler): Make these private to enforce that the task
+  // lifecycle helper functions are not being bypassed, and provide
+  // public views into them.
 
   // Not yet launched tasks. This also includes tasks from `queuedTaskGroups`.
   LinkedHashMap<TaskID, TaskInfo> queuedTasks;
@@ -1072,6 +1076,13 @@ struct Framework
   // sent through the master.
   Option<process::UPID> pid;
 
+  // Executors can be found in one of the following
+  // data structures:
+  //
+  // TODO(bmahler): Make these private to enforce that
+  // the executors lifecycle helper functions are not
+  // being bypassed, and provide public views into them.
+
   // Executors with pending tasks.
   hashmap<ExecutorID, hashmap<TaskID, TaskInfo>> pending;