You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@mesos.apache.org by bm...@apache.org on 2016/08/24 01:52:16 UTC

[1/2] mesos git commit: Removed a hard CHECK for ExecutorInfo.framework_id.

Repository: mesos
Updated Branches:
  refs/heads/master 3ed895e1e -> 9460d097b


Removed a hard CHECK for ExecutorInfo.framework_id.

Review: https://reviews.apache.org/r/51319


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/9460d097
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/9460d097
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/9460d097

Branch: refs/heads/master
Commit: 9460d097b7479c265577ebb75e0dc94dc8f2228d
Parents: cbdaa07
Author: Benjamin Mahler <bm...@apache.org>
Authored: Mon Aug 22 21:41:34 2016 -0700
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Tue Aug 23 18:51:49 2016 -0700

----------------------------------------------------------------------
 src/master/validation.cpp             |  8 +++--
 src/tests/master_validation_tests.cpp | 58 +++++++++++++++++++++++++++++-
 2 files changed, 62 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/9460d097/src/master/validation.cpp
----------------------------------------------------------------------
diff --git a/src/master/validation.cpp b/src/master/validation.cpp
index 3e26c32..3d2965e 100644
--- a/src/master/validation.cpp
+++ b/src/master/validation.cpp
@@ -603,9 +603,11 @@ Option<Error> validateFrameworkID(
 {
   CHECK_NOTNULL(framework);
 
-  // Master ensures `ExecutorInfo.framework_id`
-  // is set before calling this method.
-  CHECK(executor.has_framework_id());
+  // The master fills in `ExecutorInfo.framework_id` for
+  // executors used in Launch operations.
+  if (!executor.has_framework_id()) {
+    return Error("'ExecutorInfo.framework_id' must be set");
+  }
 
   if (executor.framework_id() != framework->id()) {
     return Error(

http://git-wip-us.apache.org/repos/asf/mesos/blob/9460d097/src/tests/master_validation_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_validation_tests.cpp b/src/tests/master_validation_tests.cpp
index e1a5030..86b4b22 100644
--- a/src/tests/master_validation_tests.cpp
+++ b/src/tests/master_validation_tests.cpp
@@ -617,7 +617,7 @@ TEST_F(DestroyOperationValidationTest, UnknownPersistentVolume)
 class TaskValidationTest : public MesosTest {};
 
 
-TEST_F(TaskValidationTest, TaskUsesInvalidFrameworkID)
+TEST_F(TaskValidationTest, ExecutorUsesInvalidFrameworkID)
 {
   Try<Owned<cluster::Master>> master = StartMaster();
   ASSERT_SOME(master);
@@ -657,6 +657,62 @@ TEST_F(TaskValidationTest, TaskUsesInvalidFrameworkID)
 }
 
 
+// The master should fill in the `ExecutorInfo.framework_id`
+// if it is not set by the framework.
+TEST_F(TaskValidationTest, ExecutorMissingFrameworkID)
+{
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  // Start the first slave.
+  MockExecutor exec(DEFAULT_EXECUTOR_ID);
+  TestContainerizer containerizer(&exec);
+
+  Owned<MasterDetector> detector = master.get()->createDetector();
+  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer);
+  ASSERT_SOME(slave);
+
+  MockScheduler sched;
+  MesosSchedulerDriver driver(
+      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);
+
+  EXPECT_CALL(sched, registered(&driver, _, _));
+
+  // Create an executor with a missing framework id.
+  ExecutorInfo executor;
+  executor = DEFAULT_EXECUTOR_INFO;
+  executor.clear_framework_id();
+
+  EXPECT_CALL(sched, resourceOffers(&driver, _))
+    .WillOnce(LaunchTasks(executor, 1, 1, 16, "*"))
+    .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+  EXPECT_CALL(exec, registered(_, _, _, _))
+    .Times(1);
+
+  EXPECT_CALL(exec, launchTask(_, _))
+    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));
+
+  Future<TaskStatus> status;
+  EXPECT_CALL(sched, statusUpdate(&driver, _))
+    .WillOnce(FutureArg<1>(&status));
+
+  driver.start();
+
+  // The task should pass validation since the framework id
+  // is filled in, and when it reaches the dummy executor
+  // it will fail because the executor just exits.
+  AWAIT_READY(status);
+  EXPECT_EQ(TASK_RUNNING, status->state());
+
+  EXPECT_CALL(exec, shutdown(_))
+    .Times(AtMost(1));
+
+  driver.stop();
+  driver.join();
+}
+
+
 TEST_F(TaskValidationTest, TaskUsesCommandInfoAndExecutorInfo)
 {
   Try<Owned<cluster::Master>> master = StartMaster();

[2/2] mesos git commit: Documented a bug with the use of `pendingTasks` in the master.

Posted by bm...@apache.org.

Documented a bug with the use of `pendingTasks` in the master.

The use of `pendingTasks` cannot distinguish between a duplicate
TaskID and a task that has been killed while pending. This means
that if an invalid or unauthorized task is killed while pending,
TASK_KILLED is sent, and once in Master::_accept, we will also
send TASK_ERROR.

Review: https://reviews.apache.org/r/51317


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/cbdaa076
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/cbdaa076
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/cbdaa076

Branch: refs/heads/master
Commit: cbdaa07648cc2571378f2124630dc5533269fe68
Parents: 3ed895e
Author: Benjamin Mahler <bm...@apache.org>
Authored: Mon Aug 22 20:23:25 2016 -0700
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Tue Aug 23 18:51:49 2016 -0700

----------------------------------------------------------------------
 src/master/master.cpp | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/cbdaa076/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index f62be4e..910293a 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -3357,11 +3357,13 @@ void Master::accept(
 
           // Add to pending tasks.
           //
-          // NOTE: The task ID here hasn't been validated yet, but it
-          // doesn't matter. If the task ID is not valid, the task won't
-          // be launched anyway. If two tasks have the same ID, the second
-          // one will not be put into 'framework->pendingTasks', therefore
-          // will not be launched.
+          // NOTE: If two tasks have the same ID, the second one will
+          // not be put into 'framework->pendingTasks', therefore
+          // will not be launched (and TASK_ERROR will be sent).
+          // Unfortunately, we can't tell the difference between a
+          // duplicate TaskID and getting killed while pending
+          // (removed from the map). So it's possible that we send
+          // a TASK_ERROR after a TASK_KILLED (see _accept())!
           if (!framework->pendingTasks.contains(task.task_id())) {
             framework->pendingTasks[task.task_id()] = task;
           }
@@ -3751,15 +3753,20 @@ void Master::_accept(
           Future<bool> authorization = authorizations.front();
           authorizations.pop_front();
 
-          // NOTE: The task will not be in 'pendingTasks' if
-          // 'killTask()' for the task was called before we are here.
-          // No need to launch the task if it's no longer pending.
-          // However, we still need to check the authorization result
-          // and do the validation so that we can send status update
-          // in case the task has duplicated ID.
-          bool pending = framework->pendingTasks.contains(task.task_id());
+          // The task will not be in `pendingTasks` if it has been
+          // killed in the interim. No need to send TASK_KILLED in
+          // this case as it has already been sent. Note however that
+          // we cannot currently distinguish between the task being
+          // killed and the task having a duplicate TaskID within
+          // `pendingTasks`. Therefore we must still validate the task
+          // to ensure we send the TASK_ERROR in the case that it has a
+          // duplicate TaskID.
+          //
+          // TODO(bmahler): We may send TASK_ERROR after a TASK_KILLED
+          // if a task was killed (removed from `pendingTasks`) *and*
+          // the task is invalid or unauthorized here.
 
-          // Remove from pending tasks.
+          bool pending = framework->pendingTasks.contains(task.task_id());
           framework->pendingTasks.erase(task.task_id());
 
           CHECK(!authorization.isDiscarded());