You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by mp...@apache.org on 2017/01/25 11:07:02 UTC

[2/2] mesos git commit: Disallow multi-role frameworks to change their roles.

Disallow multi-role frameworks to change their roles.

We currently do not allow `MULTI_ROLE` frameworks to change their roles.
This restriction will be lifted later.

Review: https://reviews.apache.org/r/55271/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/63a3c149
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/63a3c149
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/63a3c149

Branch: refs/heads/master
Commit: 63a3c149841826098e027f5be03fb79c875ba8f3
Parents: 9a6782f
Author: Benjamin Bannier <be...@mesosphere.io>
Authored: Wed Jan 25 00:00:52 2017 -0800
Committer: Michael Park <mp...@apache.org>
Committed: Wed Jan 25 03:05:50 2017 -0800

----------------------------------------------------------------------
 src/master/master.hpp                 |  46 ++++-
 src/tests/master_validation_tests.cpp | 259 +++++++++++++++++++++++++++++
 2 files changed, 300 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/63a3c149/src/master/master.hpp
----------------------------------------------------------------------
diff --git a/src/master/master.hpp b/src/master/master.hpp
index 4d20524..cb1b82d 100644
--- a/src/master/master.hpp
+++ b/src/master/master.hpp
@@ -21,6 +21,7 @@
 
 #include <list>
 #include <memory>
+#include <set>
 #include <string>
 #include <vector>
 
@@ -2379,6 +2380,46 @@ struct Framework
     // TODO(jmlvanre): Merge other fields as per design doc in
     // MESOS-703.
 
+    // We currently do not allow frameworks to add or remove roles. We
+    // do however allow frameworks to opt in and out of `MULTI_ROLE`
+    // capability, given that the `role` and `roles` field contain the
+    // same number of roles.
+    if (protobuf::frameworkHasCapability(
+            source, FrameworkInfo::Capability::MULTI_ROLE) ||
+        protobuf::frameworkHasCapability(
+            info, FrameworkInfo::Capability::MULTI_ROLE)) {
+      // Two `roles` sets are equivalent if they contain the same
+      // elements. A `role` `*` is not equivalent to an empty `roles`
+      // set, but to the set `{*}`. Since we might be dealing with a
+      // framework upgrading to `MULTI_ROLE` capability or dropping
+      // it, we need to examine either `role` or `roles` in order to
+      // determine the roles a framework is subscribed to.
+      const std::set<std::string> newRoles =
+        protobuf::frameworkHasCapability(
+            source, FrameworkInfo::Capability::MULTI_ROLE)
+          ? std::set<std::string>(
+                {source.roles().begin(), source.roles().end()})
+          : std::set<std::string>({source.role()});
+
+      const std::set<std::string> oldRoles =
+        protobuf::frameworkHasCapability(
+            info, FrameworkInfo::Capability::MULTI_ROLE)
+          ? std::set<std::string>({info.roles().begin(), info.roles().end()})
+          : std::set<std::string>({info.role()});
+
+      if (oldRoles != newRoles) {
+        return Error(
+            "Frameworks cannot change their roles: expected '" +
+            stringify(oldRoles) + "', but got '" + stringify(newRoles) + "'");
+      }
+    } else {
+      if (source.role() != info.role()) {
+        LOG(WARNING) << "Cannot update FrameworkInfo.role to '" << source.role()
+                     << "' for framework " << id() << ". Check MESOS-703";
+      }
+    }
+
+
     if (source.user() != info.user()) {
       LOG(WARNING) << "Cannot update FrameworkInfo.user to '" << source.user()
                    << "' for framework " << id() << ". Check MESOS-703";
@@ -2398,11 +2439,6 @@ struct Framework
                    << id() << ". Check MESOS-703";
     }
 
-    if (source.role() != info.role()) {
-      LOG(WARNING) << "Cannot update FrameworkInfo.role to '" << source.role()
-                   << "' for framework " << id() << ". Check MESOS-703";
-    }
-
     if (source.has_hostname()) {
       info.set_hostname(source.hostname());
     } else {

http://git-wip-us.apache.org/repos/asf/mesos/blob/63a3c149/src/tests/master_validation_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_validation_tests.cpp b/src/tests/master_validation_tests.cpp
index ce10ea4..914b3c5 100644
--- a/src/tests/master_validation_tests.cpp
+++ b/src/tests/master_validation_tests.cpp
@@ -47,6 +47,8 @@
 #include "tests/containerizer.hpp"
 #include "tests/mesos.hpp"
 
+#include "master/detector/standalone.hpp"
+
 using namespace mesos::internal::master::validation;
 
 using google::protobuf::RepeatedPtrField;
@@ -2793,6 +2795,263 @@ TEST_F(FrameworkInfoValidationTest, UpgradeToMultiRole)
   }
 }
 
+
+// This tests verifies that when a multi-role capable framework
+// downgrades to remove multi-role capabilities, illegal attempts to
+// change the roles the framework is subscribed to are caught.
+TEST_F(FrameworkInfoValidationTest, DowngradeFromMultipleRoles)
+{
+  Clock::pause();
+
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
+  frameworkInfo.add_roles("role1");
+  frameworkInfo.add_roles("role2");
+  frameworkInfo.add_capabilities()->set_type(
+      FrameworkInfo::Capability::MULTI_ROLE);
+
+  // Set a long failover timeout so the framework isn't immediately removed.
+  frameworkInfo.set_failover_timeout(Weeks(1).secs());
+
+  Future<FrameworkID> frameworkId;
+
+  {
+    MockScheduler sched;
+    MesosSchedulerDriver driver(
+        &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);
+
+    EXPECT_CALL(sched, registered(&driver, _, _))
+      .WillOnce(FutureArg<1>(&frameworkId));
+
+    driver.start();
+
+    Clock::settle();
+
+    AWAIT_READY(frameworkId);
+
+    driver.stop(true); // Failover.
+    driver.join();
+  }
+
+  frameworkInfo.mutable_id()->CopyFrom(frameworkId.get());
+
+  // Downgrade `frameworkInfo` to remove multi-role capability, and
+  // migrate from `roles` to `role` field. This illegally changes the
+  // number of roles the framework is subscribed to.
+  ASSERT_EQ(2u, frameworkInfo.roles_size());
+  frameworkInfo.set_role(frameworkInfo.roles(0));
+  frameworkInfo.clear_roles();
+  ASSERT_EQ(1u, frameworkInfo.capabilities_size());
+  frameworkInfo.clear_capabilities();
+
+  MockScheduler sched;
+  MesosSchedulerDriver driver(
+      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);
+
+  Future<string> error;
+  EXPECT_CALL(sched, error(&driver, _))
+    .WillOnce(FutureArg<1>(&error));
+
+  driver.start();
+
+  Clock::settle();
+
+  AWAIT_EXPECT_EQ(
+      "Frameworks cannot change their roles: "
+      "expected '{ role1, role2 }', but got '{ role1 }'",
+      error);
+
+  driver.stop();
+  driver.join();
+}
+
+
+// This test verifies that a multi-role framework cannot change its
+// roles on failover.
+TEST_F(FrameworkInfoValidationTest, RejectRoleChangeWithMultiRole)
+{
+  Clock::pause();
+
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
+
+  ASSERT_FALSE(frameworkInfo.has_role());
+  frameworkInfo.add_roles("role1");
+  frameworkInfo.add_capabilities()->set_type(
+      FrameworkInfo::Capability::MULTI_ROLE);
+
+  // Set a long failover timeout so the framework isn't immediately removed.
+  frameworkInfo.set_failover_timeout(Weeks(1).secs());
+
+  Future<FrameworkID> frameworkId;
+
+  {
+    MockScheduler sched;
+    MesosSchedulerDriver driver(
+        &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);
+
+    EXPECT_CALL(sched, registered(&driver, _, _))
+      .WillOnce(FutureArg<1>(&frameworkId));
+
+    driver.start();
+
+    Clock::settle();
+
+    AWAIT_READY(frameworkId);
+
+    driver.stop(true); // Failover.
+    driver.join();
+  }
+
+  frameworkInfo.mutable_id()->CopyFrom(frameworkId.get());
+  frameworkInfo.add_roles("role2");
+
+  MockScheduler sched;
+  MesosSchedulerDriver driver(
+      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);
+
+  Future<string> error;
+  EXPECT_CALL(sched, error(&driver, _))
+    .WillOnce(FutureArg<1>(&error));
+
+  driver.start();
+
+  Clock::settle();
+
+  AWAIT_EXPECT_EQ(
+      "Frameworks cannot change their roles: "
+      "expected '{ role1 }', but got '{ role1, role2 }'",
+      error);
+
+  driver.stop();
+  driver.join();
+}
+
+
+// This test checks that frameworks cannot change their `role` during
+// master failover. The scenario tested here sets up a one-agent
+// cluster with a single framework. On failover the master would
+// first learn about the framework from the agent, and then from the
+// framework. We require the information from the agent and the
+// framework to be consistent.
+TEST_F(FrameworkInfoValidationTest, RejectRoleChangeWithMultiRoleMasterFailover)
+{
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  MockExecutor exec(DEFAULT_EXECUTOR_ID);
+  TestContainerizer containerizer(&exec);
+
+  StandaloneMasterDetector detector(master.get()->pid);
+  Try<Owned<cluster::Slave>> agent = StartSlave(&detector, &containerizer);
+  ASSERT_SOME(agent);
+
+  // Set a role for the framework which we will change later. Also,
+  // set a long framework failover timeout so the framework isn't
+  // immediately cleaned up.
+  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
+  frameworkInfo.set_failover_timeout(Weeks(1).secs());
+  frameworkInfo.set_role("role1");
+
+  Future<FrameworkID> frameworkId;
+
+  {
+    MockScheduler sched;
+    MesosSchedulerDriver driver(
+        &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);
+
+    EXPECT_CALL(sched, registered(&driver, _, _))
+      .WillOnce(FutureArg<1>(&frameworkId));
+
+    Future<vector<Offer>> offers;
+    EXPECT_CALL(sched, resourceOffers(&driver, _))
+      .WillOnce(FutureArg<1>(&offers))
+      .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+    driver.start();
+
+    AWAIT_READY(frameworkId);
+
+    // Start a single task so the `FrameworkInfo` is known to the
+    // agent, and communicated back to the master after master failover.
+    AWAIT_READY(offers);
+    ASSERT_FALSE(offers->empty());
+
+    TaskInfo task;
+    task.set_name("");
+    task.mutable_task_id()->set_value("1");
+    task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
+    task.mutable_resources()->MergeFrom(offers.get()[0].resources());
+    task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);
+
+    EXPECT_CALL(exec, registered(_, _, _, _));
+
+    EXPECT_CALL(exec, launchTask(_, _))
+      .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));
+
+    Future<TaskStatus> runningStatus;
+    EXPECT_CALL(sched, statusUpdate(&driver, _))
+      .WillOnce(FutureArg<1>(&runningStatus));
+
+    driver.launchTasks(offers.get()[0].id(), {task});
+
+    AWAIT_READY(runningStatus);
+    EXPECT_EQ(TASK_RUNNING, runningStatus.get().state());
+    EXPECT_EQ(task.task_id(), runningStatus.get().task_id());
+
+    driver.stop();
+    driver.join();
+  }
+
+  // Cause a master failover.
+  detector.appoint(None());
+
+  master->reset();
+  master = StartMaster();
+  ASSERT_SOME(master);
+
+  // Make sure the agent registers before the framework resubscribes.
+  // The master will learn about the framework from the agent.
+  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
+    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);
+
+  detector.appoint(master.get()->pid);
+
+  AWAIT_READY(slaveReregisteredMessage);
+
+  // Change the `roles` of the framework in an illegal way (upgrade to
+  // `MULTI_ROLE` where `roles` is not identical to `role`). We do
+  // not simply change `role` here as this currently would not trigger
+  // an error, but would only be logged to the master's log.
+  frameworkInfo.mutable_id()->CopyFrom(frameworkId.get());
+  frameworkInfo.add_roles("role2");
+  frameworkInfo.clear_role();
+  frameworkInfo.add_capabilities()->set_type(
+      FrameworkInfo::Capability::MULTI_ROLE);
+
+  MockScheduler sched;
+  MesosSchedulerDriver driver(
+      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);
+
+  Future<string> error;
+  EXPECT_CALL(sched, error(&driver, _))
+    .WillOnce(FutureArg<1>(&error));
+
+  driver.start();
+
+  AWAIT_EXPECT_EQ(
+      "Frameworks cannot change their roles: "
+      "expected '{ role1 }', but got '{ role2 }'",
+      error);
+
+  driver.stop();
+  driver.join();
+}
+
 } // namespace tests {
 } // namespace internal {
 } // namespace mesos {