You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2013/05/24 01:06:59 UTC

[2/2] git commit: Fixed master to send a FrameworkReregistered message when the framework re-registers with a failed over master.

Fixed master to send a FrameworkReregistered message when the
framework re-registers with a failed over master.

Review: https://reviews.apache.org/r/11348


Project: http://git-wip-us.apache.org/repos/asf/incubator-mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-mesos/commit/68ccbf1d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-mesos/tree/68ccbf1d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-mesos/diff/68ccbf1d

Branch: refs/heads/master
Commit: 68ccbf1d9ab1fee2bef933a27368e5460f9be42e
Parents: a45fd2d
Author: Vinod Kone <vi...@twitter.com>
Authored: Fri May 10 16:21:44 2013 -0700
Committer: Vinod Kone <vi...@twitter.com>
Committed: Thu May 23 16:06:04 2013 -0700

----------------------------------------------------------------------
 src/master/master.cpp                   |   26 +++++++++------
 src/master/master.hpp                   |    2 +-
 src/tests/allocator_zookeeper_tests.cpp |    4 +-
 src/tests/fault_tolerance_tests.cpp     |   44 ++++++++++++++++++++++++++
 4 files changed, 63 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-mesos/blob/68ccbf1d/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index c44f2b7..d5e5804 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -658,7 +658,7 @@ void Master::reregisterFramework(const FrameworkInfo& frameworkInfo,
     // N.B. Need to add the framwwork _after_ we add it's tasks
     // (above) so that we can properly determine the resources it's
     // currently using!
-    addFramework(framework);
+    addFramework(framework, true);
   }
 
   CHECK(frameworks.count(frameworkInfo.id()) > 0);
@@ -913,7 +913,7 @@ void Master::reregisterSlave(const SlaveID& slaveId,
     // partitioned, we don't allow the slave to re-register, as we've
     // already informed frameworks that the tasks were lost.
     LOG(ERROR) << "Slave " << slaveId << " at " << from
-               << "attempted to re-register after deactivation";
+               << " attempted to re-register after deactivation";
     reply(ShutdownMessage());
   } else {
     Slave* slave = getSlave(slaveId);
@@ -1632,7 +1632,7 @@ Resources Master::launchTask(const TaskInfo& task,
 }
 
 
-void Master::addFramework(Framework* framework)
+void Master::addFramework(Framework* framework, bool reregister)
 {
   CHECK(frameworks.count(framework->id) == 0);
 
@@ -1640,14 +1640,20 @@ void Master::addFramework(Framework* framework)
 
   link(framework->pid);
 
-  FrameworkRegisteredMessage message;
-  message.mutable_framework_id()->MergeFrom(framework->id);
-  message.mutable_master_info()->MergeFrom(info);
-  send(framework->pid, message);
+  if (reregister) {
+    FrameworkReregisteredMessage message;
+    message.mutable_framework_id()->MergeFrom(framework->id);
+    message.mutable_master_info()->MergeFrom(info);
+    send(framework->pid, message);
+  } else {
+    FrameworkRegisteredMessage message;
+    message.mutable_framework_id()->MergeFrom(framework->id);
+    message.mutable_master_info()->MergeFrom(info);
+    send(framework->pid, message);
+  }
 
-  allocator->frameworkAdded(framework->id,
-                            framework->info,
-                            framework->resources);
+  allocator->frameworkAdded(
+      framework->id, framework->info, framework->resources);
 }
 
 

http://git-wip-us.apache.org/repos/asf/incubator-mesos/blob/68ccbf1d/src/master/master.hpp
----------------------------------------------------------------------
diff --git a/src/master/master.hpp b/src/master/master.hpp
index d3790dc..0a130d9 100644
--- a/src/master/master.hpp
+++ b/src/master/master.hpp
@@ -138,7 +138,7 @@ protected:
                     const Filters& filters);
 
   // Add a framework.
-  void addFramework(Framework* framework);
+  void addFramework(Framework* framework, bool reregister = false);
 
   // Replace the scheduler for a framework with a new process ID, in
   // the event of a scheduler failover.

http://git-wip-us.apache.org/repos/asf/incubator-mesos/blob/68ccbf1d/src/tests/allocator_zookeeper_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/allocator_zookeeper_tests.cpp b/src/tests/allocator_zookeeper_tests.cpp
index 2c7deb1..39b4627 100644
--- a/src/tests/allocator_zookeeper_tests.cpp
+++ b/src/tests/allocator_zookeeper_tests.cpp
@@ -181,7 +181,7 @@ TYPED_TEST(AllocatorZooKeeperTest, FrameworkReregistersFirst)
     .WillOnce(DoAll(InvokeFrameworkAdded(&this->allocator2),
                     FutureSatisfy(&frameworkAdded)));
 
-  EXPECT_CALL(sched, registered(&driver, _, _));
+  EXPECT_CALL(sched, reregistered(&driver, _));
 
   AWAIT_READY(frameworkAdded);
 
@@ -318,7 +318,7 @@ TYPED_TEST(AllocatorZooKeeperTest, SlaveReregistersFirst)
     .WillOnce(DoAll(InvokeSlaveAdded(&this->allocator2),
                     FutureSatisfy(&slaveAdded)));
 
-  EXPECT_CALL(sched, registered(&driver, _, _));
+  EXPECT_CALL(sched, reregistered(&driver, _));
 
   AWAIT_READY(slaveAdded);
 

http://git-wip-us.apache.org/repos/asf/incubator-mesos/blob/68ccbf1d/src/tests/fault_tolerance_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/fault_tolerance_tests.cpp b/src/tests/fault_tolerance_tests.cpp
index d41bef7..cc379f0 100644
--- a/src/tests/fault_tolerance_tests.cpp
+++ b/src/tests/fault_tolerance_tests.cpp
@@ -616,6 +616,50 @@ TEST_F(FaultToleranceClusterTest, PartitionedSlaveExitedExecutor)
 }
 
 
+// This test ensures that a framework connecting with a
+// failed over master gets a re-registered callback.
+TEST_F(FaultToleranceClusterTest, MasterFailover)
+{
+  Try<PID<Master> > master = cluster.masters.start();
+  ASSERT_SOME(master);
+
+  MockScheduler sched;
+  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master.get());
+
+  Future<process::Message> frameworkRegisteredMessage =
+    FUTURE_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _);
+
+  EXPECT_CALL(sched, registered(&driver, _, _));
+
+  driver.start();
+
+  AWAIT_READY(frameworkRegisteredMessage);
+
+  // Simulate failed over master by restarting the master.
+  ASSERT_SOME(cluster.masters.stop(master.get()));
+  master = cluster.masters.start();
+  ASSERT_SOME(master);
+
+  Future<Nothing> reregistered;
+  EXPECT_CALL(sched, reregistered(&driver, _))
+    .WillOnce(FutureSatisfy(&reregistered));
+
+  // Simulate a new master detected message to the scheduler.
+  NewMasterDetectedMessage newMasterDetectedMsg;
+  newMasterDetectedMsg.set_pid(master.get());
+
+  process::post(frameworkRegisteredMessage.get().to, newMasterDetectedMsg);
+
+  // Framework should get a re-register callback.
+  AWAIT_READY(reregistered);
+
+  driver.stop();
+  driver.join();
+
+  cluster.shutdown();
+}
+
+
 TEST_F(FaultToleranceTest, SchedulerFailover)
 {
   ASSERT_TRUE(GTEST_IS_THREADSAFE);