You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2015/09/19 01:08:20 UTC

mesos git commit: Fixed duplicated slave id in master after master failover.

Repository: mesos
Updated Branches:
  refs/heads/master 9327d48ca -> 461c521d4


Fixed duplicated slave id in master after master failover.

Generate masterInfo.id from a random UUID instead of "date + ip + port + pid".

Review: https://reviews.apache.org/r/38003


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/461c521d
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/461c521d
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/461c521d

Branch: refs/heads/master
Commit: 461c521d4315ea6c718954f0ed99360c80aa2c8b
Parents: 9327d48
Author: Klaus Ma <kl...@cguru.net>
Authored: Fri Sep 18 14:27:02 2015 -0700
Committer: Vinod Kone <vi...@gmail.com>
Committed: Fri Sep 18 16:08:11 2015 -0700

----------------------------------------------------------------------
 src/master/master.cpp      | 14 ++-----------
 src/tests/master_tests.cpp | 45 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/461c521d/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index ca4d587..3b390d7 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -305,18 +305,8 @@ Master::Master(
   // NOTE: We populate 'info_' here instead of inside 'initialize()'
   // because 'StandaloneMasterDetector' needs access to the info.
 
-  // The master ID is currently comprised of the current date, the IP
-  // address and port from self() and the OS PID.
-  Try<string> id = strings::format(
-      "%s-%u-%u-%d",
-      DateUtils::currentDate(),
-      self().address.ip.in().get().s_addr,
-      self().address.port,
-      getpid());
-
-  CHECK(!id.isError()) << id.error();
-
-  info_.set_id(id.get());
+  // The master ID is a randomly generated UUID.
+  info_.set_id(UUID::random().toString());
 
   // NOTE: Currently, we store ip in MasterInfo in network order,
   // which should be fixed. See MESOS-1201 for details.

http://git-wip-us.apache.org/repos/asf/mesos/blob/461c521d/src/tests/master_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_tests.cpp b/src/tests/master_tests.cpp
index a477794..2cb8b3c 100644
--- a/src/tests/master_tests.cpp
+++ b/src/tests/master_tests.cpp
@@ -3682,6 +3682,51 @@ TEST_F(MasterTest, MasterFailoverLongLivedExecutor)
   Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
 }
 
+
+// This test ensures that a slave gets a unique SlaveID even after
+// the master fails over. Please refer to MESOS-3351 for further details.
+TEST_F(MasterTest, DuplicatedSlaveIdWhenSlaveReregister)
+{
+  Try<PID<Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  Future<SlaveRegisteredMessage> slaveRegisteredMessage1 =
+      FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);
+
+  StandaloneMasterDetector slaveDetector1 (master.get());
+  Try<PID<Slave>> slave1 = StartSlave(&slaveDetector1);
+  ASSERT_SOME(slave1);
+
+  AWAIT_READY(slaveRegisteredMessage1);
+
+  Stop(master.get()); // Simulate a master failover by restarting the master.
+  master = StartMaster();
+  ASSERT_SOME(master);
+
+  Future<SlaveRegisteredMessage> slaveRegisteredMessage2 =
+      FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);
+
+  // Start a second slave and let it register before the first re-registers.
+  slave::Flags slaveFlags2 = CreateSlaveFlags();
+  Try<PID<Slave>> slave2 = StartSlave(slaveFlags2);
+  ASSERT_SOME(slave2);
+
+  AWAIT_READY(slaveRegisteredMessage2);
+
+  Future<SlaveReregisteredMessage> slaveReregisteredMessage1 =
+      FUTURE_PROTOBUF(SlaveReregisteredMessage(), master.get(), _);
+
+  // Appoint the new master so that the first slave re-registers with it.
+  slaveDetector1.appoint(master.get());
+
+  // If both slaves were given the same SlaveID, the re-registration would
+  // fail here.
+  AWAIT_READY(slaveReregisteredMessage1);
+
+  Shutdown();
+}
+
+
 // This test ensures that if a framework scheduler provides any
 // labels in its FrameworkInfo message, those labels are included
 // in the master's state endpoint.