You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ne...@apache.org on 2017/05/03 22:38:59 UTC

[4/4] mesos git commit: Prevent old Mesos agents from registering or re-registering.

Prevent old Mesos agents from registering or re-registering.

Officially, Mesos 1.0.0 (and newer) masters do not support pre-1.0.0
Mesos agents. However, we previously allowed old agents to register,
which resulted in several master crashes. As a short-term solution, we
fixed those crashes by adding backward compatibility mechanisms into the
master, but that backward compatibility code has made the master logic
more complicated and difficult to understand.

This commit changes the master to ignore registration and re-registration
attempts by agents running a Mesos version older than 1.0.0, as well as by
agents whose version string cannot be parsed. Now that this safety check is
in place, master logic can safely assume that all agents are running at
least Mesos 1.0.0, which will allow several simplifications to be made.

Review: https://reviews.apache.org/r/58709


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/08808739
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/08808739
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/08808739

Branch: refs/heads/master
Commit: 08808739753447059c1585379dc51637d50fea35
Parents: ecc7321
Author: Neil Conway <ne...@gmail.com>
Authored: Mon Mar 6 10:10:45 2017 -0800
Committer: Neil Conway <ne...@gmail.com>
Committed: Wed May 3 15:38:14 2017 -0700

----------------------------------------------------------------------
 src/master/constants.hpp   |  4 ++
 src/master/master.cpp      | 36 +++++++++++++++++
 src/tests/master_tests.cpp | 86 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 126 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/08808739/src/master/constants.hpp
----------------------------------------------------------------------
diff --git a/src/master/constants.hpp b/src/master/constants.hpp
index 7edf9f6..725680b 100644
--- a/src/master/constants.hpp
+++ b/src/master/constants.hpp
@@ -21,6 +21,7 @@
 
 #include <stout/bytes.hpp>
 #include <stout/duration.hpp>
+#include <stout/version.hpp>
 
 namespace mesos {
 namespace internal {
@@ -145,6 +146,9 @@ constexpr char READWRITE_HTTP_AUTHENTICATION_REALM[] =
 constexpr char DEFAULT_HTTP_FRAMEWORK_AUTHENTICATION_REALM[] =
   "mesos-master-scheduler";
 
+// Agents older than this version may neither register nor re-register.
+const Version MINIMUM_AGENT_VERSION = Version(1, 0, 0);
+
 } // namespace master {
 } // namespace internal {
 } // namespace mesos {

http://git-wip-us.apache.org/repos/asf/mesos/blob/08808739/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 87c647b..4e7a161 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -5505,6 +5505,24 @@ void Master::_registerSlave(
     return;
   }
 
+  // Ignore registration attempts by agents running old Mesos versions.
+  // We expect that the agent's version is in SemVer format; if the
+  // version cannot be parsed, the registration attempt is ignored.
+  Try<Version> parsedVersion = Version::parse(version);
+
+  if (parsedVersion.isError()) {
+    LOG(WARNING) << "Failed to parse version '" << version << "'"
+                 << " of agent at " << pid << ": " << parsedVersion.error()
+                 << "; ignoring agent registration attempt";
+    return;
+  } else if (parsedVersion.get() < MINIMUM_AGENT_VERSION) {
+    LOG(WARNING) << "Ignoring registration attempt from old agent at "
+                 << pid << ": agent version is " << parsedVersion.get()
+                 << ", minimum supported agent version is "
+                 << MINIMUM_AGENT_VERSION;
+    return;
+  }
+
   // Check if this slave is already registered (because it retries).
   if (Slave* slave = slaves.registered.get(pid)) {
     if (!slave->connected) {
@@ -5775,6 +5793,24 @@ void Master::_reregisterSlave(
     return;
   }
 
+  // Ignore re-registration attempts by agents running old Mesos versions.
+  // We expect that the agent's version is in SemVer format; if the
+  // version cannot be parsed, the re-registration attempt is ignored.
+  Try<Version> parsedVersion = Version::parse(version);
+
+  if (parsedVersion.isError()) {
+    LOG(WARNING) << "Failed to parse version '" << version << "'"
+                 << " of agent at " << pid << ": " << parsedVersion.error()
+                 << "; ignoring agent re-registration attempt";
+    return;
+  } else if (parsedVersion.get() < MINIMUM_AGENT_VERSION) {
+    LOG(WARNING) << "Ignoring re-registration attempt from old agent at "
+                 << pid << ": agent version is " << parsedVersion.get()
+                 << ", minimum supported agent version is "
+                 << MINIMUM_AGENT_VERSION;
+    return;
+  }
+
   if (Slave* slave = slaves.registered.get(slaveInfo.id())) {
     CHECK(!slaves.recovered.contains(slaveInfo.id()));
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/08808739/src/tests/master_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_tests.cpp b/src/tests/master_tests.cpp
index 7cb4774..ceee2f4 100644
--- a/src/tests/master_tests.cpp
+++ b/src/tests/master_tests.cpp
@@ -7012,6 +7012,92 @@ TEST_F(MasterTest, MultiRoleSchedulerUnsubscribeFromRole)
   driver2.join();
 }
 
+
+// Check that the master ignores registration attempts by old (pre-1.0.0)
+// Mesos agents. We do this by intercepting the agent's `RegisterSlaveMessage`
+// and re-posting a copy whose version has been changed to "0.28.1-rc1".
+TEST_F(MasterTest, IgnoreOldAgentRegistration)
+{
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  Future<RegisterSlaveMessage> registerSlaveMessage =
+    DROP_PROTOBUF(RegisterSlaveMessage(), _, _);
+
+  Clock::pause();
+
+  slave::Flags slaveFlags = CreateSlaveFlags();
+  Owned<MasterDetector> detector = master.get()->createDetector();
+  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), slaveFlags);
+  ASSERT_SOME(slave);
+
+  Clock::advance(slaveFlags.authentication_backoff_factor);
+  Clock::advance(slaveFlags.registration_backoff_factor);
+
+  AWAIT_READY(registerSlaveMessage);
+
+  RegisterSlaveMessage message = registerSlaveMessage.get();
+  message.set_version("0.28.1-rc1");
+
+  // The master should ignore this registration attempt, so it must
+  // never call `apply` on the registrar to update the registry.
+  EXPECT_CALL(*master.get()->registrar.get(), apply(_))
+    .Times(0);
+
+  process::post(slave.get()->pid, master.get()->pid, message);
+
+  // Settle the clock to retire in-flight messages, such as the one above.
+  Clock::settle();
+}
+
+
+// Check that the master ignores re-registration attempts by old (pre-1.0.0)
+// Mesos agents. We do this by intercepting the agent's
+// `ReregisterSlaveMessage` and re-posting it as version "0.28.1-rc1".
+TEST_F(MasterTest, IgnoreOldAgentReregistration)
+{
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
+    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);
+
+  StandaloneMasterDetector detector(master.get()->pid);
+  slave::Flags slaveFlags = CreateSlaveFlags();
+  Try<Owned<cluster::Slave>> slave = StartSlave(&detector, slaveFlags);
+  ASSERT_SOME(slave);
+
+  AWAIT_READY(slaveRegisteredMessage);
+
+  // Intercept the agent's own `ReregisterSlaveMessage` so it can be altered.
+  Future<ReregisterSlaveMessage> reregisterSlaveMessage =
+    DROP_PROTOBUF(ReregisterSlaveMessage(), _, _);
+
+  Clock::pause();
+
+  // Simulate a new master detected event on the agent,
+  // so that the agent will attempt to re-register.
+  detector.appoint(master.get()->pid);
+
+  Clock::advance(slaveFlags.authentication_backoff_factor);
+  Clock::advance(slaveFlags.registration_backoff_factor);
+
+  AWAIT_READY(reregisterSlaveMessage);
+
+  ReregisterSlaveMessage message = reregisterSlaveMessage.get();
+  message.set_version("0.28.1-rc1");
+
+  // The master should ignore this re-registration attempt, so it must
+  // never call `apply` on the registrar to update the registry.
+  EXPECT_CALL(*master.get()->registrar.get(), apply(_))
+    .Times(0);
+
+  process::post(slave.get()->pid, master.get()->pid, message);
+
+  // Settle the clock to retire in-flight messages, such as the one above.
+  Clock::settle();
+}
+
 } // namespace tests {
 } // namespace internal {
 } // namespace mesos {