You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ne...@apache.org on 2017/07/11 18:03:46 UTC

[5/8] mesos git commit: Caused master to abort when joining a mixed-region cluster.

Caused master to abort when joining a mixed-region cluster.

That is, if a standby master is configured to use region X but it learns
that the leading master uses region Y, the standby master will abort with
an error message. This enforces the requirement that all masters in a
single Mesos cluster are configured to use the same region (they can be
configured to use different zones in that region, however).

To allow graceful upgrades, we only abort the standby master if both the
standby master and the leading master have a configured domain; if
either master's domain is unset (the default), the standby master does
not abort.

NOTE: It would be nice to have unit tests to validate this behavior, but
the current unit test infrastructure does not support starting multiple
masters (MESOS-2976).

Review: https://reviews.apache.org/r/59763/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/a8c7ae44
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/a8c7ae44
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/a8c7ae44

Branch: refs/heads/master
Commit: a8c7ae44c85657d159026f1169596c2cd78b357d
Parents: ac2b2c8
Author: Neil Conway <ne...@gmail.com>
Authored: Tue Jul 11 10:43:35 2017 -0700
Committer: Neil Conway <ne...@gmail.com>
Committed: Tue Jul 11 10:43:35 2017 -0700

----------------------------------------------------------------------
 include/mesos/type_utils.hpp |  8 ++++++++
 include/mesos/v1/mesos.hpp   |  8 ++++++++
 src/master/master.cpp        | 29 +++++++++++++++++++++++++++++
 3 files changed, 45 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/a8c7ae44/include/mesos/type_utils.hpp
----------------------------------------------------------------------
diff --git a/include/mesos/type_utils.hpp b/include/mesos/type_utils.hpp
index 3af1b23..3bbc1fe 100644
--- a/include/mesos/type_utils.hpp
+++ b/include/mesos/type_utils.hpp
@@ -264,6 +264,14 @@ inline bool operator!=(const DurationInfo& left, const DurationInfo& right)
 }
 
 
+inline bool operator!=(
+    const DomainInfo::FaultDomain::RegionInfo& left,
+    const DomainInfo::FaultDomain::RegionInfo& right)
+{
+  return left.name() != right.name();
+}
+
+
 inline bool operator<(const ContainerID& left, const ContainerID& right)
 {
   return left.value() < right.value();

http://git-wip-us.apache.org/repos/asf/mesos/blob/a8c7ae44/include/mesos/v1/mesos.hpp
----------------------------------------------------------------------
diff --git a/include/mesos/v1/mesos.hpp b/include/mesos/v1/mesos.hpp
index 752b2b9..0ed78fc 100644
--- a/include/mesos/v1/mesos.hpp
+++ b/include/mesos/v1/mesos.hpp
@@ -263,6 +263,14 @@ inline bool operator!=(const DurationInfo& left, const DurationInfo& right)
 }
 
 
+inline bool operator!=(
+    const DomainInfo::FaultDomain::RegionInfo& left,
+    const DomainInfo::FaultDomain::RegionInfo& right)
+{
+  return left.name() != right.name();
+}
+
+
 inline bool operator<(const ContainerID& left, const ContainerID& right)
 {
   return left.value() < right.value();

http://git-wip-us.apache.org/repos/asf/mesos/blob/a8c7ae44/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 39b2fea..8c38b9c 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -2180,6 +2180,35 @@ void Master::detected(const Future<Option<MasterInfo>>& _leader)
     if (wasElected) {
       EXIT(EXIT_FAILURE) << "Lost leadership... committing suicide!";
     }
+
+    // If this master and the current leader both have a configured
+    // domain and the current leader is located in a different region,
+    // exit with an error message: this indicates a configuration
+    // error, since all masters must be in the same region.
+    if (leader->has_domain() && info_.has_domain()) {
+      const DomainInfo& leaderDomain = leader->domain();
+      const DomainInfo& selfDomain = info_.domain();
+
+      // We currently reject configured domains without fault domains,
+      // but that might change in the future. For compatibility with
+      // future versions of Mesos, we treat a master with a configured
+      // domain but no fault domain as equivalent to a master with no
+      // configured domain.
+      if (leaderDomain.has_fault_domain() && selfDomain.has_fault_domain()) {
+        const DomainInfo::FaultDomain::RegionInfo& leaderRegion =
+          leaderDomain.fault_domain().region();
+        const DomainInfo::FaultDomain::RegionInfo& selfRegion =
+          selfDomain.fault_domain().region();
+
+        if (leaderRegion != selfRegion) {
+          EXIT(EXIT_FAILURE) << "Leading master uses domain "
+                             << leaderDomain << "; this master is "
+                             << "configured to use domain "
+                             << selfDomain << "; all masters in the "
+                             << "same cluster must use the same region";
+        }
+      }
+    }
   }
 
   // Keep detecting.