You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ne...@apache.org on 2017/07/11 18:03:42 UTC

[1/8] mesos git commit: Added protobuf definitions for fault domains.

Repository: mesos
Updated Branches:
  refs/heads/master 4e41d847e -> 33093c893


Added protobuf definitions for fault domains.

This introduces a first-class notion of a "domain", which is a set of
hosts that have similar characteristics. Mesos will initially only
support "fault domains", which identify groups of hosts with similar
failure characteristics.

Review: https://reviews.apache.org/r/59759/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/1789278d
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/1789278d
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/1789278d

Branch: refs/heads/master
Commit: 1789278dc3a930a3ee4aa4e6f38e85688d410032
Parents: 4e41d84
Author: Neil Conway <ne...@gmail.com>
Authored: Tue Jul 11 10:43:18 2017 -0700
Committer: Neil Conway <ne...@gmail.com>
Committed: Tue Jul 11 10:43:18 2017 -0700

----------------------------------------------------------------------
 include/mesos/mesos.proto    | 54 +++++++++++++++++++++++++++++++++++++++
 include/mesos/type_utils.hpp | 33 ++++++++++++++++++++++++
 include/mesos/v1/mesos.hpp   | 33 ++++++++++++++++++++++++
 include/mesos/v1/mesos.proto | 54 +++++++++++++++++++++++++++++++++++++++
 src/common/type_utils.cpp    |  6 +++++
 src/v1/mesos.cpp             |  6 +++++
 6 files changed, 186 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/1789278d/include/mesos/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto
index d70ac9e..8355371 100644
--- a/include/mesos/mesos.proto
+++ b/include/mesos/mesos.proto
@@ -764,6 +764,60 @@ message ExecutorInfo {
 
 
 /**
+ * Describes a domain. A domain is a collection of hosts that have
+ * similar characteristics. Mesos currently only supports "fault
+ * domains", which identify groups of hosts with similar failure
+ * characteristics.
+ *
+ * Frameworks can generally assume that network links between hosts in
+ * the same fault domain have lower latency, higher bandwidth, and better
+ * availability than network links between hosts in different domains.
+ * Schedulers may prefer to place network-intensive workloads in the
+ * same domain, as this may improve performance. Conversely, a single
+ * failure that affects a host in a domain may be more likely to
+ * affect other hosts in the same domain; hence, schedulers may prefer
+ * to place workloads that require high availability in multiple
+ * domains. (For example, all the hosts in a single rack might lose
+ * power or network connectivity simultaneously.)
+ *
+ * There are two kinds of fault domains: regions and zones. Regions
+ * offer the highest degree of fault isolation, but network latency
+ * between regions is high (typically >50 ms). Zones offer a
+ * modest degree of fault isolation along with reasonably low network
+ * latency (typically <10 ms).
+ *
+ * The mapping from fault domains to physical infrastructure is up to
+ * the operator to configure. In cloud environments, regions and zones
+ * can be mapped to the "region" and "availability zone" concepts
+ * exposed by most cloud providers, respectively. In on-premise
+ * deployments, regions and zones can be mapped to data centers and
+ * racks, respectively.
+ *
+ * Both masters and agents can be configured with domains. Frameworks
+ * can compare the domains of two hosts to determine if the hosts are
+ * in the same zone, in different zones in the same region, or in
+ * different regions. Note that all masters in a given Mesos cluster
+ * must be in the same region.
+ */
+message DomainInfo {
+  message FaultDomain {
+    message RegionInfo {
+      required string name = 1;
+    }
+
+    message ZoneInfo {
+      required string name = 1;
+    }
+
+    required RegionInfo region = 1;
+    required ZoneInfo zone = 2;
+  }
+
+  optional FaultDomain fault_domain = 1;
+}
+
+
+/**
  * Describes a master. This will probably have more fields in the
  * future which might be used, for example, to link a framework webui
  * to a master webui.

http://git-wip-us.apache.org/repos/asf/mesos/blob/1789278d/include/mesos/type_utils.hpp
----------------------------------------------------------------------
diff --git a/include/mesos/type_utils.hpp b/include/mesos/type_utils.hpp
index d2a6591..3af1b23 100644
--- a/include/mesos/type_utils.hpp
+++ b/include/mesos/type_utils.hpp
@@ -175,6 +175,36 @@ inline bool operator==(const TaskID& left, const std::string& right)
 }
 
 
+inline bool operator==(
+    const DomainInfo::FaultDomain::RegionInfo& left,
+    const DomainInfo::FaultDomain::RegionInfo& right)
+{
+  return left.name() == right.name();
+}
+
+
+inline bool operator==(
+    const DomainInfo::FaultDomain::ZoneInfo& left,
+    const DomainInfo::FaultDomain::ZoneInfo& right)
+{
+  return left.name() == right.name();
+}
+
+
+inline bool operator==(
+    const DomainInfo::FaultDomain& left,
+    const DomainInfo::FaultDomain& right)
+{
+  return left.region() == right.region() && left.zone() == right.zone();
+}
+
+
+inline bool operator==(const DomainInfo& left, const DomainInfo& right)
+{
+  return left.fault_domain() == right.fault_domain();
+}
+
+
 /**
  * For machines to match, both the `hostname` and `ip` must be equivalent.
  * Hostname is not case sensitive, so it is lowercased before comparison.
@@ -296,6 +326,9 @@ std::ostream& operator<<(
     const ContainerInfo& containerInfo);
 
 
+std::ostream& operator<<(std::ostream& stream, const DomainInfo& domainInfo);
+
+
 std::ostream& operator<<(std::ostream& stream, const Environment& environment);
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/1789278d/include/mesos/v1/mesos.hpp
----------------------------------------------------------------------
diff --git a/include/mesos/v1/mesos.hpp b/include/mesos/v1/mesos.hpp
index 2479b00..752b2b9 100644
--- a/include/mesos/v1/mesos.hpp
+++ b/include/mesos/v1/mesos.hpp
@@ -174,6 +174,36 @@ inline bool operator==(const TaskID& left, const std::string& right)
 }
 
 
+inline bool operator==(
+    const DomainInfo::FaultDomain::RegionInfo& left,
+    const DomainInfo::FaultDomain::RegionInfo& right)
+{
+  return left.name() == right.name();
+}
+
+
+inline bool operator==(
+    const DomainInfo::FaultDomain::ZoneInfo& left,
+    const DomainInfo::FaultDomain::ZoneInfo& right)
+{
+  return left.name() == right.name();
+}
+
+
+inline bool operator==(
+    const DomainInfo::FaultDomain& left,
+    const DomainInfo::FaultDomain& right)
+{
+  return left.region() == right.region() && left.zone() == right.zone();
+}
+
+
+inline bool operator==(const DomainInfo& left, const DomainInfo& right)
+{
+  return left.fault_domain() == right.fault_domain();
+}
+
+
 /**
  * For machines to match, both the `hostname` and `ip` must be equivalent.
  * Hostname is not case sensitive, so it is lowercased before comparison.
@@ -287,6 +317,9 @@ std::ostream& operator<<(
     const ContainerInfo& containerInfo);
 
 
+std::ostream& operator<<(std::ostream& stream, const DomainInfo& domainInfo);
+
+
 std::ostream& operator<<(std::ostream& stream, const ExecutorID& executorId);
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/1789278d/include/mesos/v1/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/v1/mesos.proto b/include/mesos/v1/mesos.proto
index fee8b0c..4d5cebd 100644
--- a/include/mesos/v1/mesos.proto
+++ b/include/mesos/v1/mesos.proto
@@ -764,6 +764,60 @@ message ExecutorInfo {
 
 
 /**
+ * Describes a domain. A domain is a collection of hosts that have
+ * similar characteristics. Mesos currently only supports "fault
+ * domains", which identify groups of hosts with similar failure
+ * characteristics.
+ *
+ * Frameworks can generally assume that network links between hosts in
+ * the same fault domain have lower latency, higher bandwidth, and better
+ * availability than network links between hosts in different domains.
+ * Schedulers may prefer to place network-intensive workloads in the
+ * same domain, as this may improve performance. Conversely, a single
+ * failure that affects a host in a domain may be more likely to
+ * affect other hosts in the same domain; hence, schedulers may prefer
+ * to place workloads that require high availability in multiple
+ * domains. (For example, all the hosts in a single rack might lose
+ * power or network connectivity simultaneously.)
+ *
+ * There are two kinds of fault domains: regions and zones. Regions
+ * offer the highest degree of fault isolation, but network latency
+ * between regions is high (typically >50 ms). Zones offer a
+ * modest degree of fault isolation along with reasonably low network
+ * latency (typically <10 ms).
+ *
+ * The mapping from fault domains to physical infrastructure is up to
+ * the operator to configure. In cloud environments, regions and zones
+ * can be mapped to the "region" and "availability zone" concepts
+ * exposed by most cloud providers, respectively. In on-premise
+ * deployments, regions and zones can be mapped to data centers and
+ * racks, respectively.
+ *
+ * Both masters and agents can be configured with domains. Frameworks
+ * can compare the domains of two hosts to determine if the hosts are
+ * in the same zone, in different zones in the same region, or in
+ * different regions. Note that all masters in a given Mesos cluster
+ * must be in the same region.
+ */
+message DomainInfo {
+  message FaultDomain {
+    message RegionInfo {
+      required string name = 1;
+    }
+
+    message ZoneInfo {
+      required string name = 1;
+    }
+
+    required RegionInfo region = 1;
+    required ZoneInfo zone = 2;
+  }
+
+  optional FaultDomain fault_domain = 1;
+}
+
+
+/**
  * Describes a master. This will probably have more fields in the
  * future which might be used, for example, to link a framework webui
  * to a master webui.

http://git-wip-us.apache.org/repos/asf/mesos/blob/1789278d/src/common/type_utils.cpp
----------------------------------------------------------------------
diff --git a/src/common/type_utils.cpp b/src/common/type_utils.cpp
index aeb1623..a43a6c8 100644
--- a/src/common/type_utils.cpp
+++ b/src/common/type_utils.cpp
@@ -559,6 +559,12 @@ ostream& operator<<(ostream& stream, const ContainerInfo& containerInfo)
 }
 
 
+ostream& operator<<(ostream& stream, const DomainInfo& domainInfo)
+{
+  return stream << JSON::protobuf(domainInfo);
+}
+
+
 ostream& operator<<(ostream& stream, const Environment& environment)
 {
   return stream << JSON::protobuf(environment);

http://git-wip-us.apache.org/repos/asf/mesos/blob/1789278d/src/v1/mesos.cpp
----------------------------------------------------------------------
diff --git a/src/v1/mesos.cpp b/src/v1/mesos.cpp
index 423510e..13f336c 100644
--- a/src/v1/mesos.cpp
+++ b/src/v1/mesos.cpp
@@ -467,6 +467,12 @@ ostream& operator<<(ostream& stream, const ContainerInfo& containerInfo)
 }
 
 
+ostream& operator<<(ostream& stream, const DomainInfo& domainInfo)
+{
+  return stream << JSON::protobuf(domainInfo);
+}
+
+
 ostream& operator<<(ostream& stream, const ExecutorID& executorId)
 {
   return stream << executorId.value();


[3/8] mesos git commit: Added master and agent flags to specify domain.

Posted by ne...@apache.org.
Added master and agent flags to specify domain.

Added master and agent flags to specify domain.

Review: https://reviews.apache.org/r/59761/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/5b99d394
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/5b99d394
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/5b99d394

Branch: refs/heads/master
Commit: 5b99d39472b5b5d974b7f40c6ae47753ac86d89c
Parents: 4189a45
Author: Neil Conway <ne...@gmail.com>
Authored: Tue Jul 11 10:43:20 2017 -0700
Committer: Neil Conway <ne...@gmail.com>
Committed: Tue Jul 11 10:43:20 2017 -0700

----------------------------------------------------------------------
 docs/configuration.md | 30 ++++++++++++++++++++++++++++++
 src/common/parse.hpp  | 12 ++++++++++++
 src/master/flags.cpp  | 36 ++++++++++++++++++++++++++++++++++++
 src/master/flags.hpp  |  1 +
 src/slave/flags.cpp   | 37 +++++++++++++++++++++++++++++++++++++
 src/slave/flags.hpp   |  1 +
 6 files changed, 117 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/5b99d394/docs/configuration.md
----------------------------------------------------------------------
diff --git a/docs/configuration.md b/docs/configuration.md
index 007d0f5..f39d220 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -112,6 +112,36 @@ Example:
 </tr>
 <tr>
   <td>
+    --domain=VALUE
+  </td>
+  <td>
+Domain that the master or agent belongs to. Mesos currently only supports
+fault domains, which identify groups of hosts with similar failure
+characteristics. A fault domain consists of a region and a zone. All masters
+in the same Mesos cluster must be in the same region (they can be in
+different zones). Agents configured to use a different region than the
+master's region will not appear in resource offers to frameworks that have
+not enabled the <code>REGION_AWARE</code> capability. This value can be
+specified as either a JSON-formatted string or a file path containing JSON.
+<p/>
+Example:
+<pre><code>{
+  "fault_domain":
+    {
+      "region":
+        {
+          "name": "aws-us-east-1"
+        },
+      "zone":
+        {
+          "name": "aws-us-east-1a"
+        }
+    }
+}</code></pre>
+  </td>
+</tr>
+<tr>
+  <td>
     --[no-]help
   </td>
   <td>

http://git-wip-us.apache.org/repos/asf/mesos/blob/5b99d394/src/common/parse.hpp
----------------------------------------------------------------------
diff --git a/src/common/parse.hpp b/src/common/parse.hpp
index 64eabf8..212d640 100644
--- a/src/common/parse.hpp
+++ b/src/common/parse.hpp
@@ -214,6 +214,18 @@ inline Try<mesos::RLimitInfo> parse(const std::string& value)
 
 
 template <>
+inline Try<mesos::DomainInfo> parse(const std::string& value)
+{
+  Try<JSON::Object> json = parse<JSON::Object>(value);
+  if (json.isError()) {
+    return Error(json.error());
+  }
+
+  return protobuf::parse<mesos::DomainInfo>(json.get());
+}
+
+
+template <>
 inline Try<mesos::FrameworkID> parse(const std::string& value)
 {
   mesos::FrameworkID frameworkId;

http://git-wip-us.apache.org/repos/asf/mesos/blob/5b99d394/src/master/flags.cpp
----------------------------------------------------------------------
diff --git a/src/master/flags.cpp b/src/master/flags.cpp
index ca5f02c..fa6d274 100644
--- a/src/master/flags.cpp
+++ b/src/master/flags.cpp
@@ -641,4 +641,40 @@ mesos::internal::master::Flags::Flags()
       "Optional IP discovery binary: if set, it is expected to emit\n"
       "the IP address which the master will try to bind to.\n"
       "Cannot be used in conjunction with `--ip`.");
+
+  add(&Flags::domain,
+      "domain",
+      "Domain that the master belongs to. Mesos currently only supports\n"
+      "fault domains, which identify groups of hosts with similar failure\n"
+      "characteristics. A fault domain consists of a region and a zone.\n"
+      "All masters in the same Mesos cluster must be in the same region\n"
+      "(they can be in different zones). This value can be specified as\n"
+      "either a JSON-formatted string or a file path containing JSON.\n"
+      "\n"
+      "Example:\n"
+      "{\n"
+      "  \"fault_domain\":\n"
+      "    {\n"
+      "      \"region\":\n"
+      "        {\n"
+      "          \"name\": \"aws-us-east-1\"\n"
+      "        },\n"
+      "      \"zone\":\n"
+      "        {\n"
+      "          \"name\": \"aws-us-east-1a\"\n"
+      "        }\n"
+      "    }\n"
+      "}",
+      [](const Option<DomainInfo>& domain) -> Option<Error> {
+        if (domain.isSome()) {
+          // Don't let the user specify a domain without a fault
+          // domain. This is allowed by the protobuf spec (for forward
+          // compatibility with possible future changes), but is not a
+          // useful configuration right now.
+          if (!domain->has_fault_domain()) {
+            return Error("`domain` must define `fault_domain`");
+          }
+        }
+        return None();
+      });
 }

http://git-wip-us.apache.org/repos/asf/mesos/blob/5b99d394/src/master/flags.hpp
----------------------------------------------------------------------
diff --git a/src/master/flags.hpp b/src/master/flags.hpp
index 8a9bd2d..edda71a 100644
--- a/src/master/flags.hpp
+++ b/src/master/flags.hpp
@@ -96,6 +96,7 @@ public:
   Duration registry_gc_interval;
   Duration registry_max_agent_age;
   size_t registry_max_agent_count;
+  Option<DomainInfo> domain;
 
   // The following flags are executable specific (e.g., since we only
   // have one instance of libprocess per execution, we only want to

http://git-wip-us.apache.org/repos/asf/mesos/blob/5b99d394/src/slave/flags.cpp
----------------------------------------------------------------------
diff --git a/src/slave/flags.cpp b/src/slave/flags.cpp
index 74df647..a4c1a0c 100644
--- a/src/slave/flags.cpp
+++ b/src/slave/flags.cpp
@@ -1134,4 +1134,41 @@ mesos::internal::slave::Flags::Flags()
       "NOTE: Currently Mesos doesn't listen on IPv6 sockets and hence\n"
       "this IPv6 address is only used to advertise IPv6 addresses for\n"
       "containers running on the host network.\n");
+
+  add(&Flags::domain,
+      "domain",
+      "Domain that the agent belongs to. Mesos currently only supports\n"
+      "fault domains, which identify groups of hosts with similar failure\n"
+      "characteristics. A fault domain consists of a region and a zone.\n"
+      "If this agent is placed in a different region than the master, it\n"
+      "will not appear in resource offers to frameworks that have not\n"
+      "enabled the REGION_AWARE capability. This value can be specified\n"
+      "as either a JSON-formatted string or a file path containing JSON.\n"
+      "\n"
+      "Example:\n"
+      "{\n"
+      "  \"fault_domain\":\n"
+      "    {\n"
+      "      \"region\":\n"
+      "        {\n"
+      "          \"name\": \"aws-us-east-1\"\n"
+      "        },\n"
+      "      \"zone\":\n"
+      "        {\n"
+      "          \"name\": \"aws-us-east-1a\"\n"
+      "        }\n"
+      "    }\n"
+      "}",
+      [](const Option<DomainInfo>& domain) -> Option<Error> {
+        if (domain.isSome()) {
+          // Don't let the user specify a domain without a fault
+          // domain. This is allowed by the protobuf spec (for forward
+          // compatibility with possible future changes), but is not a
+          // useful configuration right now.
+          if (!domain->has_fault_domain()) {
+            return Error("`domain` must define `fault_domain`");
+          }
+        }
+        return None();
+      });
 }

http://git-wip-us.apache.org/repos/asf/mesos/blob/5b99d394/src/slave/flags.hpp
----------------------------------------------------------------------
diff --git a/src/slave/flags.hpp b/src/slave/flags.hpp
index 858876f..bf9adf0 100644
--- a/src/slave/flags.hpp
+++ b/src/slave/flags.hpp
@@ -168,6 +168,7 @@ public:
   std::string xfs_project_range;
 #endif
   bool http_command_executor;
+  Option<DomainInfo> domain;
 
   // The following flags are executable specific (e.g., since we only
   // have one instance of libprocess per execution, we only want to


[6/8] mesos git commit: Ignore registration attempts by agents with misconfigured domain.

Posted by ne...@apache.org.
Ignore registration attempts by agents with misconfigured domain.

We expect the master's domain to be configured first, then the domain of
the agents to be configured. Therefore, if an agent with a configured
domain attempts to register or re-register with a master that does not
have a configured domain, the registration attempt should be ignored.
This is important, because the master would have no way of determining
whether the agent is "remote" or not, which might result in placing
inappropriate workloads on it.

Review: https://reviews.apache.org/r/59764/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/5cf4934c
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/5cf4934c
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/5cf4934c

Branch: refs/heads/master
Commit: 5cf4934c3336b806f4a1e28848ad7d428a0fbc79
Parents: a8c7ae4
Author: Neil Conway <ne...@gmail.com>
Authored: Tue Jul 11 10:43:38 2017 -0700
Committer: Neil Conway <ne...@gmail.com>
Committed: Tue Jul 11 10:43:38 2017 -0700

----------------------------------------------------------------------
 src/master/master.cpp      |  28 +++++++++
 src/tests/master_tests.cpp | 132 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 160 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/5cf4934c/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 8c38b9c..68b1c71 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -5774,6 +5774,20 @@ void Master::_registerSlave(
     return;
   }
 
+  // If the agent is configured with a domain but the master is not,
+  // we can't determine whether the agent is remote. To be safe, we
+  // don't allow the agent to register. We don't shut down the agent so
+  // that any tasks on the agent can continue to run.
+  //
+  // TODO(neilc): Consider sending a warning to the agent (MESOS-7615).
+  if (slaveInfo.has_domain() && !info_.has_domain()) {
+    LOG(WARNING) << "Agent at " << pid << " is configured with "
+                 << "domain " << slaveInfo.domain() << " "
+                 << "but the master has no configured domain. "
+                 << "Ignoring agent registration attempt";
+    return;
+  }
+
   // Check if this slave is already registered (because it retries).
   if (Slave* slave = slaves.registered.get(pid)) {
     if (!slave->connected) {
@@ -6069,6 +6083,20 @@ void Master::_reregisterSlave(
     return;
   }
 
+  // If the agent is configured with a domain but the master is not,
+  // we can't determine whether the agent is remote. To be safe, we
+  // don't allow the agent to re-register. We don't shut down the agent
+  // so that any tasks on the agent can continue to run.
+  //
+  // TODO(neilc): Consider sending a warning to the agent (MESOS-7615).
+  if (slaveInfo.has_domain() && !info_.has_domain()) {
+    LOG(WARNING) << "Agent at " << pid << " is configured with "
+                 << "domain " << slaveInfo.domain() << " "
+                 << "but the master has no configured domain. "
+                 << "Ignoring agent re-registration attempt";
+    return;
+  }
+
   if (Slave* slave = slaves.registered.get(slaveInfo.id())) {
     CHECK(!slaves.recovered.contains(slaveInfo.id()));
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/5cf4934c/src/tests/master_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_tests.cpp b/src/tests/master_tests.cpp
index ac33e93..effb96b 100644
--- a/src/tests/master_tests.cpp
+++ b/src/tests/master_tests.cpp
@@ -7362,6 +7362,138 @@ TEST_F(MasterTest, MultiRoleSchedulerUnsubscribeFromRole)
 }
 
 
+// This test checks that if the master is configured with a domain but
+// the agent is not, the agent is allowed to register and its
+// resources are offered to frameworks as usual.
+TEST_F(MasterTest, AgentDomainUnset)
+{
+  Clock::pause();
+
+  master::Flags masterFlags = CreateMasterFlags();
+  masterFlags.domain = createDomainInfo("region-abc", "zone-123");
+
+  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
+  ASSERT_SOME(master);
+
+  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
+    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);
+
+  Owned<MasterDetector> detector = master.get()->createDetector();
+  slave::Flags slaveFlags = CreateSlaveFlags();
+  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), slaveFlags);
+  ASSERT_SOME(slave);
+
+  Clock::advance(slaveFlags.registration_backoff_factor);
+  AWAIT_READY(slaveRegisteredMessage);
+
+  MockScheduler sched;
+  MesosSchedulerDriver driver(
+      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);
+
+  EXPECT_CALL(sched, registered(&driver, _, _));
+
+  Future<vector<Offer>> offers;
+  EXPECT_CALL(sched, resourceOffers(&driver, _))
+    .WillOnce(FutureArg<1>(&offers));
+
+  driver.start();
+
+  AWAIT_READY(offers);
+  ASSERT_FALSE(offers->empty());
+
+  driver.stop();
+  driver.join();
+}
+
+
+// This test checks that if the agent is configured with a domain but
+// the master is not, the agent is not allowed to register.
+TEST_F(MasterTest, AgentDomainMismatch)
+{
+  Clock::pause();
+
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  slave::Flags slaveFlags = CreateSlaveFlags();
+  slaveFlags.domain = createDomainInfo("region-abc", "zone-456");
+
+  // Agent should attempt to register.
+  Future<RegisterSlaveMessage> registerSlaveMessage =
+    FUTURE_PROTOBUF(RegisterSlaveMessage(), _, _);
+
+  // If the agent is allowed to register, the master will update the
+  // registry. The agent should not be allowed to register, so we
+  // expect that no registrar operations will be observed.
+  EXPECT_CALL(*master.get()->registrar.get(), apply(_))
+    .Times(0);
+
+  Owned<MasterDetector> detector = master.get()->createDetector();
+  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), slaveFlags);
+  ASSERT_SOME(slave);
+
+  Clock::advance(slaveFlags.registration_backoff_factor);
+  AWAIT_READY(registerSlaveMessage);
+
+  Clock::settle();
+}
+
+
+// This test checks that if the agent is configured with a domain but
+// the master is not, the agent is not allowed to re-register. This
+// might happen if the leading master is configured with a domain but
+// one of the standby masters is not, and then the leader fails over.
+TEST_F(MasterTest, AgentDomainMismatchOnReregister)
+{
+  Clock::pause();
+
+  master::Flags masterFlags = CreateMasterFlags();
+  masterFlags.domain = createDomainInfo("region-abc", "zone-123");
+
+  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
+  ASSERT_SOME(master);
+
+  slave::Flags slaveFlags = CreateSlaveFlags();
+  slaveFlags.domain = createDomainInfo("region-abc", "zone-456");
+
+  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
+    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);
+
+  StandaloneMasterDetector detector(master.get()->pid);
+  Try<Owned<cluster::Slave>> slave = StartSlave(&detector, slaveFlags);
+  ASSERT_SOME(slave);
+
+  Clock::advance(slaveFlags.registration_backoff_factor);
+  AWAIT_READY(slaveRegisteredMessage);
+
+  // Simulate master failover and start a new master with no domain
+  // configured.
+  master->reset();
+
+  masterFlags.domain = None();
+
+  master = StartMaster(masterFlags);
+  ASSERT_SOME(master);
+
+  Future<ReregisterSlaveMessage> reregisterSlaveMessage =
+    FUTURE_PROTOBUF(ReregisterSlaveMessage(), _, _);
+
+  // If the agent is allowed to re-register, the master will update
+  // the registry. The agent should not be allowed to re-register, so we
+  // expect that no registrar operations will be observed.
+  EXPECT_CALL(*master.get()->registrar.get(), apply(_))
+    .Times(0);
+
+  // Simulate a new master detected event.
+  detector.appoint(master.get()->pid);
+
+  Clock::advance(slaveFlags.registration_backoff_factor);
+  AWAIT_READY(reregisterSlaveMessage);
+
+  Clock::settle();
+}
+
+
 // Check that the master does not allow old Mesos agents to register.
 // We do this by intercepting the agent's `RegisterSlaveMessage` and
 // then re-sending it with a tweaked version number.


[5/8] mesos git commit: Caused master to abort when joining a mixed-region cluster.

Posted by ne...@apache.org.
Caused master to abort when joining a mixed-region cluster.

That is, if a standby master is configured to use region X but it learns
that the current master has region Y, the standby master will abort with
an error message. This enforces the requirement that all masters in a
single Mesos cluster are configured to use the same region (they can be
configured to use different zones in that region, however).

To allow graceful upgrades, we only abort the standby master if both the
standby master and the leading master have a configured domain; if
either master has the unset (default) domain, the standby master does
not abort.

NOTE: It would be nice to have unit tests to validate this behavior, but
the current unit test infrastructure does not support starting multiple
masters (MESOS-2976).

Review: https://reviews.apache.org/r/59763/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/a8c7ae44
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/a8c7ae44
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/a8c7ae44

Branch: refs/heads/master
Commit: a8c7ae44c85657d159026f1169596c2cd78b357d
Parents: ac2b2c8
Author: Neil Conway <ne...@gmail.com>
Authored: Tue Jul 11 10:43:35 2017 -0700
Committer: Neil Conway <ne...@gmail.com>
Committed: Tue Jul 11 10:43:35 2017 -0700

----------------------------------------------------------------------
 include/mesos/type_utils.hpp |  8 ++++++++
 include/mesos/v1/mesos.hpp   |  8 ++++++++
 src/master/master.cpp        | 29 +++++++++++++++++++++++++++++
 3 files changed, 45 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/a8c7ae44/include/mesos/type_utils.hpp
----------------------------------------------------------------------
diff --git a/include/mesos/type_utils.hpp b/include/mesos/type_utils.hpp
index 3af1b23..3bbc1fe 100644
--- a/include/mesos/type_utils.hpp
+++ b/include/mesos/type_utils.hpp
@@ -264,6 +264,14 @@ inline bool operator!=(const DurationInfo& left, const DurationInfo& right)
 }
 
 
+inline bool operator!=(
+    const DomainInfo::FaultDomain::RegionInfo& left,
+    const DomainInfo::FaultDomain::RegionInfo& right)
+{
+  return left.name() != right.name();
+}
+
+
 inline bool operator<(const ContainerID& left, const ContainerID& right)
 {
   return left.value() < right.value();

http://git-wip-us.apache.org/repos/asf/mesos/blob/a8c7ae44/include/mesos/v1/mesos.hpp
----------------------------------------------------------------------
diff --git a/include/mesos/v1/mesos.hpp b/include/mesos/v1/mesos.hpp
index 752b2b9..0ed78fc 100644
--- a/include/mesos/v1/mesos.hpp
+++ b/include/mesos/v1/mesos.hpp
@@ -263,6 +263,14 @@ inline bool operator!=(const DurationInfo& left, const DurationInfo& right)
 }
 
 
+inline bool operator!=(
+    const DomainInfo::FaultDomain::RegionInfo& left,
+    const DomainInfo::FaultDomain::RegionInfo& right)
+{
+  return left.name() != right.name();
+}
+
+
 inline bool operator<(const ContainerID& left, const ContainerID& right)
 {
   return left.value() < right.value();

http://git-wip-us.apache.org/repos/asf/mesos/blob/a8c7ae44/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 39b2fea..8c38b9c 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -2180,6 +2180,35 @@ void Master::detected(const Future<Option<MasterInfo>>& _leader)
     if (wasElected) {
       EXIT(EXIT_FAILURE) << "Lost leadership... committing suicide!";
     }
+
+    // If this master and the current leader both have a configured
+    // domain and the current leader is located in a different region,
+    // exit with an error message: this indicates a configuration
+    // error, since all masters must be in the same region.
+    if (leader->has_domain() && info_.has_domain()) {
+      const DomainInfo& leaderDomain = leader->domain();
+      const DomainInfo& selfDomain = info_.domain();
+
+      // We currently reject configured domains without fault domains,
+      // but that might change in the future. For compatibility with
+      // future versions of Mesos, we treat a master with a configured
+      // domain but no fault domain as equivalent to a master with no
+      // configured domain.
+      if (leaderDomain.has_fault_domain() && selfDomain.has_fault_domain()) {
+        const DomainInfo::FaultDomain::RegionInfo& leaderRegion =
+          leaderDomain.fault_domain().region();
+        const DomainInfo::FaultDomain::RegionInfo& selfRegion =
+          selfDomain.fault_domain().region();
+
+        if (leaderRegion != selfRegion) {
+          EXIT(EXIT_FAILURE) << "Leading master uses domain "
+                             << leaderDomain << "; this master is "
+                             << "configured to use domain "
+                             << selfDomain << "; all masters in the "
+                             << "same cluster must use the same region";
+        }
+      }
+    }
   }
 
   // Keep detecting.


[7/8] mesos git commit: Changed allocator to offer remote resources to region-aware frameworks.

Posted by ne...@apache.org.
Changed allocator to offer remote resources to region-aware frameworks.

Changed allocator to offer remote resources to region-aware frameworks.

Review: https://reviews.apache.org/r/59766/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/25111bb0
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/25111bb0
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/25111bb0

Branch: refs/heads/master
Commit: 25111bb03cd663a51e30b74feb2493203ea31d45
Parents: 5cf4934
Author: Neil Conway <ne...@gmail.com>
Authored: Tue Jul 11 10:43:43 2017 -0700
Committer: Neil Conway <ne...@gmail.com>
Committed: Tue Jul 11 10:43:43 2017 -0700

----------------------------------------------------------------------
 include/mesos/allocator/allocator.hpp       |   3 +-
 src/master/allocator/mesos/allocator.hpp    |  12 +-
 src/master/allocator/mesos/hierarchical.cpp |  54 +++++++-
 src/master/allocator/mesos/hierarchical.hpp |  13 +-
 src/master/master.cpp                       |   3 +-
 src/tests/allocator.hpp                     |   9 +-
 src/tests/api_tests.cpp                     |   4 +-
 src/tests/master_allocator_tests.cpp        |  36 +++---
 src/tests/master_quota_tests.cpp            |  20 +--
 src/tests/master_tests.cpp                  | 155 +++++++++++++++++++++++
 src/tests/reservation_tests.cpp             |   6 +-
 src/tests/resource_offers_tests.cpp         |   2 +-
 src/tests/slave_recovery_tests.cpp          |   2 +-
 13 files changed, 272 insertions(+), 47 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/25111bb0/include/mesos/allocator/allocator.hpp
----------------------------------------------------------------------
diff --git a/include/mesos/allocator/allocator.hpp b/include/mesos/allocator/allocator.hpp
index 9d116c6..537658b 100644
--- a/include/mesos/allocator/allocator.hpp
+++ b/include/mesos/allocator/allocator.hpp
@@ -94,7 +94,8 @@ public:
         inverseOfferCallback,
       const Option<std::set<std::string>>&
         fairnessExcludeResourceNames = None(),
-      bool filterGpuResources = true) = 0;
+      bool filterGpuResources = true,
+      const Option<DomainInfo>& domain = None()) = 0;
 
   /**
    * Informs the allocator of the recovered state from the master.

http://git-wip-us.apache.org/repos/asf/mesos/blob/25111bb0/src/master/allocator/mesos/allocator.hpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/allocator.hpp b/src/master/allocator/mesos/allocator.hpp
index 725ec7c..903edf6 100644
--- a/src/master/allocator/mesos/allocator.hpp
+++ b/src/master/allocator/mesos/allocator.hpp
@@ -58,7 +58,8 @@ public:
         inverseOfferCallback,
       const Option<std::set<std::string>>&
         fairnessExcludeResourceNames = None(),
-      bool filterGpuResources = true);
+      bool filterGpuResources = true,
+      const Option<DomainInfo>& domain = None());
 
   void recover(
       const int expectedAgentCount,
@@ -197,7 +198,8 @@ public:
         inverseOfferCallback,
       const Option<std::set<std::string>>&
         fairnessExcludeResourceNames = None(),
-      bool filterGpuResources = true) = 0;
+      bool filterGpuResources = true,
+      const Option<DomainInfo>& domain = None()) = 0;
 
   virtual void recover(
       const int expectedAgentCount,
@@ -345,7 +347,8 @@ inline void MesosAllocator<AllocatorProcess>::initialize(
               const hashmap<SlaveID, UnavailableResources>&)>&
       inverseOfferCallback,
     const Option<std::set<std::string>>& fairnessExcludeResourceNames,
-    bool filterGpuResources)
+    bool filterGpuResources,
+    const Option<DomainInfo>& domain)
 {
   process::dispatch(
       process,
@@ -354,7 +357,8 @@ inline void MesosAllocator<AllocatorProcess>::initialize(
       offerCallback,
       inverseOfferCallback,
       fairnessExcludeResourceNames,
-      filterGpuResources);
+      filterGpuResources,
+      domain);
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/25111bb0/src/master/allocator/mesos/hierarchical.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index fad9330..f021c34 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -150,13 +150,15 @@ void HierarchicalAllocatorProcess::initialize(
              const hashmap<SlaveID, UnavailableResources>&)>&
       _inverseOfferCallback,
     const Option<set<string>>& _fairnessExcludeResourceNames,
-    bool _filterGpuResources)
+    bool _filterGpuResources,
+    const Option<DomainInfo>& _domain)
 {
   allocationInterval = _allocationInterval;
   offerCallback = _offerCallback;
   inverseOfferCallback = _inverseOfferCallback;
   fairnessExcludeResourceNames = _fairnessExcludeResourceNames;
   filterGpuResources = _filterGpuResources;
+  domain = _domain;
   initialized = true;
   paused = false;
 
@@ -560,6 +562,10 @@ void HierarchicalAllocatorProcess::addSlave(
   slave.hostname = slaveInfo.hostname();
   slave.capabilities = protobuf::slave::Capabilities(capabilities);
 
+  if (slaveInfo.has_domain()) {
+    slave.domain = slaveInfo.domain();
+  }
+
   // NOTE: We currently implement maintenance in the allocator to be able to
   // leverage state and features such as the FrameworkSorter and OfferFilter.
   if (unavailability.isSome()) {
@@ -1599,6 +1605,12 @@ void HierarchicalAllocatorProcess::__allocate()
           continue;
         }
 
+        // If this framework is not region-aware, don't offer it
+        // resources on agents in remote regions.
+        if (!framework.capabilities.regionAware && isRemoteSlave(slave)) {
+          continue;
+        }
+
         // Calculate the currently available resources on the slave, which
         // is the difference in non-shared resources between total and
         // allocated, plus all shared resources on the agent (if applicable).
@@ -1778,6 +1790,12 @@ void HierarchicalAllocatorProcess::__allocate()
           continue;
         }
 
+        // If this framework is not region-aware, don't offer it
+        // resources on agents in remote regions.
+        if (!framework.capabilities.regionAware && isRemoteSlave(slave)) {
+          continue;
+        }
+
         // Calculate the currently available resources on the slave, which
         // is the difference in non-shared resources between total and
         // allocated, plus all shared resources on the agent (if applicable).
@@ -2365,6 +2383,40 @@ bool HierarchicalAllocatorProcess::updateSlaveTotal(
   return true;
 }
 
+
+bool HierarchicalAllocatorProcess::isRemoteSlave(const Slave& slave) const
+{
+  // If the slave does not have a configured domain, assume it is not remote.
+  if (slave.domain.isNone()) {
+    return false;
+  }
+
+  // The current version of the Mesos agent refuses to start up if a
+  // domain is specified without also including a fault domain. That
+  // might change in the future, if more types of domains are added.
+  // For forward compatibility, we treat agents with a configured
+  // domain but no fault domain as having no configured domain.
+  if (!slave.domain->has_fault_domain()) {
+    return false;
+  }
+
+  // If the slave has a configured domain (and it has been allowed to
+  // register with the master), the master must also have a configured
+  // domain.
+  CHECK(domain.isSome());
+
+  // The master will not start up if configured with a domain but no
+  // fault domain.
+  CHECK(domain->has_fault_domain());
+
+  const DomainInfo::FaultDomain::RegionInfo& masterRegion =
+    domain->fault_domain().region();
+  const DomainInfo::FaultDomain::RegionInfo& slaveRegion =
+    slave.domain->fault_domain().region();
+
+  return masterRegion != slaveRegion;
+}
+
 } // namespace internal {
 } // namespace allocator {
 } // namespace master {

http://git-wip-us.apache.org/repos/asf/mesos/blob/25111bb0/src/master/allocator/mesos/hierarchical.hpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.hpp b/src/master/allocator/mesos/hierarchical.hpp
index 81d1b96..c234605 100644
--- a/src/master/allocator/mesos/hierarchical.hpp
+++ b/src/master/allocator/mesos/hierarchical.hpp
@@ -103,7 +103,8 @@ public:
         inverseOfferCallback,
       const Option<std::set<std::string>>&
         fairnessExcludeResourceNames = None(),
-      bool filterGpuResources = true);
+      bool filterGpuResources = true,
+      const Option<DomainInfo>& domain = None());
 
   void recover(
       const int _expectedAgentCount,
@@ -376,6 +377,8 @@ protected:
 
     protobuf::slave::Capabilities capabilities;
 
+    Option<DomainInfo> domain;
+
     // Represents a scheduled unavailability due to maintenance for a specific
     // slave, and the responses from frameworks as to whether they will be able
     // to gracefully handle this unavailability.
@@ -448,6 +451,9 @@ protected:
   // Filter GPU resources based on the `GPU_RESOURCES` framework capability.
   bool filterGpuResources;
 
+  // The master's domain, if any.
+  Option<DomainInfo> domain;
+
   // There are two stages of allocation. During the first stage resources
   // are allocated only to frameworks in roles with quota set. During the
   // second stage remaining resources that would not be required to satisfy
@@ -527,6 +533,11 @@ private:
   // and the role and quota sorters (whose total resources match the agent's
   // total resources). Returns true iff the stored agent total was changed.
   bool updateSlaveTotal(const SlaveID& slaveId, const Resources& total);
+
+  // Helper that returns true if the given agent is located in a
+  // different region than the master. This can only be the case if
+  // the agent and the master are both configured with a fault domain.
+  bool isRemoteSlave(const Slave& slave) const;
 };
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/25111bb0/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 68b1c71..eb660cc 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -768,7 +768,8 @@ void Master::initialize()
       defer(self(), &Master::offer, lambda::_1, lambda::_2),
       defer(self(), &Master::inverseOffer, lambda::_1, lambda::_2),
       flags.fair_sharing_excluded_resource_names,
-      flags.filter_gpu_resources);
+      flags.filter_gpu_resources,
+      flags.domain);
 
   // Parse the whitelist. Passing Allocator::updateWhitelist()
   // callback is safe because we shut down the whitelistWatcher in

http://git-wip-us.apache.org/repos/asf/mesos/blob/25111bb0/src/tests/allocator.hpp
----------------------------------------------------------------------
diff --git a/src/tests/allocator.hpp b/src/tests/allocator.hpp
index a990788..f1c0d14 100644
--- a/src/tests/allocator.hpp
+++ b/src/tests/allocator.hpp
@@ -229,9 +229,9 @@ public:
     // to get the best of both worlds: the ability to use 'DoDefault'
     // and no warnings when expectations are not explicit.
 
-    ON_CALL(*this, initialize(_, _, _, _, _))
+    ON_CALL(*this, initialize(_, _, _, _, _, _))
       .WillByDefault(InvokeInitialize(this));
-    EXPECT_CALL(*this, initialize(_, _, _, _, _))
+    EXPECT_CALL(*this, initialize(_, _, _, _, _, _))
       .WillRepeatedly(DoDefault());
 
     ON_CALL(*this, recover(_, _))
@@ -357,7 +357,7 @@ public:
 
   virtual ~TestAllocator() {}
 
-  MOCK_METHOD5(initialize, void(
+  MOCK_METHOD6(initialize, void(
       const Duration&,
       const lambda::function<
           void(const FrameworkID&,
@@ -366,7 +366,8 @@ public:
           void(const FrameworkID&,
                const hashmap<SlaveID, UnavailableResources>&)>&,
       const Option<std::set<std::string>>&,
-      bool));
+      bool,
+      const Option<DomainInfo>&));
 
   MOCK_METHOD2(recover, void(
       const int expectedAgentCount,

http://git-wip-us.apache.org/repos/asf/mesos/blob/25111bb0/src/tests/api_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/api_tests.cpp b/src/tests/api_tests.cpp
index a460e56..f22ca28 100644
--- a/src/tests/api_tests.cpp
+++ b/src/tests/api_tests.cpp
@@ -967,7 +967,7 @@ TEST_P(MasterAPITest, ReserveResources)
 {
   TestAllocator<> allocator;
 
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   // Set a low allocation interval to speed up this test.
   master::Flags flags = MesosTest::CreateMasterFlags();
@@ -1059,7 +1059,7 @@ TEST_P(MasterAPITest, UnreserveResources)
 {
   TestAllocator<> allocator;
 
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   // Set a low allocation interval to speed up this test.
   master::Flags flags = MesosTest::CreateMasterFlags();

http://git-wip-us.apache.org/repos/asf/mesos/blob/25111bb0/src/tests/master_allocator_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_allocator_tests.cpp b/src/tests/master_allocator_tests.cpp
index f83ca66..f8b315c 100644
--- a/src/tests/master_allocator_tests.cpp
+++ b/src/tests/master_allocator_tests.cpp
@@ -164,7 +164,7 @@ TYPED_TEST(MasterAllocatorTest, SingleFramework)
 {
   TestAllocator<TypeParam> allocator;
 
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   Try<Owned<cluster::Master>> master = this->StartMaster(&allocator);
   ASSERT_SOME(master);
@@ -212,7 +212,7 @@ TYPED_TEST(MasterAllocatorTest, ResourcesUnused)
 
   TestAllocator<TypeParam> allocator;
 
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   Try<Owned<cluster::Master>> master = this->StartMaster(&allocator);
   ASSERT_SOME(master);
@@ -321,7 +321,7 @@ TYPED_TEST(MasterAllocatorTest, OutOfOrderDispatch)
 {
   TestAllocator<TypeParam> allocator;
 
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   Try<Owned<cluster::Master>> master = this->StartMaster(&allocator);
   ASSERT_SOME(master);
@@ -450,7 +450,7 @@ TYPED_TEST(MasterAllocatorTest, SchedulerFailover)
 {
   TestAllocator<TypeParam> allocator;
 
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   Try<Owned<cluster::Master>> master = this->StartMaster(&allocator);
   ASSERT_SOME(master);
@@ -576,7 +576,7 @@ TYPED_TEST(MasterAllocatorTest, FrameworkExited)
 
   TestAllocator<TypeParam> allocator;
 
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   master::Flags masterFlags = this->CreateMasterFlags();
   masterFlags.allocation_interval = Milliseconds(50);
@@ -736,7 +736,7 @@ TYPED_TEST(MasterAllocatorTest, SlaveLost)
 {
   TestAllocator<TypeParam> allocator;
 
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   Try<Owned<cluster::Master>> master = this->StartMaster(&allocator);
   ASSERT_SOME(master);
@@ -860,7 +860,7 @@ TYPED_TEST(MasterAllocatorTest, SlaveAdded)
 {
   TestAllocator<TypeParam> allocator;
 
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   master::Flags masterFlags = this->CreateMasterFlags();
   masterFlags.allocation_interval = Milliseconds(50);
@@ -956,7 +956,7 @@ TYPED_TEST(MasterAllocatorTest, TaskFinished)
 {
   TestAllocator<TypeParam> allocator;
 
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   master::Flags masterFlags = this->CreateMasterFlags();
   masterFlags.allocation_interval = Milliseconds(50);
@@ -1060,7 +1060,7 @@ TYPED_TEST(MasterAllocatorTest, CpusOnlyOfferedAndTaskLaunched)
 {
   TestAllocator<TypeParam> allocator;
 
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   master::Flags masterFlags = this->CreateMasterFlags();
   masterFlags.allocation_interval = Milliseconds(50);
@@ -1141,7 +1141,7 @@ TYPED_TEST(MasterAllocatorTest, MemoryOnlyOfferedAndTaskLaunched)
 {
   TestAllocator<TypeParam> allocator;
 
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   master::Flags masterFlags = this->CreateMasterFlags();
   masterFlags.allocation_interval = Milliseconds(50);
@@ -1235,7 +1235,7 @@ TYPED_TEST(MasterAllocatorTest, Whitelist)
 
   TestAllocator<TypeParam> allocator;
 
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   Future<Nothing> updateWhitelist1;
   EXPECT_CALL(allocator, updateWhitelist(Option<hashset<string>>(hosts)))
@@ -1274,7 +1274,7 @@ TYPED_TEST(MasterAllocatorTest, RoleTest)
 {
   TestAllocator<TypeParam> allocator;
 
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   master::Flags masterFlags = this->CreateMasterFlags();
   masterFlags.roles = Some("role2");
@@ -1367,7 +1367,7 @@ TYPED_TEST(MasterAllocatorTest, FrameworkReregistersFirst)
   {
     TestAllocator<TypeParam> allocator;
 
-    EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+    EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
     Try<Owned<cluster::Master>> master = this->StartMaster(&allocator);
     ASSERT_SOME(master);
@@ -1424,7 +1424,7 @@ TYPED_TEST(MasterAllocatorTest, FrameworkReregistersFirst)
   {
     TestAllocator<TypeParam> allocator2;
 
-    EXPECT_CALL(allocator2, initialize(_, _, _, _, _));
+    EXPECT_CALL(allocator2, initialize(_, _, _, _, _, _));
 
     Future<Nothing> addFramework;
     EXPECT_CALL(allocator2, addFramework(_, _, _, _, _))
@@ -1491,7 +1491,7 @@ TYPED_TEST(MasterAllocatorTest, SlaveReregistersFirst)
   {
     TestAllocator<TypeParam> allocator;
 
-    EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+    EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
     Try<Owned<cluster::Master>> master = this->StartMaster(&allocator);
     ASSERT_SOME(master);
@@ -1547,7 +1547,7 @@ TYPED_TEST(MasterAllocatorTest, SlaveReregistersFirst)
   {
     TestAllocator<TypeParam> allocator2;
 
-    EXPECT_CALL(allocator2, initialize(_, _, _, _, _));
+    EXPECT_CALL(allocator2, initialize(_, _, _, _, _, _));
 
     Future<Nothing> addSlave;
     EXPECT_CALL(allocator2, addSlave(_, _, _, _, _, _))
@@ -1614,7 +1614,7 @@ TYPED_TEST(MasterAllocatorTest, RebalancedForUpdatedWeights)
 
   TestAllocator<TypeParam> allocator;
 
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   // Start Mesos master.
   master::Flags masterFlags = this->CreateMasterFlags();
@@ -1808,7 +1808,7 @@ TYPED_TEST(MasterAllocatorTest, NestedRoles)
 
   TestAllocator<TypeParam> allocator;
 
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   master::Flags masterFlags = this->CreateMasterFlags();
   Try<Owned<cluster::Master>> master =

http://git-wip-us.apache.org/repos/asf/mesos/blob/25111bb0/src/tests/master_quota_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_quota_tests.cpp b/src/tests/master_quota_tests.cpp
index bbdbfbe..9d52f76 100644
--- a/src/tests/master_quota_tests.cpp
+++ b/src/tests/master_quota_tests.cpp
@@ -408,7 +408,7 @@ TEST_F(MasterQuotaTest, SetExistingQuota)
 TEST_F(MasterQuotaTest, RemoveSingleQuota)
 {
   TestAllocator<> allocator;
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   Try<Owned<cluster::Master>> master = StartMaster(&allocator);
   ASSERT_SOME(master);
@@ -580,7 +580,7 @@ TEST_F(MasterQuotaTest, Status)
 TEST_F(MasterQuotaTest, InsufficientResourcesSingleAgent)
 {
   TestAllocator<> allocator;
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   Try<Owned<cluster::Master>> master = StartMaster(&allocator);
   ASSERT_SOME(master);
@@ -642,7 +642,7 @@ TEST_F(MasterQuotaTest, InsufficientResourcesSingleAgent)
 TEST_F(MasterQuotaTest, InsufficientResourcesMultipleAgents)
 {
   TestAllocator<> allocator;
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   Try<Owned<cluster::Master>> master = StartMaster(&allocator);
   ASSERT_SOME(master);
@@ -719,7 +719,7 @@ TEST_F(MasterQuotaTest, InsufficientResourcesMultipleAgents)
 TEST_F(MasterQuotaTest, AvailableResourcesSingleAgent)
 {
   TestAllocator<> allocator;
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   Try<Owned<cluster::Master>> master = StartMaster(&allocator);
   ASSERT_SOME(master);
@@ -769,7 +769,7 @@ TEST_F(MasterQuotaTest, AvailableResourcesSingleAgent)
 TEST_F(MasterQuotaTest, AvailableResourcesMultipleAgents)
 {
   TestAllocator<> allocator;
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   Try<Owned<cluster::Master>> master = StartMaster(&allocator);
   ASSERT_SOME(master);
@@ -838,7 +838,7 @@ TEST_F(MasterQuotaTest, AvailableResourcesMultipleAgents)
 TEST_F(MasterQuotaTest, AvailableResourcesAfterRescinding)
 {
   TestAllocator<> allocator;
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   Try<Owned<cluster::Master>> master = StartMaster(&allocator);
   ASSERT_SOME(master);
@@ -1074,7 +1074,7 @@ TEST_F(MasterQuotaTest, RecoverQuotaEmptyCluster)
   }
 
   TestAllocator<> allocator;
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   // Restart the master; configured quota should be recovered from the registry.
   master->reset();
@@ -1108,7 +1108,7 @@ TEST_F(MasterQuotaTest, RecoverQuotaEmptyCluster)
 TEST_F(MasterQuotaTest, NoAuthenticationNoAuthorization)
 {
   TestAllocator<> allocator;
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   // Disable http_readwrite authentication and authorization.
   // TODO(alexr): Setting master `--acls` flag to `ACLs()` or `None()` seems
@@ -1218,7 +1218,7 @@ TEST_F(MasterQuotaTest, UnauthenticatedQuotaRequest)
 TEST_F(MasterQuotaTest, AuthorizeGetUpdateQuotaRequests)
 {
   TestAllocator<> allocator;
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   // Setup ACLs so that only the default principal can modify quotas
   // for `ROLE1` and read status.
@@ -1782,7 +1782,7 @@ TEST_F(MasterQuotaTest, DISABLED_ChildRoleDeleteParentQuota)
 TEST_F(MasterQuotaTest, DISABLED_ClusterCapacityWithNestedRoles)
 {
   TestAllocator<> allocator;
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   Try<Owned<cluster::Master>> master = StartMaster(&allocator);
   ASSERT_SOME(master);

http://git-wip-us.apache.org/repos/asf/mesos/blob/25111bb0/src/tests/master_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_tests.cpp b/src/tests/master_tests.cpp
index effb96b..e070e87 100644
--- a/src/tests/master_tests.cpp
+++ b/src/tests/master_tests.cpp
@@ -7362,6 +7362,161 @@ TEST_F(MasterTest, MultiRoleSchedulerUnsubscribeFromRole)
 }
 
 
+// This test checks that if the agent and master are configured with
+// domains that specify the same region (but different zones), the
+// agent is allowed to register and its resources are offered to
+// frameworks as usual.
+TEST_F(MasterTest, AgentDomainSameRegion)
+{
+  Clock::pause();
+
+  master::Flags masterFlags = CreateMasterFlags();
+  masterFlags.domain = createDomainInfo("region-abc", "zone-123");
+
+  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
+  ASSERT_SOME(master);
+
+  slave::Flags slaveFlags = CreateSlaveFlags();
+  slaveFlags.domain = createDomainInfo("region-abc", "zone-456");
+
+  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
+    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);
+
+  Owned<MasterDetector> detector = master.get()->createDetector();
+  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), slaveFlags);
+  ASSERT_SOME(slave);
+
+  Clock::advance(slaveFlags.registration_backoff_factor);
+  AWAIT_READY(slaveRegisteredMessage);
+
+  MockScheduler sched;
+  MesosSchedulerDriver driver(
+      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);
+
+  Future<MasterInfo> masterInfo;
+  EXPECT_CALL(sched, registered(&driver, _, _))
+    .WillOnce(FutureArg<2>(&masterInfo));
+
+  Future<vector<Offer>> offers;
+  EXPECT_CALL(sched, resourceOffers(&driver, _))
+    .WillOnce(FutureArg<1>(&offers));
+
+  driver.start();
+
+  AWAIT_READY(masterInfo);
+  EXPECT_EQ(masterFlags.domain, masterInfo->domain());
+
+  AWAIT_READY(offers);
+  ASSERT_FALSE(offers->empty());
+
+  driver.stop();
+  driver.join();
+}
+
+
+// This test checks that if the agent and master are configured with
+// domains that specify different regions, the agent is allowed to
+// register but its resources are only offered to region-aware
+// frameworks. We also check that tasks can be launched in remote
+// regions.
+TEST_F(MasterTest, AgentDomainDifferentRegion)
+{
+  Clock::pause();
+
+  master::Flags masterFlags = CreateMasterFlags();
+  masterFlags.domain = createDomainInfo("region-abc", "zone-123");
+
+  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
+  ASSERT_SOME(master);
+
+  slave::Flags slaveFlags = CreateSlaveFlags();
+  slaveFlags.domain = createDomainInfo("region-xyz", "zone-123");
+
+  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
+    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);
+
+  Owned<MasterDetector> detector = master.get()->createDetector();
+  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), slaveFlags);
+  ASSERT_SOME(slave);
+
+  Clock::advance(slaveFlags.registration_backoff_factor);
+  AWAIT_READY(slaveRegisteredMessage);
+
+  // Launch a non-region-aware scheduler. It should NOT receive any
+  // resource offers for `slave`.
+  {
+    MockScheduler sched;
+    MesosSchedulerDriver driver(
+        &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);
+
+    Future<Nothing> registered;
+    EXPECT_CALL(sched, registered(&driver, _, _))
+      .WillOnce(FutureSatisfy(&registered));
+
+    // We do not expect to get offered any resources.
+    Future<vector<Offer>> offers;
+    EXPECT_CALL(sched, resourceOffers(&driver, _))
+      .Times(0);
+
+    driver.start();
+
+    AWAIT_READY(registered);
+
+    // Trigger a batch allocation, for good measure.
+    Clock::advance(masterFlags.allocation_interval);
+    Clock::settle();
+
+    driver.stop();
+    driver.join();
+  }
+
+  // Launch a region-aware scheduler. It should receive an offer for `slave`.
+  {
+    FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
+    frameworkInfo.add_capabilities()->set_type(
+        FrameworkInfo::Capability::REGION_AWARE);
+
+    MockScheduler sched;
+    MesosSchedulerDriver driver(
+        &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);
+
+    EXPECT_CALL(sched, registered(&driver, _, _));
+
+    Future<vector<Offer>> offers;
+    EXPECT_CALL(sched, resourceOffers(&driver, _))
+      .WillOnce(FutureArg<1>(&offers));
+
+    driver.start();
+
+    AWAIT_READY(offers);
+    ASSERT_FALSE(offers->empty());
+
+    Offer offer = offers->front();
+
+    // Check that we can launch a task in a remote region.
+    TaskInfo task = createTask(offer, "sleep 60");
+
+    Future<TaskStatus> runningStatus;
+    EXPECT_CALL(sched, statusUpdate(&driver, _))
+      .WillOnce(FutureArg<1>(&runningStatus));
+
+    driver.launchTasks(offer.id(), {task});
+
+    AWAIT_READY(runningStatus);
+    EXPECT_EQ(TASK_RUNNING, runningStatus->state());
+    EXPECT_EQ(task.task_id(), runningStatus->task_id());
+
+    driver.stop();
+    driver.join();
+  }
+
+  // Resume the clock so that executor/task cleanup happens correctly.
+  //
+  // TODO(neilc): Replace this with more fine-grained clock advancement.
+  Clock::resume();
+}
+
+
 // This test checks that if the master is configured with a domain but
 // the agent is not, the agent is allowed to register and its
 // resources are offered to frameworks as usual.

http://git-wip-us.apache.org/repos/asf/mesos/blob/25111bb0/src/tests/reservation_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/reservation_tests.cpp b/src/tests/reservation_tests.cpp
index 3b4884b..470f734 100644
--- a/src/tests/reservation_tests.cpp
+++ b/src/tests/reservation_tests.cpp
@@ -531,7 +531,7 @@ TEST_F(ReservationTest, DropReserveTooLarge)
   masterFlags.allocation_interval = Milliseconds(5);
   masterFlags.roles = frameworkInfo.role();
 
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   Try<Owned<cluster::Master>> master = StartMaster(&allocator, masterFlags);
   ASSERT_SOME(master);
@@ -2053,7 +2053,7 @@ TEST_F(ReservationTest, DropReserveWithDifferentRole)
   masterFlags.allocation_interval = Milliseconds(5);
 
   TestAllocator<> allocator;
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   Try<Owned<cluster::Master>> master = StartMaster(&allocator, masterFlags);
   ASSERT_SOME(master);
@@ -2150,7 +2150,7 @@ TEST_F(ReservationTest, PreventUnreservingAlienResources)
   masterFlags.allocation_interval = Milliseconds(5);
 
   TestAllocator<> allocator;
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   Try<Owned<cluster::Master>> master = StartMaster(&allocator, masterFlags);
   ASSERT_SOME(master);

http://git-wip-us.apache.org/repos/asf/mesos/blob/25111bb0/src/tests/resource_offers_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/resource_offers_tests.cpp b/src/tests/resource_offers_tests.cpp
index 427a652..e1fcab4 100644
--- a/src/tests/resource_offers_tests.cpp
+++ b/src/tests/resource_offers_tests.cpp
@@ -284,7 +284,7 @@ TEST_F(ResourceOffersTest, Request)
 {
   TestAllocator<master::allocator::HierarchicalDRFAllocator> allocator;
 
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   Try<Owned<cluster::Master>> master = StartMaster(&allocator);
   ASSERT_SOME(master);

http://git-wip-us.apache.org/repos/asf/mesos/blob/25111bb0/src/tests/slave_recovery_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_recovery_tests.cpp b/src/tests/slave_recovery_tests.cpp
index f6eafcb..1cd248a 100644
--- a/src/tests/slave_recovery_tests.cpp
+++ b/src/tests/slave_recovery_tests.cpp
@@ -3383,7 +3383,7 @@ TYPED_TEST(SlaveRecoveryTest, ReconcileTasksMissingFromSlave)
 {
   TestAllocator<master::allocator::HierarchicalDRFAllocator> allocator;
 
-  EXPECT_CALL(allocator, initialize(_, _, _, _, _));
+  EXPECT_CALL(allocator, initialize(_, _, _, _, _, _));
 
   Try<Owned<cluster::Master>> master = this->StartMaster(&allocator);
   ASSERT_SOME(master);


[2/8] mesos git commit: Added REGION_AWARE framework capability.

Posted by ne...@apache.org.
Added REGION_AWARE framework capability.

Added REGION_AWARE framework capability.

Review: https://reviews.apache.org/r/59760/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/4189a450
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/4189a450
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/4189a450

Branch: refs/heads/master
Commit: 4189a4504bf0ccc2b47e0edcf0a43fde94a7cbca
Parents: 1789278
Author: Neil Conway <ne...@gmail.com>
Authored: Tue Jul 11 10:43:19 2017 -0700
Committer: Neil Conway <ne...@gmail.com>
Committed: Tue Jul 11 10:43:19 2017 -0700

----------------------------------------------------------------------
 include/mesos/mesos.proto          | 12 ++++++++++++
 include/mesos/v1/mesos.proto       | 12 ++++++++++++
 src/common/protobuf_utils.hpp      |  4 ++++
 src/tests/protobuf_utils_tests.cpp |  6 ++++++
 4 files changed, 34 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/4189a450/include/mesos/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto
index 8355371..2ee3861 100644
--- a/include/mesos/mesos.proto
+++ b/include/mesos/mesos.proto
@@ -369,6 +369,18 @@ message FrameworkInfo {
       // reservations if it uses the `Resource.reservations` field, and
       // `Resource.reservations_size() > 1`.
       RESERVATION_REFINEMENT = 7; // EXPERIMENTAL.
+
+      // Indicates that the framework is prepared to receive offers
+      // for agents whose region is different from the master's
+      // region. Network links between hosts in different regions
+      // typically have higher latency and lower bandwidth than
+      // network links within a region, so frameworks should be
+      // careful to only place suitable workloads in remote regions.
+      // Frameworks that are not region-aware will never receive
+      // offers for remote agents; region-aware frameworks are assumed
+      // to implement their own logic to decide which workloads (if
+      // any) are suitable for placement on remote agents.
+      REGION_AWARE = 8;
     }
 
     // Enum fields should be optional, see: MESOS-4997.

http://git-wip-us.apache.org/repos/asf/mesos/blob/4189a450/include/mesos/v1/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/v1/mesos.proto b/include/mesos/v1/mesos.proto
index 4d5cebd..b143df8 100644
--- a/include/mesos/v1/mesos.proto
+++ b/include/mesos/v1/mesos.proto
@@ -369,6 +369,18 @@ message FrameworkInfo {
       // reservations if it uses the `Resource.reservations` field, and
       // `Resource.reservations_size() > 1`.
       RESERVATION_REFINEMENT = 7; // EXPERIMENTAL.
+
+      // Indicates that the framework is prepared to receive offers
+      // for agents whose region is different from the master's
+      // region. Network links between hosts in different regions
+      // typically have higher latency and lower bandwidth than
+      // network links within a region, so frameworks should be
+      // careful to only place suitable workloads in remote regions.
+      // Frameworks that are not region-aware will never receive
+      // offers for remote agents; region-aware frameworks are assumed
+      // to implement their own logic to decide which workloads (if
+      // any) are suitable for placement on remote agents.
+      REGION_AWARE = 8;
     }
 
     // Enum fields should be optional, see: MESOS-4997.

http://git-wip-us.apache.org/repos/asf/mesos/blob/4189a450/src/common/protobuf_utils.hpp
----------------------------------------------------------------------
diff --git a/src/common/protobuf_utils.hpp b/src/common/protobuf_utils.hpp
index 5476d2e..2156f6d 100644
--- a/src/common/protobuf_utils.hpp
+++ b/src/common/protobuf_utils.hpp
@@ -355,6 +355,9 @@ struct Capabilities
         case FrameworkInfo::Capability::RESERVATION_REFINEMENT:
           reservationRefinement = true;
           break;
+        case FrameworkInfo::Capability::REGION_AWARE:
+          regionAware = true;
+          break;
       }
     }
   }
@@ -367,6 +370,7 @@ struct Capabilities
   bool partitionAware = false;
   bool multiRole = false;
   bool reservationRefinement = false;
+  bool regionAware = false;
 };
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/4189a450/src/tests/protobuf_utils_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/protobuf_utils_tests.cpp b/src/tests/protobuf_utils_tests.cpp
index 6ec3a8e..2be26a5 100644
--- a/src/tests/protobuf_utils_tests.cpp
+++ b/src/tests/protobuf_utils_tests.cpp
@@ -220,6 +220,9 @@ TEST(ProtobufUtilTest, FrameworkCapabilities)
     if (capabilities.multiRole) {
       result.insert(FrameworkInfo::Capability::MULTI_ROLE);
     }
+    if (capabilities.regionAware) {
+      result.insert(FrameworkInfo::Capability::REGION_AWARE);
+    }
 
     return result;
   };
@@ -266,6 +269,9 @@ TEST(ProtobufUtilTest, FrameworkCapabilities)
 
   expected = { FrameworkInfo::Capability::MULTI_ROLE };
   EXPECT_EQ(expected, backAndForth(expected));
+
+  expected = { FrameworkInfo::Capability::REGION_AWARE };
+  EXPECT_EQ(expected, backAndForth(expected));
 }
 
 


[4/8] mesos git commit: Added domain to MasterInfo and SlaveInfo.

Posted by ne...@apache.org.
Added domain to MasterInfo and SlaveInfo.

This means that each master's domain is stored in ZooKeeper, along with
the rest of the MasterInfo protobuf message.

Each agent's domain is stored as part of its checkpointed SlaveInfo.
Changing the agent's domain requires a full drain of the agent; this
behavior might be relaxed in the future.

Review: https://reviews.apache.org/r/59762/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/ac2b2c8d
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/ac2b2c8d
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/ac2b2c8d

Branch: refs/heads/master
Commit: ac2b2c8dfd03bb5243ac18025b025c1e60af7224
Parents: 5b99d39
Author: Neil Conway <ne...@gmail.com>
Authored: Tue Jul 11 10:43:21 2017 -0700
Committer: Neil Conway <ne...@gmail.com>
Committed: Tue Jul 11 10:43:21 2017 -0700

----------------------------------------------------------------------
 include/mesos/mesos.proto    |  10 +++
 include/mesos/v1/mesos.proto |  10 +++
 src/common/http.cpp          |  33 +++++++++
 src/common/http.hpp          |   1 +
 src/common/type_utils.cpp    |   6 +-
 src/internal/evolve.cpp      |   6 ++
 src/internal/evolve.hpp      |   1 +
 src/master/http.cpp          |  12 ++++
 src/master/master.cpp        |   4 ++
 src/slave/http.cpp           |   4 ++
 src/slave/slave.cpp          |   4 ++
 src/tests/api_tests.cpp      |  34 ++++++---
 src/tests/master_tests.cpp   | 144 +++++++++++++++++++++++++++++++++++++-
 src/tests/mesos.hpp          |  22 ++++++
 src/tests/slave_tests.cpp    |  79 +++++++++++++++++++++
 src/v1/mesos.cpp             |   6 +-
 16 files changed, 362 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/ac2b2c8d/include/mesos/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto
index 2ee3861..64ec085 100644
--- a/include/mesos/mesos.proto
+++ b/include/mesos/mesos.proto
@@ -865,6 +865,10 @@ message MasterInfo {
   // and supersedes the use of `ip`, `port` and `hostname`.
   // Since Mesos 0.24.
   optional Address address = 7;
+
+  // The domain that this master belongs to. All masters in a Mesos
+  // cluster should belong to the same region.
+  optional DomainInfo domain = 8;
 }
 
 
@@ -885,6 +889,12 @@ message SlaveInfo {
   repeated Attribute attributes = 5;
   optional SlaveID id = 6;
 
+  // The domain that this slave belongs to. If the slave's region
+  // differs from the master's region, it will not appear in resource
+  // offers to frameworks that have not enabled the REGION_AWARE
+  // capability.
+  optional DomainInfo domain = 10;
+
   // Slave checkpointing is always enabled in recent Mesos versions;
   // the value of this field is ignored.
   // TODO(joerg84): Remove checkpoint field after deprecation cycle starting

http://git-wip-us.apache.org/repos/asf/mesos/blob/ac2b2c8d/include/mesos/v1/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/v1/mesos.proto b/include/mesos/v1/mesos.proto
index b143df8..5b8d00d 100644
--- a/include/mesos/v1/mesos.proto
+++ b/include/mesos/v1/mesos.proto
@@ -865,6 +865,10 @@ message MasterInfo {
   // and supersedes the use of `ip`, `port` and `hostname`.
   // Since Mesos 0.24.
   optional Address address = 7;
+
+  // The domain that this master belongs to. All masters in a Mesos
+  // cluster should belong to the same region.
+  optional DomainInfo domain = 8;
 }
 
 
@@ -885,6 +889,12 @@ message AgentInfo {
   repeated Attribute attributes = 5;
   optional AgentID id = 6;
 
+  // The domain that this agent belongs to. If the agent's region
+  // differs from the master's region, it will not appear in resource
+  // offers to frameworks that have not enabled the REGION_AWARE
+  // capability.
+  optional DomainInfo domain = 10;
+
   message Capability {
     enum Type {
       // This must be the first enum value in this list, to

http://git-wip-us.apache.org/repos/asf/mesos/blob/ac2b2c8d/src/common/http.cpp
----------------------------------------------------------------------
diff --git a/src/common/http.cpp b/src/common/http.cpp
index fdb591e..7dce4cd 100644
--- a/src/common/http.cpp
+++ b/src/common/http.cpp
@@ -717,6 +717,39 @@ void json(JSON::ObjectWriter* writer, const TaskStatus& status)
 }
 
 
+static void json(
+    JSON::ObjectWriter* writer,
+    const DomainInfo::FaultDomain::RegionInfo& regionInfo)
+{
+  writer->field("name", regionInfo.name());
+}
+
+
+static void json(
+    JSON::ObjectWriter* writer,
+    const DomainInfo::FaultDomain::ZoneInfo& zoneInfo)
+{
+  writer->field("name", zoneInfo.name());
+}
+
+
+static void json(
+    JSON::ObjectWriter* writer,
+    const DomainInfo::FaultDomain& faultDomain)
+{
+  writer->field("region", faultDomain.region());
+  writer->field("zone", faultDomain.zone());
+}
+
+
+void json(JSON::ObjectWriter* writer, const DomainInfo& domainInfo)
+{
+  if (domainInfo.has_fault_domain()) {
+    writer->field("fault_domain", domainInfo.fault_domain());
+  }
+}
+
+
 static void json(JSON::NumberWriter* writer, const Value::Scalar& scalar)
 {
   writer->set(scalar.value());

http://git-wip-us.apache.org/repos/asf/mesos/blob/ac2b2c8d/src/common/http.hpp
----------------------------------------------------------------------
diff --git a/src/common/http.hpp b/src/common/http.hpp
index b7e4a8a..93c9b2e 100644
--- a/src/common/http.hpp
+++ b/src/common/http.hpp
@@ -133,6 +133,7 @@ void json(JSON::ArrayWriter* writer, const Labels& labels);
 void json(JSON::ObjectWriter* writer, const Resources& resources);
 void json(JSON::ObjectWriter* writer, const Task& task);
 void json(JSON::ObjectWriter* writer, const TaskStatus& status);
+void json(JSON::ObjectWriter* writer, const DomainInfo& domainInfo);
 
 namespace authorization {
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/ac2b2c8d/src/common/type_utils.cpp
----------------------------------------------------------------------
diff --git a/src/common/type_utils.cpp b/src/common/type_utils.cpp
index a43a6c8..031344c 100644
--- a/src/common/type_utils.cpp
+++ b/src/common/type_utils.cpp
@@ -345,7 +345,8 @@ bool operator==(const MasterInfo& left, const MasterInfo& right)
     left.port() == right.port() &&
     left.pid() == right.pid() &&
     left.hostname() == right.hostname() &&
-    left.version() == right.version();
+    left.version() == right.version() &&
+    left.domain() == right.domain();
 }
 
 
@@ -392,7 +393,8 @@ bool operator==(const SlaveInfo& left, const SlaveInfo& right)
     Attributes(left.attributes()) == Attributes(right.attributes()) &&
     left.id() == right.id() &&
     left.checkpoint() == right.checkpoint() &&
-    left.port() == right.port();
+    left.port() == right.port() &&
+    left.domain() == right.domain();
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/ac2b2c8d/src/internal/evolve.cpp
----------------------------------------------------------------------
diff --git a/src/internal/evolve.cpp b/src/internal/evolve.cpp
index 93196f3..3ac55ac 100644
--- a/src/internal/evolve.cpp
+++ b/src/internal/evolve.cpp
@@ -86,6 +86,12 @@ v1::AgentInfo evolve(const SlaveInfo& slaveInfo)
 }
 
 
+v1::DomainInfo evolve(const DomainInfo& domainInfo)
+{
+  return evolve<v1::DomainInfo>(domainInfo);
+}
+
+
 v1::ExecutorID evolve(const ExecutorID& executorId)
 {
   return evolve<v1::ExecutorID>(executorId);

http://git-wip-us.apache.org/repos/asf/mesos/blob/ac2b2c8d/src/internal/evolve.hpp
----------------------------------------------------------------------
diff --git a/src/internal/evolve.hpp b/src/internal/evolve.hpp
index 9db5fe6..42ead34 100644
--- a/src/internal/evolve.hpp
+++ b/src/internal/evolve.hpp
@@ -58,6 +58,7 @@ namespace internal {
 // Helpers for evolving types between versions. Please add as necessary!
 v1::AgentID evolve(const SlaveID& slaveId);
 v1::AgentInfo evolve(const SlaveInfo& slaveInfo);
+v1::DomainInfo evolve(const DomainInfo& domainInfo);
 v1::ExecutorID evolve(const ExecutorID& executorId);
 v1::ExecutorInfo evolve(const ExecutorInfo& executorInfo);
 v1::FileInfo evolve(const FileInfo& fileInfo);

http://git-wip-us.apache.org/repos/asf/mesos/blob/ac2b2c8d/src/master/http.cpp
----------------------------------------------------------------------
diff --git a/src/master/http.cpp b/src/master/http.cpp
index dbf3d0f..69f7561 100644
--- a/src/master/http.cpp
+++ b/src/master/http.cpp
@@ -161,6 +161,10 @@ static void json(JSON::ObjectWriter* writer, const MasterInfo& info)
   writer->field("pid", info.pid());
   writer->field("port", info.port());
   writer->field("hostname", info.hostname());
+
+  if (info.has_domain()) {
+    writer->field("domain", info.domain());
+  }
 }
 
 
@@ -170,6 +174,10 @@ static void json(JSON::ObjectWriter* writer, const SlaveInfo& slaveInfo)
   writer->field("hostname", slaveInfo.hostname());
   writer->field("port", slaveInfo.port());
   writer->field("attributes", Attributes(slaveInfo.attributes()));
+
+  if (slaveInfo.has_domain()) {
+    writer->field("domain", slaveInfo.domain());
+  }
 }
 
 namespace internal {
@@ -2828,6 +2836,10 @@ Future<Response> Master::Http::state(
         writer->field("deactivated_slaves", master->_slaves_inactive());
         writer->field("unreachable_slaves", master->_slaves_unreachable());
 
+        if (master->info().has_domain()) {
+          writer->field("domain", master->info().domain());
+        }
+
         // TODO(haosdent): Deprecated this in favor of `leader_info` below.
         if (master->leader.isSome()) {
           writer->field("leader", master->leader->pid());

http://git-wip-us.apache.org/repos/asf/mesos/blob/ac2b2c8d/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 7668749..39b2fea 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -358,6 +358,10 @@ Master::Master(
   info_.mutable_address()->set_ip(stringify(self().address.ip));
   info_.mutable_address()->set_port(self().address.port);
   info_.mutable_address()->set_hostname(hostname);
+
+  if (flags.domain.isSome()) {
+    info_.mutable_domain()->CopyFrom(flags.domain.get());
+  }
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/ac2b2c8d/src/slave/http.cpp
----------------------------------------------------------------------
diff --git a/src/slave/http.cpp b/src/slave/http.cpp
index 700871e..3070b3b 100644
--- a/src/slave/http.cpp
+++ b/src/slave/http.cpp
@@ -1302,6 +1302,10 @@ Future<Response> Http::state(
         writer->field("hostname", slave->info.hostname());
         writer->field("capabilities", AGENT_CAPABILITIES());
 
+        if (slave->info.has_domain()) {
+          writer->field("domain", slave->info.domain());
+        }
+
         const Resources& totalResources = slave->totalResources;
 
         writer->field("resources", totalResources);

http://git-wip-us.apache.org/repos/asf/mesos/blob/ac2b2c8d/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index a1a6b64..beb0c79 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -575,6 +575,10 @@ void Slave::initialize()
   // Checkpointing of slaves is always enabled.
   info.set_checkpoint(true);
 
+  if (flags.domain.isSome()) {
+    info.mutable_domain()->CopyFrom(flags.domain.get());
+  }
+
   LOG(INFO) << "Agent hostname: " << info.hostname();
 
   statusUpdateManager->initialize(defer(self(), &Slave::forward, lambda::_1)

http://git-wip-us.apache.org/repos/asf/mesos/blob/ac2b2c8d/src/tests/api_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/api_tests.cpp b/src/tests/api_tests.cpp
index cdaa724..a460e56 100644
--- a/src/tests/api_tests.cpp
+++ b/src/tests/api_tests.cpp
@@ -145,7 +145,10 @@ INSTANTIATE_TEST_CASE_P(
 
 TEST_P(MasterAPITest, GetAgents)
 {
-  Try<Owned<cluster::Master>> master = this->StartMaster();
+  master::Flags masterFlags = CreateMasterFlags();
+  masterFlags.domain = createDomainInfo("region-abc", "zone-123");
+
+  Try<Owned<cluster::Master>> master = this->StartMaster(masterFlags);
   ASSERT_SOME(master);
 
   Owned<MasterDetector> detector = master.get()->createDetector();
@@ -154,10 +157,11 @@ TEST_P(MasterAPITest, GetAgents)
   Future<SlaveRegisteredMessage> agentRegisteredMessage =
     FUTURE_PROTOBUF(SlaveRegisteredMessage(), master.get()->pid, _);
 
-  slave::Flags flags = CreateSlaveFlags();
-  flags.hostname = "host";
+  slave::Flags slaveFlags = CreateSlaveFlags();
+  slaveFlags.hostname = "host";
+  slaveFlags.domain = createDomainInfo("region-xyz", "zone-456");
 
-  Try<Owned<cluster::Slave>> agent = StartSlave(detector.get(), flags);
+  Try<Owned<cluster::Slave>> agent = StartSlave(detector.get(), slaveFlags);
   ASSERT_SOME(agent);
 
   AWAIT_READY(agentRegisteredMessage);
@@ -179,6 +183,7 @@ TEST_P(MasterAPITest, GetAgents)
       v1Response->get_agents().agents(0);
 
   ASSERT_EQ("host", v1Agent.agent_info().hostname());
+  ASSERT_EQ(evolve(slaveFlags.domain.get()), v1Agent.agent_info().domain());
   ASSERT_EQ(agent.get()->pid, v1Agent.pid());
   ASSERT_TRUE(v1Agent.active());
   ASSERT_EQ(MESOS_VERSION, v1Agent.version());
@@ -930,7 +935,10 @@ TEST_P(MasterAPITest, GetRoles)
 
 TEST_P(MasterAPITest, GetMaster)
 {
-  Try<Owned<cluster::Master>> master = this->StartMaster();
+  master::Flags masterFlags = CreateMasterFlags();
+  masterFlags.domain = createDomainInfo("region-abc", "zone-123");
+
+  Try<Owned<cluster::Master>> master = this->StartMaster(masterFlags);
   ASSERT_SOME(master);
 
   v1::master::Call v1Call;
@@ -944,8 +952,12 @@ TEST_P(MasterAPITest, GetMaster)
   AWAIT_READY(v1Response);
   ASSERT_TRUE(v1Response->IsInitialized());
   ASSERT_EQ(v1::master::Response::GET_MASTER, v1Response->type());
-  ASSERT_EQ(master.get()->getMasterInfo().ip(),
-            v1Response->get_master().master_info().ip());
+
+  const mesos::v1::MasterInfo& masterInfo =
+    v1Response->get_master().master_info();
+
+  ASSERT_EQ(evolve(masterFlags.domain.get()), masterInfo.domain());
+  ASSERT_EQ(master.get()->getMasterInfo().ip(), masterInfo.ip());
 }
 
 
@@ -3439,6 +3451,7 @@ TEST_P(AgentAPITest, GetAgent)
 
   slave::Flags flags = CreateSlaveFlags();
   flags.hostname = "host";
+  flags.domain = createDomainInfo("region-xyz", "zone-456");
 
   StandaloneMasterDetector detector;
   Try<Owned<cluster::Slave>> slave = this->StartSlave(&detector, flags);
@@ -3461,8 +3474,11 @@ TEST_P(AgentAPITest, GetAgent)
   AWAIT_READY(v1Response);
   ASSERT_TRUE(v1Response->IsInitialized());
   ASSERT_EQ(v1::agent::Response::GET_AGENT, v1Response->type());
-  ASSERT_EQ(flags.hostname,
-            v1Response->get_agent().agent_info().hostname());
+
+  const mesos::v1::AgentInfo& agentInfo = v1Response->get_agent().agent_info();
+
+  ASSERT_EQ(flags.hostname, agentInfo.hostname());
+  ASSERT_EQ(evolve(flags.domain.get()), agentInfo.domain());
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/ac2b2c8d/src/tests/master_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_tests.cpp b/src/tests/master_tests.cpp
index 9cfa510..ac33e93 100644
--- a/src/tests/master_tests.cpp
+++ b/src/tests/master_tests.cpp
@@ -800,6 +800,144 @@ TEST_F(MasterTest, StatusUpdateAck)
 }
 
 
+// This test checks that domain information is correctly returned by
+// the master's HTTP endpoints.
+TEST_F(MasterTest, DomainEndpoints)
+{
+  const string MASTER_REGION = "region-abc";
+  const string MASTER_ZONE = "zone-123";
+
+  master::Flags masterFlags = CreateMasterFlags();
+  masterFlags.domain = createDomainInfo(MASTER_REGION, MASTER_ZONE);
+
+  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
+  ASSERT_SOME(master);
+
+  const string AGENT_REGION = "region-xyz";
+  const string AGENT_ZONE = "zone-456";
+
+  slave::Flags slaveFlags = CreateSlaveFlags();
+  slaveFlags.domain = createDomainInfo(AGENT_REGION, AGENT_ZONE);
+
+  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
+    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);
+
+  StandaloneMasterDetector detector(master.get()->pid);
+  Try<Owned<cluster::Slave>> slave = StartSlave(&detector, slaveFlags);
+  ASSERT_SOME(slave);
+
+  AWAIT_READY(slaveRegisteredMessage);
+
+  // Query the "/state" master endpoint.
+  {
+    Future<Response> response = process::http::get(
+        master.get()->pid,
+        "state",
+        None(),
+        createBasicAuthHeaders(DEFAULT_CREDENTIAL));
+
+    AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);
+    AWAIT_EXPECT_RESPONSE_HEADER_EQ(APPLICATION_JSON, "Content-Type", response);
+
+    Try<JSON::Object> parse = JSON::parse<JSON::Object>(response->body);
+    ASSERT_SOME(parse);
+
+    Result<JSON::String> masterRegion = parse->find<JSON::String>(
+        "domain.fault_domain.region.name");
+    Result<JSON::String> masterZone = parse->find<JSON::String>(
+        "domain.fault_domain.zone.name");
+
+    EXPECT_SOME_EQ(JSON::String(MASTER_REGION), masterRegion);
+    EXPECT_SOME_EQ(JSON::String(MASTER_ZONE), masterZone);
+
+    Result<JSON::String> leaderRegion = parse->find<JSON::String>(
+        "leader_info.domain.fault_domain.region.name");
+    Result<JSON::String> leaderZone = parse->find<JSON::String>(
+        "leader_info.domain.fault_domain.zone.name");
+
+    EXPECT_SOME_EQ(JSON::String(MASTER_REGION), leaderRegion);
+    EXPECT_SOME_EQ(JSON::String(MASTER_ZONE), leaderZone);
+
+    Result<JSON::String> agentRegion = parse->find<JSON::String>(
+        "slaves[0].domain.fault_domain.region.name");
+    Result<JSON::String> agentZone = parse->find<JSON::String>(
+        "slaves[0].domain.fault_domain.zone.name");
+
+    EXPECT_SOME_EQ(JSON::String(AGENT_REGION), agentRegion);
+    EXPECT_SOME_EQ(JSON::String(AGENT_ZONE), agentZone);
+  }
+
+  // Query the "/state-summary" master endpoint.
+  {
+    Future<Response> response = process::http::get(
+        master.get()->pid,
+        "state-summary",
+        None(),
+        createBasicAuthHeaders(DEFAULT_CREDENTIAL));
+
+    AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);
+    AWAIT_EXPECT_RESPONSE_HEADER_EQ(APPLICATION_JSON, "Content-Type", response);
+
+    Try<JSON::Object> parse = JSON::parse<JSON::Object>(response->body);
+    ASSERT_SOME(parse);
+
+    Result<JSON::String> agentRegion = parse->find<JSON::String>(
+        "slaves[0].domain.fault_domain.region.name");
+    Result<JSON::String> agentZone = parse->find<JSON::String>(
+        "slaves[0].domain.fault_domain.zone.name");
+
+    EXPECT_SOME_EQ(JSON::String(AGENT_REGION), agentRegion);
+    EXPECT_SOME_EQ(JSON::String(AGENT_ZONE), agentZone);
+  }
+
+  // Query the "/slaves" master endpoint.
+  {
+    Future<Response> response = process::http::get(
+        master.get()->pid,
+        "slaves",
+        None(),
+        createBasicAuthHeaders(DEFAULT_CREDENTIAL));
+
+    AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);
+    AWAIT_EXPECT_RESPONSE_HEADER_EQ(APPLICATION_JSON, "Content-Type", response);
+
+    Try<JSON::Object> parse = JSON::parse<JSON::Object>(response->body);
+    ASSERT_SOME(parse);
+
+    Result<JSON::String> agentRegion = parse->find<JSON::String>(
+        "slaves[0].domain.fault_domain.region.name");
+    Result<JSON::String> agentZone = parse->find<JSON::String>(
+        "slaves[0].domain.fault_domain.zone.name");
+
+    EXPECT_SOME_EQ(JSON::String(AGENT_REGION), agentRegion);
+    EXPECT_SOME_EQ(JSON::String(AGENT_ZONE), agentZone);
+  }
+
+  // Query the "/state" agent endpoint.
+  {
+    Future<Response> response = process::http::get(
+        slave.get()->pid,
+        "state",
+        None(),
+        createBasicAuthHeaders(DEFAULT_CREDENTIAL));
+
+    AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);
+    AWAIT_EXPECT_RESPONSE_HEADER_EQ(APPLICATION_JSON, "Content-Type", response);
+
+    Try<JSON::Object> parse = JSON::parse<JSON::Object>(response->body);
+    ASSERT_SOME(parse);
+
+    Result<JSON::String> agentRegion = parse->find<JSON::String>(
+        "domain.fault_domain.region.name");
+    Result<JSON::String> agentZone = parse->find<JSON::String>(
+        "domain.fault_domain.zone.name");
+
+    EXPECT_SOME_EQ(JSON::String(AGENT_REGION), agentRegion);
+    EXPECT_SOME_EQ(JSON::String(AGENT_ZONE), agentZone);
+  }
+}
+
+
 TEST_F(MasterTest, RecoverResources)
 {
   master::Flags masterFlags = CreateMasterFlags();
@@ -1110,7 +1248,10 @@ TEST_F(MasterTest, MultipleExecutors)
 
 TEST_F(MasterTest, MasterInfo)
 {
-  Try<Owned<cluster::Master>> master = StartMaster();
+  master::Flags masterFlags = CreateMasterFlags();
+  masterFlags.domain = createDomainInfo("region-abc", "zone-xyz");
+
+  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
   ASSERT_SOME(master);
 
   Owned<MasterDetector> detector = master.get()->createDetector();
@@ -1131,6 +1272,7 @@ TEST_F(MasterTest, MasterInfo)
   driver.start();
 
   AWAIT_READY(masterInfo);
+  EXPECT_EQ(masterFlags.domain, masterInfo->domain());
   EXPECT_EQ(master.get()->pid.address.port, masterInfo->port());
   EXPECT_EQ(
       master.get()->pid.address.ip,

http://git-wip-us.apache.org/repos/asf/mesos/blob/ac2b2c8d/src/tests/mesos.hpp
----------------------------------------------------------------------
diff --git a/src/tests/mesos.hpp b/src/tests/mesos.hpp
index 06b22f9..2cb3239 100644
--- a/src/tests/mesos.hpp
+++ b/src/tests/mesos.hpp
@@ -1019,6 +1019,21 @@ inline hashmap<std::string, double> convertToHashmap(
 }
 
 
+// Helper to create DomainInfo.
+template <typename TDomainInfo>
+inline TDomainInfo createDomainInfo(
+    const std::string& regionName,
+    const std::string& zoneName)
+{
+  TDomainInfo domain;
+
+  domain.mutable_fault_domain()->mutable_region()->set_name(regionName);
+  domain.mutable_fault_domain()->mutable_zone()->set_name(zoneName);
+
+  return domain;
+}
+
+
 // Helpers for creating offer operations.
 template <typename TResources, typename TOffer>
 inline typename TOffer::Operation RESERVE(const TResources& resources)
@@ -1330,6 +1345,13 @@ inline hashmap<std::string, double> convertToHashmap(Args&&... args)
 
 
 template <typename... Args>
+inline DomainInfo createDomainInfo(Args&&... args)
+{
+  return common::createDomainInfo<DomainInfo>(std::forward<Args>(args)...);
+}
+
+
+template <typename... Args>
 inline Offer::Operation RESERVE(Args&&... args)
 {
   return common::RESERVE<Resources, Offer>(std::forward<Args>(args)...);

http://git-wip-us.apache.org/repos/asf/mesos/blob/ac2b2c8d/src/tests/slave_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_tests.cpp b/src/tests/slave_tests.cpp
index 8a69cc2..035db18 100644
--- a/src/tests/slave_tests.cpp
+++ b/src/tests/slave_tests.cpp
@@ -7476,6 +7476,85 @@ TEST_F_TEMP_DISABLED_ON_WINDOWS(SlaveTest, ExecutorReregistrationTimeoutFlag)
   driver.join();
 }
 
+
+// This test checks that if an agent is shut down gracefully, then its
+// domain is configured and the agent is restarted, the agent restarts
+// successfully. Note that shutting down the agent gracefully (killing
+// all tasks) is necessary, because changing the agent's domain is an
+// incompatible change to its SlaveInfo.
+TEST_F(SlaveTest, ChangeDomain)
+{
+  Clock::pause();
+
+  master::Flags masterFlags = CreateMasterFlags();
+  masterFlags.domain = createDomainInfo("region-abc", "zone-123");
+
+  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
+  ASSERT_SOME(master);
+
+  slave::Flags slaveFlags = CreateSlaveFlags();
+
+  Future<SlaveRegisteredMessage> slaveRegisteredMessage1 =
+    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);
+
+  StandaloneMasterDetector detector(master.get()->pid);
+  Try<Owned<cluster::Slave>> slave1 = StartSlave(&detector, slaveFlags);
+  ASSERT_SOME(slave1);
+
+  Clock::advance(slaveFlags.registration_backoff_factor);
+  AWAIT_READY(slaveRegisteredMessage1);
+
+  // Gracefully shut down the agent.
+  slave1.get()->shutdown();
+
+  // Restart the agent with a domain. We use the same `slave::Flags`,
+  // so the new instance of the agent uses the same `work_dir`.
+  const string AGENT_REGION = "region-abc";
+  const string AGENT_ZONE = "zone-456";
+
+  slaveFlags.domain = createDomainInfo(AGENT_REGION, AGENT_ZONE);
+
+  Future<SlaveRegisteredMessage> slaveRegisteredMessage2 =
+    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);
+
+  Try<Owned<cluster::Slave>> slave2 = StartSlave(&detector, slaveFlags);
+  ASSERT_SOME(slave2);
+
+  Clock::advance(slaveFlags.registration_backoff_factor);
+  AWAIT_READY(slaveRegisteredMessage2);
+
+  // The agent should be assigned a new AgentID.
+  EXPECT_NE(slaveRegisteredMessage1->slave_id(),
+            slaveRegisteredMessage2->slave_id());
+
+  // Check that the new agent domain is correctly reflected in the
+  // master's HTTP endpoints.
+  {
+    Future<Response> response = process::http::get(
+        master.get()->pid,
+        "slaves",
+        None(),
+        createBasicAuthHeaders(DEFAULT_CREDENTIAL));
+
+    AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);
+    AWAIT_EXPECT_RESPONSE_HEADER_EQ(APPLICATION_JSON, "Content-Type", response);
+
+    Try<JSON::Object> parse = JSON::parse<JSON::Object>(response->body);
+    ASSERT_SOME(parse);
+
+    JSON::Array slaves = parse->values["slaves"].as<JSON::Array>();
+    ASSERT_EQ(1u, slaves.values.size());
+
+    Result<JSON::String> agentRegion = parse->find<JSON::String>(
+        "slaves[0].domain.fault_domain.region.name");
+    Result<JSON::String> agentZone = parse->find<JSON::String>(
+        "slaves[0].domain.fault_domain.zone.name");
+
+    EXPECT_SOME_EQ(JSON::String(AGENT_REGION), agentRegion);
+    EXPECT_SOME_EQ(JSON::String(AGENT_ZONE), agentZone);
+  }
+}
+
 } // namespace tests {
 } // namespace internal {
 } // namespace mesos {

http://git-wip-us.apache.org/repos/asf/mesos/blob/ac2b2c8d/src/v1/mesos.cpp
----------------------------------------------------------------------
diff --git a/src/v1/mesos.cpp b/src/v1/mesos.cpp
index 13f336c..e5143e1 100644
--- a/src/v1/mesos.cpp
+++ b/src/v1/mesos.cpp
@@ -339,7 +339,8 @@ bool operator==(const MasterInfo& left, const MasterInfo& right)
     left.port() == right.port() &&
     left.pid() == right.pid() &&
     left.hostname() == right.hostname() &&
-    left.version() == right.version();
+    left.version() == right.version() &&
+    left.domain() == right.domain();
 }
 
 
@@ -385,7 +386,8 @@ bool operator==(const AgentInfo& left, const AgentInfo& right)
     Resources(left.resources()) == Resources(right.resources()) &&
     Attributes(left.attributes()) == Attributes(right.attributes()) &&
     left.id() == right.id() &&
-    left.port() == right.port();
+    left.port() == right.port() &&
+    left.domain() == right.domain();
 }
 
 


[8/8] mesos git commit: Added agent domain to Offer message.

Posted by ne...@apache.org.
Added agent domain to Offer message.

This is a convenience mechanism to allow frameworks to determine the
domain of an agent when they receive a resource offer.

Review: https://reviews.apache.org/r/59921/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/33093c89
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/33093c89
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/33093c89

Branch: refs/heads/master
Commit: 33093c893773f8c9d293afe38e9909f9a2868d32
Parents: 25111bb
Author: Neil Conway <ne...@gmail.com>
Authored: Tue Jul 11 10:43:54 2017 -0700
Committer: Neil Conway <ne...@gmail.com>
Committed: Tue Jul 11 10:43:54 2017 -0700

----------------------------------------------------------------------
 include/mesos/mesos.proto    |  3 +++
 include/mesos/v1/mesos.proto |  3 +++
 src/master/master.cpp        |  4 ++++
 src/tests/master_tests.cpp   | 16 ++++++++++++++++
 4 files changed, 26 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/33093c89/include/mesos/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto
index 64ec085..8f8079b 100644
--- a/include/mesos/mesos.proto
+++ b/include/mesos/mesos.proto
@@ -1755,6 +1755,9 @@ message Offer {
   // URL for reaching the slave running on the host.
   optional URL url = 8;
 
+  // The domain of the slave.
+  optional DomainInfo domain = 11;
+
   repeated Resource resources = 5;
   repeated Attribute attributes = 7;
   repeated ExecutorID executor_ids = 6;

http://git-wip-us.apache.org/repos/asf/mesos/blob/33093c89/include/mesos/v1/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/v1/mesos.proto b/include/mesos/v1/mesos.proto
index 5b8d00d..720f307 100644
--- a/include/mesos/v1/mesos.proto
+++ b/include/mesos/v1/mesos.proto
@@ -1738,6 +1738,9 @@ message Offer {
   // URL for reaching the agent running on the host.
   optional URL url = 8;
 
+  // The domain of the agent.
+  optional DomainInfo domain = 11;
+
   repeated Resource resources = 5;
   repeated Attribute attributes = 7;
   repeated ExecutorID executor_ids = 6;

http://git-wip-us.apache.org/repos/asf/mesos/blob/33093c89/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index eb660cc..d895154 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -7538,6 +7538,10 @@ void Master::offer(
       offer->mutable_attributes()->MergeFrom(slave->info.attributes());
       offer->mutable_allocation_info()->set_role(role);
 
+      if (slave->info.has_domain()) {
+        offer->mutable_domain()->MergeFrom(slave->info.domain());
+      }
+
       // Add all framework's executors running on this slave.
       if (slave->executors.contains(framework->id())) {
         const hashmap<ExecutorID, ExecutorInfo>& executors =

http://git-wip-us.apache.org/repos/asf/mesos/blob/33093c89/src/tests/master_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_tests.cpp b/src/tests/master_tests.cpp
index e070e87..6e6461c 100644
--- a/src/tests/master_tests.cpp
+++ b/src/tests/master_tests.cpp
@@ -7389,6 +7389,8 @@ TEST_F(MasterTest, AgentDomainSameRegion)
   Clock::advance(slaveFlags.registration_backoff_factor);
   AWAIT_READY(slaveRegisteredMessage);
 
+  const SlaveID& slaveId = slaveRegisteredMessage->slave_id();
+
   MockScheduler sched;
   MesosSchedulerDriver driver(
       &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);
@@ -7409,6 +7411,10 @@ TEST_F(MasterTest, AgentDomainSameRegion)
   AWAIT_READY(offers);
   ASSERT_FALSE(offers->empty());
 
+  Offer offer = offers->front();
+  EXPECT_EQ(slaveId, offer.slave_id());
+  EXPECT_EQ(slaveFlags.domain.get(), offer.domain());
+
   driver.stop();
   driver.join();
 }
@@ -7442,6 +7448,8 @@ TEST_F(MasterTest, AgentDomainDifferentRegion)
   Clock::advance(slaveFlags.registration_backoff_factor);
   AWAIT_READY(slaveRegisteredMessage);
 
+  const SlaveID& slaveId = slaveRegisteredMessage->slave_id();
+
   // Launch a non-region-aware scheduler. It should NOT receive any
   // resource offers for `slave`.
   {
@@ -7492,6 +7500,8 @@ TEST_F(MasterTest, AgentDomainDifferentRegion)
     ASSERT_FALSE(offers->empty());
 
     Offer offer = offers->front();
+    EXPECT_EQ(slaveId, offer.slave_id());
+    EXPECT_EQ(slaveFlags.domain.get(), offer.domain());
 
     // Check that we can launch a task in a remote region.
     TaskInfo task = createTask(offer, "sleep 60");
@@ -7541,6 +7551,8 @@ TEST_F(MasterTest, AgentDomainUnset)
   Clock::advance(slaveFlags.registration_backoff_factor);
   AWAIT_READY(slaveRegisteredMessage);
 
+  const SlaveID& slaveId = slaveRegisteredMessage->slave_id();
+
   MockScheduler sched;
   MesosSchedulerDriver driver(
       &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);
@@ -7556,6 +7568,10 @@ TEST_F(MasterTest, AgentDomainUnset)
   AWAIT_READY(offers);
   ASSERT_FALSE(offers->empty());
 
+  Offer offer = offers->front();
+  EXPECT_EQ(slaveId, offer.slave_id());
+  EXPECT_FALSE(offer.has_domain());
+
   driver.stop();
   driver.join();
 }