You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by jo...@apache.org on 2019/07/16 20:43:19 UTC

[mesos] 02/09: Modified registry operations for unreachable draining agents.

This is an automated email from the ASF dual-hosted git repository.

josephwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 6d4a370c699dd67ae44c9e42e08cde4fc52d7930
Author: Joseph Wu <jo...@apache.org>
AuthorDate: Wed Jun 26 06:53:20 2019 -0700

    Modified registry operations for unreachable draining agents.
    
    When an agent transitions from reachable to unreachable, or vice
    versa, its draining info (`drain_info`) is now copied as well,
    along with the 'deactivated' boolean.
    
    Review: https://reviews.apache.org/r/70956
---
 src/master/registry.proto          |  5 ++++
 src/master/registry_operations.cpp | 49 ++++++++++++++++++++++++++++++++++----
 src/tests/registrar_tests.cpp      | 41 +++++++++++++++++++++++++++++++
 3 files changed, 90 insertions(+), 5 deletions(-)

diff --git a/src/master/registry.proto b/src/master/registry.proto
index 435b9e1..257ad99 100644
--- a/src/master/registry.proto
+++ b/src/master/registry.proto
@@ -61,6 +61,11 @@ message Registry {
 
     // The time when the slave was marked unreachable by the master.
     required TimeInfo timestamp = 2;
+
+    // If the agent returns, these objects should be transferred to
+    // the appropriate `Slave` message as well.
+    optional DrainInfo drain_info = 3;
+    optional bool deactivated = 4 [default = false];
   }
 
   message UnreachableSlaves {
diff --git a/src/master/registry_operations.cpp b/src/master/registry_operations.cpp
index 00dca41..c286f6c 100644
--- a/src/master/registry_operations.cpp
+++ b/src/master/registry_operations.cpp
@@ -120,15 +120,22 @@ Try<bool> MarkSlaveUnreachable::perform(
     const Registry::Slave& slave = registry->slaves().slaves(i);
 
     if (slave.info().id() == info.id()) {
-      registry->mutable_slaves()->mutable_slaves()->DeleteSubrange(i, 1);
-      slaveIDs->erase(info.id());
-
       Registry::UnreachableSlave* unreachable =
         registry->mutable_unreachable()->add_slaves();
 
       unreachable->mutable_id()->CopyFrom(info.id());
       unreachable->mutable_timestamp()->CopyFrom(unreachableTime);
 
+      // Copy the draining and deactivation states.
+      if (slave.has_drain_info()) {
+        unreachable->mutable_drain_info()->CopyFrom(slave.drain_info());
+      }
+
+      unreachable->set_deactivated(slave.deactivated());
+
+      registry->mutable_slaves()->mutable_slaves()->DeleteSubrange(i, 1);
+      slaveIDs->erase(info.id());
+
       return true; // Mutation.
     }
   }
@@ -165,6 +172,9 @@ Try<bool> MarkSlaveReachable::perform(
     return false; // No mutation.
   }
 
+  Registry::Slave reachable;
+  reachable.mutable_info()->CopyFrom(info);
+
   // Check whether the slave is in the unreachable list.
   // TODO(neilc): Optimize this to avoid linear scan.
   bool found = false;
@@ -173,6 +183,13 @@ Try<bool> MarkSlaveReachable::perform(
       registry->unreachable().slaves(i);
 
     if (slave.id() == info.id()) {
+      // Copy the draining and deactivation states.
+      if (slave.has_drain_info()) {
+        reachable.mutable_drain_info()->CopyFrom(slave.drain_info());
+      }
+
+      reachable.set_deactivated(slave.deactivated());
+
       registry->mutable_unreachable()->mutable_slaves()->DeleteSubrange(i, 1);
       found = true;
       break;
@@ -191,8 +208,7 @@ Try<bool> MarkSlaveReachable::perform(
   // in the unreachable list. This accounts for when the slave was
   // unreachable for a long time, was GC'd from the unreachable
   // list, but then eventually reregistered.
-  Registry::Slave* slave = registry->mutable_slaves()->add_slaves();
-  slave->mutable_info()->CopyFrom(info);
+  registry->mutable_slaves()->add_slaves()->CopyFrom(reachable);
   slaveIDs->insert(info.id());
 
   return true; // Mutation.
@@ -385,6 +401,29 @@ Try<bool> DrainAgent::perform(Registry* registry, hashset<SlaveID>* slaveIDs)
     }
   }
 
+  // If not found above, check the unreachable list.
+  if (!found) {
+    for (int i = 0; i < registry->unreachable().slaves().size(); i++) {
+      if (registry->unreachable().slaves(i).id() == slaveId) {
+        Registry::UnreachableSlave* slave =
+          registry->mutable_unreachable()->mutable_slaves(i);
+
+        slave->mutable_drain_info()->set_state(DRAINING);
+
+        // Copy the DrainConfig and ensure the agent is deactivated.
+        if (maxGracePeriod.isSome()) {
+          slave->mutable_drain_info()->mutable_config()
+            ->mutable_max_grace_period()->CopyFrom(maxGracePeriod.get());
+        }
+
+        slave->mutable_drain_info()->mutable_config()->set_mark_gone(markGone);
+        slave->set_deactivated(true);
+        found = true;
+        break;
+      }
+    }
+  }
+
   // Make sure the AGENT_DRAINING minimum capability is present or added.
   if (found) {
     protobuf::master::addMinimumCapability(
diff --git a/src/tests/registrar_tests.cpp b/src/tests/registrar_tests.cpp
index b4f2e25..d599c3c 100644
--- a/src/tests/registrar_tests.cpp
+++ b/src/tests/registrar_tests.cpp
@@ -970,6 +970,47 @@ TEST_F(RegistrarTest, DrainAgent)
     EXPECT_EQ(
         MasterInfo_Capability_Type_Name(MasterInfo::Capability::AGENT_DRAINING),
         registry->minimum_capabilities(0).capability());
+
+    // Mark the agent unreachable.
+    AWAIT_TRUE(registrar.apply(Owned<RegistryOperation>(
+        new MarkSlaveUnreachable(slave, protobuf::getCurrentTime()))));
+  }
+
+  {
+    // Check that the unreachable agent retains its draining state.
+    Registrar registrar(flags, state);
+    Future<Registry> registry = registrar.recover(master);
+    AWAIT_READY(registry);
+
+    EXPECT_EQ(0, registry->slaves().slaves().size());
+    ASSERT_EQ(1, registry->unreachable().slaves().size());
+    ASSERT_TRUE(registry->unreachable().slaves(0).has_drain_info());
+    EXPECT_FALSE(
+        registry->unreachable().slaves(0)
+          .drain_info().config().has_max_grace_period());
+    EXPECT_TRUE(
+        registry->unreachable().slaves(0).drain_info().config().mark_gone());
+    EXPECT_TRUE(registry->unreachable().slaves(0).deactivated());
+
+    // Mark the agent reachable.
+    AWAIT_TRUE(registrar.apply(Owned<RegistryOperation>(
+        new MarkSlaveReachable(slave))));
+  }
+
+  {
+    // Check that the reachable agent retains its draining state.
+    Registrar registrar(flags, state);
+    Future<Registry> registry = registrar.recover(master);
+    AWAIT_READY(registry);
+
+    ASSERT_EQ(1, registry->slaves().slaves().size());
+    EXPECT_EQ(0, registry->unreachable().slaves().size());
+    ASSERT_TRUE(registry->slaves().slaves(0).has_drain_info());
+    EXPECT_FALSE(
+        registry->slaves().slaves(0)
+          .drain_info().config().has_max_grace_period());
+    EXPECT_TRUE(registry->slaves().slaves(0).drain_info().config().mark_gone());
+    EXPECT_TRUE(registry->slaves().slaves(0).deactivated());
   }
 }