You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ya...@apache.org on 2017/07/20 04:39:47 UTC

[2/2] mesos git commit: Skipped consulting registry if the agent is in `slaves.recovered`.

Skipped consulting registry if the agent is in `slaves.recovered`.

Agents in `slaves.recovered` haven't been marked unreachable and
would have been in `slaves.registered` if the master had not failed
over. So this is consistent with how the master in steady state handles
reregistering agents by checking against `slaves.registered`.

Review: https://reviews.apache.org/r/60400


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/ef662258
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/ef662258
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/ef662258

Branch: refs/heads/master
Commit: ef66225896be26fd4e7b0bb914e2820366613470
Parents: b43ceb8
Author: Jiang Yan Xu <xu...@apple.com>
Authored: Thu Jun 22 14:01:27 2017 -0700
Committer: Jiang Yan Xu <xu...@apple.com>
Committed: Wed Jul 19 21:39:11 2017 -0700

----------------------------------------------------------------------
 src/master/master.cpp | 58 +++++++++++++++++++++++++++++++---------------
 1 file changed, 39 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/ef662258/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index a902bfc..e12c997 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -6182,25 +6182,45 @@ void Master::_reregisterSlave(
   LOG(INFO) << "Re-registering agent " << slaveInfo.id() << " at " << pid
             << " (" << slaveInfo.hostname() << ")";
 
-  // Consult the registry to determine whether to readmit the
-  // slave. In the common case, the slave has been marked unreachable
-  // by the master, so we move the slave to the reachable list and
-  // readmit it. If the slave isn't in the unreachable list (which
-  // might occur if the slave's entry in the unreachable list is
-  // GC'd), we admit the slave anyway.
-  registrar->apply(Owned<Operation>(new MarkSlaveReachable(slaveInfo)))
-    .onAny(defer(self(),
-                 &Self::__reregisterSlave,
-                 slaveInfo,
-                 pid,
-                 checkpointedResources,
-                 executorInfos,
-                 tasks,
-                 frameworks,
-                 completedFrameworks,
-                 version,
-                 agentCapabilities,
-                 lambda::_1));
+  if (slaves.recovered.contains(slaveInfo.id())) {
+    // The agent likely is re-registering after a master failover as it
+    // is in the list recovered from the registry. No need to consult the
+    // registry in this case and we can directly re-admit it.
+    VLOG(1) << "Re-admitting recovered agent " << slaveInfo.id() << " at "
+            << pid << " (" << slaveInfo.hostname() << ")";
+
+    __reregisterSlave(
+        slaveInfo,
+        pid,
+        checkpointedResources,
+        executorInfos,
+        tasks,
+        frameworks,
+        completedFrameworks,
+        version,
+        agentCapabilities,
+        true);
+  } else {
+    // Consult the registry to determine whether to readmit the
+    // slave. In the common case, the slave has been marked unreachable
+    // by the master, so we move the slave to the reachable list and
+    // readmit it. If the slave isn't in the unreachable list (which
+    // might occur if the slave's entry in the unreachable list is
+    // GC'd), we admit the slave anyway.
+    registrar->apply(Owned<Operation>(new MarkSlaveReachable(slaveInfo)))
+      .onAny(defer(self(),
+                   &Self::__reregisterSlave,
+                   slaveInfo,
+                   pid,
+                   checkpointedResources,
+                   executorInfos,
+                   tasks,
+                   frameworks,
+                   completedFrameworks,
+                   version,
+                   agentCapabilities,
+                   lambda::_1));
+  }
 }