You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ya...@apache.org on 2017/07/20 04:39:47 UTC
[2/2] mesos git commit: Skipped consulting registry if the agent is in `slaves.recovered`.
Skipped consulting registry if the agent is in `slaves.recovered`.
Agents in `slaves.recovered` have not been marked unreachable and
would have been in `slaves.registered` if the master had not failed
over. Skipping the registry for them is therefore consistent with how
the master in steady state handles re-registering agents, namely by
checking against `slaves.registered`.
Review: https://reviews.apache.org/r/60400
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/ef662258
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/ef662258
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/ef662258
Branch: refs/heads/master
Commit: ef66225896be26fd4e7b0bb914e2820366613470
Parents: b43ceb8
Author: Jiang Yan Xu <xu...@apple.com>
Authored: Thu Jun 22 14:01:27 2017 -0700
Committer: Jiang Yan Xu <xu...@apple.com>
Committed: Wed Jul 19 21:39:11 2017 -0700
----------------------------------------------------------------------
src/master/master.cpp | 58 +++++++++++++++++++++++++++++++---------------
1 file changed, 39 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/ef662258/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index a902bfc..e12c997 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -6182,25 +6182,45 @@ void Master::_reregisterSlave(
LOG(INFO) << "Re-registering agent " << slaveInfo.id() << " at " << pid
<< " (" << slaveInfo.hostname() << ")";
- // Consult the registry to determine whether to readmit the
- // slave. In the common case, the slave has been marked unreachable
- // by the master, so we move the slave to the reachable list and
- // readmit it. If the slave isn't in the unreachable list (which
- // might occur if the slave's entry in the unreachable list is
- // GC'd), we admit the slave anyway.
- registrar->apply(Owned<Operation>(new MarkSlaveReachable(slaveInfo)))
- .onAny(defer(self(),
- &Self::__reregisterSlave,
- slaveInfo,
- pid,
- checkpointedResources,
- executorInfos,
- tasks,
- frameworks,
- completedFrameworks,
- version,
- agentCapabilities,
- lambda::_1));
+ if (slaves.recovered.contains(slaveInfo.id())) {
+ // The agent likely is re-registering after a master failover as it
+ // is in the list recovered from the registry. No need to consult the
+ // registry in this case and we can directly re-admit it.
+ VLOG(1) << "Re-admitting recovered agent " << slaveInfo.id() << " at "
+ << pid << " (" << slaveInfo.hostname() << ")";
+
+ __reregisterSlave(
+ slaveInfo,
+ pid,
+ checkpointedResources,
+ executorInfos,
+ tasks,
+ frameworks,
+ completedFrameworks,
+ version,
+ agentCapabilities,
+ true);
+ } else {
+ // Consult the registry to determine whether to readmit the
+ // slave. In the common case, the slave has been marked unreachable
+ // by the master, so we move the slave to the reachable list and
+ // readmit it. If the slave isn't in the unreachable list (which
+ // might occur if the slave's entry in the unreachable list is
+ // GC'd), we admit the slave anyway.
+ registrar->apply(Owned<Operation>(new MarkSlaveReachable(slaveInfo)))
+ .onAny(defer(self(),
+ &Self::__reregisterSlave,
+ slaveInfo,
+ pid,
+ checkpointedResources,
+ executorInfos,
+ tasks,
+ frameworks,
+ completedFrameworks,
+ version,
+ agentCapabilities,
+ lambda::_1));
+ }
}