You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ya...@apache.org on 2017/07/20 04:39:46 UTC
[1/2] mesos git commit: Changed the way tests capture agent state
transitioning.
Repository: mesos
Updated Branches:
refs/heads/master 52f829153 -> ef6622589
Changed the way tests capture agent state transitioning.
The existing way captures the registrar operation which may not
occur after MESOS-7711 but regardless of that, capturing the agent
authorization is equivalent and arguably more straightforward.
Review: https://reviews.apache.org/r/60854
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/b43ceb8b
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/b43ceb8b
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/b43ceb8b
Branch: refs/heads/master
Commit: b43ceb8b97e4eca507a699113adcd311a071936f
Parents: 52f8291
Author: Jiang Yan Xu <xu...@apple.com>
Authored: Thu Jul 13 15:49:59 2017 -0700
Committer: Jiang Yan Xu <xu...@apple.com>
Committed: Wed Jul 19 21:25:41 2017 -0700
----------------------------------------------------------------------
src/tests/master_tests.cpp | 25 +++++++++++--------------
src/tests/reconciliation_tests.cpp | 19 ++++++++-----------
2 files changed, 19 insertions(+), 25 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/b43ceb8b/src/tests/master_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_tests.cpp b/src/tests/master_tests.cpp
index 5742860..e3ccf8c 100644
--- a/src/tests/master_tests.cpp
+++ b/src/tests/master_tests.cpp
@@ -546,18 +546,11 @@ TEST_F(MasterTest, KillUnknownTaskSlaveInTransition)
EXPECT_CALL(sched, disconnected(&driver))
.WillOnce(FutureSatisfy(&disconnected));
- // Restart master.
- master = StartMaster(masterFlags);
+ // Restart master with a mock authorizer to block agent state transitioning.
+ MockAuthorizer authorizer;
+ master = StartMaster(&authorizer, masterFlags);
ASSERT_SOME(master);
- // Intercept the first registrar operation that is attempted; this
- // should be the registry operation that reregisters the slave.
- Future<Owned<master::Operation>> reregister;
- Promise<bool> promise; // Never satisfied.
- EXPECT_CALL(*master.get()->registrar.get(), apply(_))
- .WillOnce(DoAll(FutureArg<0>(&reregister),
- Return(promise.future())));
-
frameworkId = Future<FrameworkID>();
EXPECT_CALL(sched, registered(&driver, _, _))
.WillOnce(FutureArg<1>(&frameworkId));
@@ -569,15 +562,19 @@ TEST_F(MasterTest, KillUnknownTaskSlaveInTransition)
AWAIT_READY(disconnected);
AWAIT_READY(frameworkId);
+ // Intercept agent authorization.
+ Future<Nothing> authorize;
+ Promise<bool> promise; // Never satisfied.
+ EXPECT_CALL(authorizer, authorized(_))
+ .WillOnce(DoAll(FutureSatisfy(&authorize),
+ Return(promise.future())));
+
// Restart slave.
slave = StartSlave(&detector, &containerizer, slaveFlags);
ASSERT_SOME(slave);
// Wait for the slave to start reregistration.
- AWAIT_READY(reregister);
- EXPECT_NE(
- nullptr,
- dynamic_cast<master::MarkSlaveReachable*>(reregister->get()));
+ AWAIT_READY(authorize);
// As Master::killTask isn't doing anything, we shouldn't get a status update.
EXPECT_CALL(sched, statusUpdate(&driver, _))
http://git-wip-us.apache.org/repos/asf/mesos/blob/b43ceb8b/src/tests/reconciliation_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/reconciliation_tests.cpp b/src/tests/reconciliation_tests.cpp
index 9c36b9a..f1635fd 100644
--- a/src/tests/reconciliation_tests.cpp
+++ b/src/tests/reconciliation_tests.cpp
@@ -554,8 +554,9 @@ TEST_F(ReconciliationTest, RecoveredAgentReregistrationInProgress)
slave.get()->terminate();
slave->reset();
- // Restart the master.
- master = StartMaster(masterFlags);
+ // Restart master with a mock authorizer to block agent state transitioning.
+ MockAuthorizer authorizer;
+ master = StartMaster(&authorizer, masterFlags);
ASSERT_SOME(master);
MockScheduler sched;
@@ -578,12 +579,11 @@ TEST_F(ReconciliationTest, RecoveredAgentReregistrationInProgress)
// Wait for the framework to register.
AWAIT_READY(frameworkId);
- // Intercept the first registrar operation that is attempted; this
- // should be the registry operation that reregisters the slave.
- Future<Owned<master::Operation>> reregister;
+ // Intercept agent authorization.
+ Future<Nothing> authorize;
Promise<bool> promise; // Never satisfied.
- EXPECT_CALL(*master.get()->registrar.get(), apply(_))
- .WillOnce(DoAll(FutureArg<0>(&reregister),
+ EXPECT_CALL(authorizer, authorized(_))
+ .WillOnce(DoAll(FutureSatisfy(&authorize),
Return(promise.future())));
// Restart the slave.
@@ -592,10 +592,7 @@ TEST_F(ReconciliationTest, RecoveredAgentReregistrationInProgress)
ASSERT_SOME(slave);
// Wait for the slave to start reregistration.
- AWAIT_READY(reregister);
- EXPECT_NE(
- nullptr,
- dynamic_cast<master::MarkSlaveReachable*>(reregister->get()));
+ AWAIT_READY(authorize);
Future<mesos::scheduler::Call> reconcileCall = FUTURE_CALL(
mesos::scheduler::Call(), mesos::scheduler::Call::RECONCILE, _, _);
[2/2] mesos git commit: Skipped consulting registry if the agent is
in `slaves.recovered`.
Posted by ya...@apache.org.
Skipped consulting registry if the agent is in `slaves.recovered`.
Agents in `slaves.recovered` haven't been marked unreachable and
would have been in `slaves.registered` if the master has not failed
over. So this is consistent with how the master in steady state handles
reregistering agents by checking against `slaves.registered`.
Review: https://reviews.apache.org/r/60400
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/ef662258
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/ef662258
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/ef662258
Branch: refs/heads/master
Commit: ef66225896be26fd4e7b0bb914e2820366613470
Parents: b43ceb8
Author: Jiang Yan Xu <xu...@apple.com>
Authored: Thu Jun 22 14:01:27 2017 -0700
Committer: Jiang Yan Xu <xu...@apple.com>
Committed: Wed Jul 19 21:39:11 2017 -0700
----------------------------------------------------------------------
src/master/master.cpp | 58 +++++++++++++++++++++++++++++++---------------
1 file changed, 39 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/ef662258/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index a902bfc..e12c997 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -6182,25 +6182,45 @@ void Master::_reregisterSlave(
LOG(INFO) << "Re-registering agent " << slaveInfo.id() << " at " << pid
<< " (" << slaveInfo.hostname() << ")";
- // Consult the registry to determine whether to readmit the
- // slave. In the common case, the slave has been marked unreachable
- // by the master, so we move the slave to the reachable list and
- // readmit it. If the slave isn't in the unreachable list (which
- // might occur if the slave's entry in the unreachable list is
- // GC'd), we admit the slave anyway.
- registrar->apply(Owned<Operation>(new MarkSlaveReachable(slaveInfo)))
- .onAny(defer(self(),
- &Self::__reregisterSlave,
- slaveInfo,
- pid,
- checkpointedResources,
- executorInfos,
- tasks,
- frameworks,
- completedFrameworks,
- version,
- agentCapabilities,
- lambda::_1));
+ if (slaves.recovered.contains(slaveInfo.id())) {
+ // The agent likely is re-registering after a master failover as it
+ // is in the list recovered from the registry. No need to consult the
+ // registry in this case and we can directly re-admit it.
+ VLOG(1) << "Re-admitting recovered agent " << slaveInfo.id() << " at "
+ << pid << " (" << slaveInfo.hostname() << ")";
+
+ __reregisterSlave(
+ slaveInfo,
+ pid,
+ checkpointedResources,
+ executorInfos,
+ tasks,
+ frameworks,
+ completedFrameworks,
+ version,
+ agentCapabilities,
+ true);
+ } else {
+ // Consult the registry to determine whether to readmit the
+ // slave. In the common case, the slave has been marked unreachable
+ // by the master, so we move the slave to the reachable list and
+ // readmit it. If the slave isn't in the unreachable list (which
+ // might occur if the slave's entry in the unreachable list is
+ // GC'd), we admit the slave anyway.
+ registrar->apply(Owned<Operation>(new MarkSlaveReachable(slaveInfo)))
+ .onAny(defer(self(),
+ &Self::__reregisterSlave,
+ slaveInfo,
+ pid,
+ checkpointedResources,
+ executorInfos,
+ tasks,
+ frameworks,
+ completedFrameworks,
+ version,
+ agentCapabilities,
+ lambda::_1));
+ }
}