You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ya...@apache.org on 2017/07/20 04:39:46 UTC

[1/2] mesos git commit: Changed the way tests capture agent state transitioning.

Repository: mesos
Updated Branches:
  refs/heads/master 52f829153 -> ef6622589


Changed the way tests capture agent state transitioning.

The existing way captures the registrar operation which may not
occur after MESOS-7711 but regardless of that, capturing the agent
authorization is equivalent and arguably more straightforward.

Review: https://reviews.apache.org/r/60854


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/b43ceb8b
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/b43ceb8b
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/b43ceb8b

Branch: refs/heads/master
Commit: b43ceb8b97e4eca507a699113adcd311a071936f
Parents: 52f8291
Author: Jiang Yan Xu <xu...@apple.com>
Authored: Thu Jul 13 15:49:59 2017 -0700
Committer: Jiang Yan Xu <xu...@apple.com>
Committed: Wed Jul 19 21:25:41 2017 -0700

----------------------------------------------------------------------
 src/tests/master_tests.cpp         | 25 +++++++++++--------------
 src/tests/reconciliation_tests.cpp | 19 ++++++++-----------
 2 files changed, 19 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/b43ceb8b/src/tests/master_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_tests.cpp b/src/tests/master_tests.cpp
index 5742860..e3ccf8c 100644
--- a/src/tests/master_tests.cpp
+++ b/src/tests/master_tests.cpp
@@ -546,18 +546,11 @@ TEST_F(MasterTest, KillUnknownTaskSlaveInTransition)
   EXPECT_CALL(sched, disconnected(&driver))
     .WillOnce(FutureSatisfy(&disconnected));
 
-  // Restart master.
-  master = StartMaster(masterFlags);
+  // Restart master with a mock authorizer to block agent state transitioning.
+  MockAuthorizer authorizer;
+  master = StartMaster(&authorizer, masterFlags);
   ASSERT_SOME(master);
 
-  // Intercept the first registrar operation that is attempted; this
-  // should be the registry operation that reregisters the slave.
-  Future<Owned<master::Operation>> reregister;
-  Promise<bool> promise; // Never satisfied.
-  EXPECT_CALL(*master.get()->registrar.get(), apply(_))
-    .WillOnce(DoAll(FutureArg<0>(&reregister),
-                    Return(promise.future())));
-
   frameworkId = Future<FrameworkID>();
   EXPECT_CALL(sched, registered(&driver, _, _))
     .WillOnce(FutureArg<1>(&frameworkId));
@@ -569,15 +562,19 @@ TEST_F(MasterTest, KillUnknownTaskSlaveInTransition)
   AWAIT_READY(disconnected);
   AWAIT_READY(frameworkId);
 
+  // Intercept agent authorization.
+  Future<Nothing> authorize;
+  Promise<bool> promise; // Never satisfied.
+  EXPECT_CALL(authorizer, authorized(_))
+    .WillOnce(DoAll(FutureSatisfy(&authorize),
+                    Return(promise.future())));
+
   // Restart slave.
   slave = StartSlave(&detector, &containerizer, slaveFlags);
   ASSERT_SOME(slave);
 
   // Wait for the slave to start reregistration.
-  AWAIT_READY(reregister);
-  EXPECT_NE(
-      nullptr,
-      dynamic_cast<master::MarkSlaveReachable*>(reregister->get()));
+  AWAIT_READY(authorize);
 
   // As Master::killTask isn't doing anything, we shouldn't get a status update.
   EXPECT_CALL(sched, statusUpdate(&driver, _))

http://git-wip-us.apache.org/repos/asf/mesos/blob/b43ceb8b/src/tests/reconciliation_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/reconciliation_tests.cpp b/src/tests/reconciliation_tests.cpp
index 9c36b9a..f1635fd 100644
--- a/src/tests/reconciliation_tests.cpp
+++ b/src/tests/reconciliation_tests.cpp
@@ -554,8 +554,9 @@ TEST_F(ReconciliationTest, RecoveredAgentReregistrationInProgress)
   slave.get()->terminate();
   slave->reset();
 
-  // Restart the master.
-  master = StartMaster(masterFlags);
+  // Restart master with a mock authorizer to block agent state transitioning.
+  MockAuthorizer authorizer;
+  master = StartMaster(&authorizer, masterFlags);
   ASSERT_SOME(master);
 
   MockScheduler sched;
@@ -578,12 +579,11 @@ TEST_F(ReconciliationTest, RecoveredAgentReregistrationInProgress)
   // Wait for the framework to register.
   AWAIT_READY(frameworkId);
 
-  // Intercept the first registrar operation that is attempted; this
-  // should be the registry operation that reregisters the slave.
-  Future<Owned<master::Operation>> reregister;
+  // Intercept agent authorization.
+  Future<Nothing> authorize;
   Promise<bool> promise; // Never satisfied.
-  EXPECT_CALL(*master.get()->registrar.get(), apply(_))
-    .WillOnce(DoAll(FutureArg<0>(&reregister),
+  EXPECT_CALL(authorizer, authorized(_))
+    .WillOnce(DoAll(FutureSatisfy(&authorize),
                     Return(promise.future())));
 
   // Restart the slave.
@@ -592,10 +592,7 @@ TEST_F(ReconciliationTest, RecoveredAgentReregistrationInProgress)
   ASSERT_SOME(slave);
 
   // Wait for the slave to start reregistration.
-  AWAIT_READY(reregister);
-  EXPECT_NE(
-      nullptr,
-      dynamic_cast<master::MarkSlaveReachable*>(reregister->get()));
+  AWAIT_READY(authorize);
 
   Future<mesos::scheduler::Call> reconcileCall = FUTURE_CALL(
       mesos::scheduler::Call(), mesos::scheduler::Call::RECONCILE, _, _);


[2/2] mesos git commit: Skipped consulting registry if the agent is in `slaves.recovered`.

Posted by ya...@apache.org.
Skipped consulting registry if the agent is in `slaves.recovered`.

Agents in `slaves.recovered` haven't been marked unreachable and
would have been in `slaves.registered` if the master has not failed
over. So this is consistent with how the master in steady state handles
reregistering agents by checking against `slaves.registered`.

Review: https://reviews.apache.org/r/60400


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/ef662258
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/ef662258
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/ef662258

Branch: refs/heads/master
Commit: ef66225896be26fd4e7b0bb914e2820366613470
Parents: b43ceb8
Author: Jiang Yan Xu <xu...@apple.com>
Authored: Thu Jun 22 14:01:27 2017 -0700
Committer: Jiang Yan Xu <xu...@apple.com>
Committed: Wed Jul 19 21:39:11 2017 -0700

----------------------------------------------------------------------
 src/master/master.cpp | 58 +++++++++++++++++++++++++++++++---------------
 1 file changed, 39 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/ef662258/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index a902bfc..e12c997 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -6182,25 +6182,45 @@ void Master::_reregisterSlave(
   LOG(INFO) << "Re-registering agent " << slaveInfo.id() << " at " << pid
             << " (" << slaveInfo.hostname() << ")";
 
-  // Consult the registry to determine whether to readmit the
-  // slave. In the common case, the slave has been marked unreachable
-  // by the master, so we move the slave to the reachable list and
-  // readmit it. If the slave isn't in the unreachable list (which
-  // might occur if the slave's entry in the unreachable list is
-  // GC'd), we admit the slave anyway.
-  registrar->apply(Owned<Operation>(new MarkSlaveReachable(slaveInfo)))
-    .onAny(defer(self(),
-                 &Self::__reregisterSlave,
-                 slaveInfo,
-                 pid,
-                 checkpointedResources,
-                 executorInfos,
-                 tasks,
-                 frameworks,
-                 completedFrameworks,
-                 version,
-                 agentCapabilities,
-                 lambda::_1));
+  if (slaves.recovered.contains(slaveInfo.id())) {
+    // The agent likely is re-registering after a master failover as it
+    // is in the list recovered from the registry. No need to consult the
+    // registry in this case and we can directly re-admit it.
+    VLOG(1) << "Re-admitting recovered agent " << slaveInfo.id() << " at "
+            << pid << " (" << slaveInfo.hostname() << ")";
+
+    __reregisterSlave(
+        slaveInfo,
+        pid,
+        checkpointedResources,
+        executorInfos,
+        tasks,
+        frameworks,
+        completedFrameworks,
+        version,
+        agentCapabilities,
+        true);
+  } else {
+    // Consult the registry to determine whether to readmit the
+    // slave. In the common case, the slave has been marked unreachable
+    // by the master, so we move the slave to the reachable list and
+    // readmit it. If the slave isn't in the unreachable list (which
+    // might occur if the slave's entry in the unreachable list is
+    // GC'd), we admit the slave anyway.
+    registrar->apply(Owned<Operation>(new MarkSlaveReachable(slaveInfo)))
+      .onAny(defer(self(),
+                   &Self::__reregisterSlave,
+                   slaveInfo,
+                   pid,
+                   checkpointedResources,
+                   executorInfos,
+                   tasks,
+                   frameworks,
+                   completedFrameworks,
+                   version,
+                   agentCapabilities,
+                   lambda::_1));
+  }
 }