You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bb...@apache.org on 2018/05/04 13:03:01 UTC

[1/3] mesos git commit: Changed failure check for a call to os::system().

Repository: mesos
Updated Branches:
  refs/heads/master 520b72985 -> 351bade6c


Changed failure check for a call to os::system().

The previous check for `None()` was missing the case where the
program exited with a non-zero exit status.

Review: https://reviews.apache.org/r/66776/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/bb8c9a28
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/bb8c9a28
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/bb8c9a28

Branch: refs/heads/master
Commit: bb8c9a28418e1632c225f669f4d0326332d3a835
Parents: 520b729
Author: Benno Evers <be...@mesosphere.com>
Authored: Fri May 4 13:37:25 2018 +0200
Committer: Benjamin Bannier <bb...@apache.org>
Committed: Fri May 4 13:37:25 2018 +0200

----------------------------------------------------------------------
 3rdparty/libprocess/src/memory_profiler.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/bb8c9a28/3rdparty/libprocess/src/memory_profiler.cpp
----------------------------------------------------------------------
diff --git a/3rdparty/libprocess/src/memory_profiler.cpp b/3rdparty/libprocess/src/memory_profiler.cpp
index b765246..a4c6be6 100644
--- a/3rdparty/libprocess/src/memory_profiler.cpp
+++ b/3rdparty/libprocess/src/memory_profiler.cpp
@@ -322,7 +322,7 @@ Try<Nothing> generateJeprofFile(
       inputPath,
       outputPath).get());
 
-  if (result.isNone()) {
+  if (result != 0) {
     return Error(
       "Error trying to run jeprof. Please make sure that jeprof is installed"
       " and that the input file contains data. For more information, please"


[2/3] mesos git commit: Added missing test expectation.

Posted by bb...@apache.org.
Added missing test expectation.

On master failover the scheduler will get disconnected. Add a test
expectation for that. This silences a gmock warning.

Review: https://reviews.apache.org/r/66849/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/e4de09c8
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/e4de09c8
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/e4de09c8

Branch: refs/heads/master
Commit: e4de09c86c6ef936d2a7369b491483e835616717
Parents: bb8c9a2
Author: Benjamin Bannier <be...@mesosphere.io>
Authored: Fri May 4 13:38:08 2018 +0200
Committer: Benjamin Bannier <bb...@apache.org>
Committed: Fri May 4 13:38:08 2018 +0200

----------------------------------------------------------------------
 src/tests/operation_reconciliation_tests.cpp | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/e4de09c8/src/tests/operation_reconciliation_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/operation_reconciliation_tests.cpp b/src/tests/operation_reconciliation_tests.cpp
index 76c1695..9717e84 100644
--- a/src/tests/operation_reconciliation_tests.cpp
+++ b/src/tests/operation_reconciliation_tests.cpp
@@ -752,6 +752,7 @@ TEST_P(OperationReconciliationTest, AgentPendingOperationAfterMasterFailover)
   AWAIT_READY(applyOperation);
 
   // Simulate master failover.
+  EXPECT_CALL(*scheduler, disconnected(_));
 
   detector->appoint(None());
 


[3/3] mesos git commit: Correctly reconciled dropped operation after agent failover.

Posted by bb...@apache.org.
Correctly reconciled dropped operation after agent failover.

Previously, when the master received an `UpdateSlaveMessage` after an
agent failover, it did not correctly detect dropped operations
(operations known to the master but unknown to the agent) and did not
trigger reconciliation for such operations.

This patch fixes the handler in the master so that such dropped
operations are reconciled.

Review: https://reviews.apache.org/r/66908/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/351bade6
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/351bade6
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/351bade6

Branch: refs/heads/master
Commit: 351bade6c28682daf821e88a40140e1364d69cb0
Parents: e4de09c
Author: Benjamin Bannier <be...@mesosphere.io>
Authored: Fri May 4 13:38:44 2018 +0200
Committer: Benjamin Bannier <bb...@apache.org>
Committed: Fri May 4 13:38:44 2018 +0200

----------------------------------------------------------------------
 src/master/master.cpp                           | 13 ++++
 src/tests/master_slave_reconciliation_tests.cpp | 67 ++++++++++++++++++++
 2 files changed, 80 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/351bade6/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 7a2f69c..810ccd3 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -7822,6 +7822,13 @@ void Master::updateSlave(UpdateSlaveMessage&& message)
 
   // Check if the known operations for this agent changed.
   if (!updated) {
+    // Below we loop over all received operations and check whether
+    // they are known to the master; operations can be unknown to the
+    // master after a master failover. To handle dropped operations on
+    // agent failover we explicitly track the received operations and
+    // compare them against the operations known to the master.
+    hashset<UUID> receivedOperations;
+
     foreach (const Operation& operation, message.operations().operations()) {
       if (!slave->operations.contains(operation.uuid())) {
         updated = true;
@@ -7832,6 +7839,12 @@ void Master::updateSlave(UpdateSlaveMessage&& message)
         updated = true;
         break;
       }
+
+      receivedOperations.insert(operation.uuid());
+    }
+
+    if (receivedOperations.size() != slave->operations.size()) {
+      updated = true;
     }
   }
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/351bade6/src/tests/master_slave_reconciliation_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_slave_reconciliation_tests.cpp b/src/tests/master_slave_reconciliation_tests.cpp
index 6bb4263..71e22af 100644
--- a/src/tests/master_slave_reconciliation_tests.cpp
+++ b/src/tests/master_slave_reconciliation_tests.cpp
@@ -352,6 +352,73 @@ TEST_F(MasterSlaveReconciliationTest, ReconcileDroppedTask)
 }
 
 
+// This test verifies that the master reconciles operations that are missing
+// from a reregistering slave. In this case, we drop the ApplyOperationMessage
+// and expect the master to send a ReconcileOperationsMessage after the slave
+// reregisters.
+TEST_F(MasterSlaveReconciliationTest, ReconcileDroppedOperation)
+{
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  StandaloneMasterDetector detector(master.get()->pid);
+
+  Try<Owned<cluster::Slave>> slave = StartSlave(&detector);
+  ASSERT_SOME(slave);
+
+  // Register the framework in a non-`*` role so it can reserve resources.
+  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
+  frameworkInfo.set_roles(0, DEFAULT_TEST_ROLE);
+
+  MockScheduler sched;
+  MesosSchedulerDriver driver(
+      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);
+
+  EXPECT_CALL(sched, registered(&driver, _, _));
+
+  Future<vector<Offer>> offers;
+  EXPECT_CALL(sched, resourceOffers(&driver, _))
+    .WillOnce(FutureArg<1>(&offers))
+    .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+  driver.start();
+
+  AWAIT_READY(offers);
+
+  // We prevent the operation from reaching the agent.
+  Future<ApplyOperationMessage> applyOperationMessage =
+    DROP_PROTOBUF(ApplyOperationMessage(), _, _);
+
+  // Perform a reserve operation on the offered resources.
+  // This will trigger an `ApplyOperationMessage`.
+  ASSERT_FALSE(offers->empty());
+  const Offer& offer = offers->at(0);
+
+  Resources reservedResources = offer.resources();
+  reservedResources =
+    reservedResources.pushReservation(createDynamicReservationInfo(
+        frameworkInfo.roles(0), frameworkInfo.principal()));
+
+  driver.acceptOffers({offer.id()}, {RESERVE(reservedResources)});
+
+  AWAIT_READY(applyOperationMessage);
+
+  // We expect the master to detect the missing operation when the
+  // slave reregisters and to reconcile the operations on that slave.
+  Future<ReconcileOperationsMessage> reconcileOperationsMessage =
+    FUTURE_PROTOBUF(ReconcileOperationsMessage(), _, _);
+
+  // Simulate a master failover to trigger slave reregistration.
+  detector.appoint(master.get()->pid);
+
+  AWAIT_READY(reconcileOperationsMessage);
+
+  ASSERT_EQ(1, reconcileOperationsMessage->operations_size());
+  EXPECT_EQ(
+      applyOperationMessage->operation_uuid(),
+      reconcileOperationsMessage->operations(0).operation_uuid());
+}
+
 // This test verifies that the master reconciles tasks that are
 // missing from a reregistering slave. In this case, we trigger
 // a race between the slave re-registration message and the launch