You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by gr...@apache.org on 2018/01/13 00:58:52 UTC

[1/4] mesos git commit: Fixed handling of terminal operations in `updateSlave` handler.

Repository: mesos
Updated Branches:
  refs/heads/master 53480e907 -> b4372acb1


Fixed handling of terminal operations in `updateSlave` handler.

An offer operation can be become terminal between any previously
received non-terminal offer operation status update and receiving an
`UpdateSlaveMessage` (e.g., if the agent failed over, or when the
agent was partitioned from the master).

The master will in its offer operations status handler attempt
to apply operations which became terminal since the last update. At
the same time, the total resources in an `UpdateSlaveMessage` would
already contain the result of applying the operation, and we need to
prevent the master from attempting to apply the same operation twice.

This patch updates the master handler for `UpdateSlaveMessage` to
transition pending operations which are reported as terminal without
also updating the resources on the agent as any update would already
be reflected in the new total from the `UpdateSlaveMessage.

Review: https://reviews.apache.org/r/65072/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/b4372acb
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/b4372acb
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/b4372acb

Branch: refs/heads/master
Commit: b4372acb1133973e605c9c618e144ba3f4f74c16
Parents: 24c7104
Author: Benjamin Bannier <be...@mesosphere.io>
Authored: Fri Jan 12 15:40:46 2018 -0800
Committer: Greg Mann <gr...@gmail.com>
Committed: Fri Jan 12 16:53:50 2018 -0800

----------------------------------------------------------------------
 src/master/master.cpp | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/b4372acb/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index a207819..b0ec565 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -7583,6 +7583,39 @@ void Master::updateSlave(UpdateSlaveMessage&& message)
                 ->CopyFrom(providerId.get());
             }
           }
+
+          // If a known operation became terminal between any previous offer
+          // operation status update and this `UpdateSlaveMessage`, the total
+          // resources we were sent already had the operation applied. We need
+          // to update the state of the operation to terminal here so that any
+          // update sent by the agent later does not cause us to apply the
+          // operation again.
+          if (provider.newOperations.isSome() &&
+              provider.newOperations->contains(uuid)) {
+            Option<Operation> oldOperation = provider.oldOperations->get(uuid);
+            Option<Operation> newOperation = provider.newOperations->get(uuid);
+
+            CHECK_SOME(oldOperation);
+            CHECK_SOME(newOperation);
+
+            if (!protobuf::isTerminalState(
+                    oldOperation->latest_status().state()) &&
+                protobuf::isTerminalState(
+                    newOperation->latest_status().state())) {
+              Operation* operation = CHECK_NOTNULL(slave->getOperation(uuid));
+
+              UpdateOperationStatusMessage update =
+                protobuf::createUpdateOperationStatusMessage(
+                    uuid,
+                    newOperation->latest_status(),
+                    newOperation->latest_status(),
+                    operation->framework_id(),
+                    operation->slave_id());
+
+              updateOperation(
+                  operation, update, false); // Do not update resources.
+            }
+          }
         }
       }
 


[2/4] mesos git commit: Made it possible to update an operation without mutating resources.

Posted by gr...@apache.org.
Made it possible to update an operation without mutating resources.

In certain situations it can make sense to update the state of an
operation without also wanting to update resources. In this patch we
modify the master's `updateOperation` function to take an additional
parameter signifying whether resources should be updated, or whether
we only care about updating the operation and tracking of used
resources.

We will use this functionality in a subsequent patch to perform more
contained updates to agent state when processing `UpdateSlaveMessages`
which contain both resources and operations (and where any terminal
operations were already applied to the agent's resources).

Review: https://reviews.apache.org/r/65095/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/69930fe9
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/69930fe9
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/69930fe9

Branch: refs/heads/master
Commit: 69930fe93bcb513c16f4f575b504978d6e0d96dc
Parents: dd866f9
Author: Benjamin Bannier <be...@mesosphere.io>
Authored: Fri Jan 12 15:40:37 2018 -0800
Committer: Greg Mann <gr...@gmail.com>
Committed: Fri Jan 12 16:53:50 2018 -0800

----------------------------------------------------------------------
 src/master/master.cpp | 43 ++++++++++++++++++++++++++-----------------
 src/master/master.hpp |  9 ++++++---
 2 files changed, 32 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/69930fe9/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 6965514..b3fdc88 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -10289,7 +10289,8 @@ void Master::addOperation(
 
 void Master::updateOperation(
     Operation* operation,
-    const UpdateOperationStatusMessage& update)
+    const UpdateOperationStatusMessage& update,
+    bool convertResources)
 {
   CHECK_NOTNULL(operation);
 
@@ -10348,26 +10349,34 @@ void Master::updateOperation(
       const Resources converted =
         operation->latest_status().converted_resources();
 
-      allocator->updateAllocation(
-          operation->framework_id(),
-          operation->slave_id(),
-          consumed.get(),
-          {ResourceConversion(consumed.get(), converted)});
+      if (convertResources) {
+        allocator->updateAllocation(
+            operation->framework_id(),
+            operation->slave_id(),
+            consumed.get(),
+            {ResourceConversion(consumed.get(), converted)});
 
-      allocator->recoverResources(
-          operation->framework_id(),
-          operation->slave_id(),
-          converted,
-          None());
+        allocator->recoverResources(
+            operation->framework_id(),
+            operation->slave_id(),
+            converted,
+            None());
 
-      Resources consumedUnallocated = consumed.get();
-      consumedUnallocated.unallocate();
+        Resources consumedUnallocated = consumed.get();
+        consumedUnallocated.unallocate();
 
-      Resources convertedUnallocated = converted;
-      convertedUnallocated.unallocate();
+        Resources convertedUnallocated = converted;
+        convertedUnallocated.unallocate();
 
-      slave->apply(
-          {ResourceConversion(consumedUnallocated, convertedUnallocated)});
+        slave->apply(
+            {ResourceConversion(consumedUnallocated, convertedUnallocated)});
+      } else {
+        allocator->recoverResources(
+            operation->framework_id(),
+            operation->slave_id(),
+            consumed.get(),
+            None());
+      }
 
       break;
     }

http://git-wip-us.apache.org/repos/asf/mesos/blob/69930fe9/src/master/master.hpp
----------------------------------------------------------------------
diff --git a/src/master/master.hpp b/src/master/master.hpp
index 0c05b20..3d5180b 100644
--- a/src/master/master.hpp
+++ b/src/master/master.hpp
@@ -878,11 +878,14 @@ protected:
       Slave* slave,
       Operation* operation);
 
-  // Transitions the operation, and recovers resources if the
-  // operation becomes terminal.
+  // Transitions the operation, and updates and recovers resources if
+  // the operation becomes terminal. If `convertResources` is `false`
+  // only the consumed resources of terminal operations are recovered,
+  // but no resources are converted.
   void updateOperation(
       Operation* operation,
-      const UpdateOperationStatusMessage& update);
+      const UpdateOperationStatusMessage& update,
+      bool convertResources = true);
 
   // Remove the operation.
   void removeOperation(Operation* operation);


[3/4] mesos git commit: Fixed master's `updateOperation` for operations without framework ID.

Posted by gr...@apache.org.
Fixed master's `updateOperation` for operations without framework ID.

This patch fixes logging of master's `updateOperation` for operations
without framework ID. We also add a `CHECK` before the part updating
resources or the allocator for non-speculated operations; currently
non-speculated operations can only be initiated from a framework, but
not from e.g., the operation API, and additional work is needed to
support this.

Review: https://reviews.apache.org/r/65096/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/24c71047
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/24c71047
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/24c71047

Branch: refs/heads/master
Commit: 24c71047c6a866bdbc83af90650828f239ee07b0
Parents: 69930fe
Author: Benjamin Bannier <be...@mesosphere.io>
Authored: Fri Jan 12 15:40:42 2018 -0800
Committer: Greg Mann <gr...@gmail.com>
Committed: Fri Jan 12 16:53:50 2018 -0800

----------------------------------------------------------------------
 src/master/master.cpp | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/24c71047/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index b3fdc88..a207819 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -10297,9 +10297,11 @@ void Master::updateOperation(
   const OperationStatus& status =
     update.has_latest_status() ? update.latest_status() : update.status();
 
-  LOG(INFO) << "Updating the state of operation '"
-            << operation->info().id() << "' (uuid: " << update.operation_uuid()
-            << ") of framework " << operation->framework_id()
+  LOG(INFO) << "Updating the state of operation '" << operation->info().id()
+            << "' (uuid: " << update.operation_uuid() << ") for"
+            << (operation->has_framework_id()
+                  ? " framework " + stringify(operation->framework_id())
+                  : " an operator API call")
             << " (latest state: " << operation->latest_status().state()
             << ", status update state: " << status.state() << ")";
 
@@ -10331,6 +10333,10 @@ void Master::updateOperation(
     return;
   }
 
+  // We currently do not support non-speculated operations not
+  // triggered by a framework (e.g., over the operator API).
+  CHECK(operation->has_framework_id());
+
   Try<Resources> consumed = protobuf::getConsumedResources(operation->info());
   CHECK_SOME(consumed);
 
@@ -10406,9 +10412,7 @@ void Master::updateOperation(
 
   slave->recoverResources(operation);
 
-  Framework* framework = operation->has_framework_id()
-    ? getFramework(operation->framework_id())
-    : nullptr;
+  Framework* framework = getFramework(operation->framework_id());
 
   if (framework != nullptr) {
     framework->recoverResources(operation);


[4/4] mesos git commit: Simplified master's `updateOperation` function.

Posted by gr...@apache.org.
Simplified master's `updateOperation` function.

This replaces repetitive code by instead using a helper variable which
allows us to avoid branching. We can then also replace a `Try<bool>`
with a plain `bool` further simplifying the code.

Review: https://reviews.apache.org/r/65093/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/dd866f9f
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/dd866f9f
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/dd866f9f

Branch: refs/heads/master
Commit: dd866f9fd73858ef4d6369edddb4aa9a2f1c1a8f
Parents: 53480e9
Author: Benjamin Bannier <be...@mesosphere.io>
Authored: Fri Jan 12 15:40:32 2018 -0800
Committer: Greg Mann <gr...@gmail.com>
Committed: Fri Jan 12 16:53:50 2018 -0800

----------------------------------------------------------------------
 src/master/master.cpp | 48 +++++++++++++++-------------------------------
 1 file changed, 15 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/dd866f9f/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 8738e10..6965514 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -10293,37 +10293,8 @@ void Master::updateOperation(
 {
   CHECK_NOTNULL(operation);
 
-  const OperationStatus& status = update.status();
-
-  Option<OperationStatus> latestStatus;
-  if (update.has_latest_status()) {
-    latestStatus = update.latest_status();
-  }
-
-  // Whether the operation has just become terminated.
-  Option<bool> terminated;
-
-  if (latestStatus.isSome()) {
-    terminated =
-      !protobuf::isTerminalState(operation->latest_status().state()) &&
-      protobuf::isTerminalState(latestStatus->state());
-
-    // If the operation has already transitioned to a terminal state,
-    // do not update its state.
-    if (!protobuf::isTerminalState(operation->latest_status().state())) {
-      operation->mutable_latest_status()->CopyFrom(latestStatus.get());
-    }
-  } else {
-    terminated =
-      !protobuf::isTerminalState(operation->latest_status().state()) &&
-      protobuf::isTerminalState(status.state());
-
-    if (!protobuf::isTerminalState(operation->latest_status().state())) {
-      operation->mutable_latest_status()->CopyFrom(status);
-    }
-  }
-
-  operation->add_statuses()->CopyFrom(status);
+  const OperationStatus& status =
+    update.has_latest_status() ? update.latest_status() : update.status();
 
   LOG(INFO) << "Updating the state of operation '"
             << operation->info().id() << "' (uuid: " << update.operation_uuid()
@@ -10331,9 +10302,20 @@ void Master::updateOperation(
             << " (latest state: " << operation->latest_status().state()
             << ", status update state: " << status.state() << ")";
 
-  CHECK_SOME(terminated);
+  // Whether the operation has just become terminated.
+  const bool terminated =
+    !protobuf::isTerminalState(operation->latest_status().state()) &&
+    protobuf::isTerminalState(status.state());
+
+  // If the operation has already transitioned to a terminal state,
+  // do not update its state.
+  if (!protobuf::isTerminalState(operation->latest_status().state())) {
+    operation->mutable_latest_status()->CopyFrom(status);
+  }
+
+  operation->add_statuses()->CopyFrom(status);
 
-  if (!terminated.get()) {
+  if (!terminated) {
     return;
   }