You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by an...@apache.org on 2016/10/04 02:45:39 UTC

[1/3] mesos git commit: Disallowed HTTP executors to subscribe before containerizer recovery.

Repository: mesos
Updated Branches:
  refs/heads/1.0.x 837bb4ecc -> c76daaddb


Disallowed HTTP executors to subscribe before containerizer recovery.

Previously, it was possible for an HTTP based executor to subscribe
with the agent before the containerizer recovery was done. This
was a problem since calling `containerizer->update()` etc. would
result in a failure.

Review: https://reviews.apache.org/r/52408/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/df5ba09e
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/df5ba09e
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/df5ba09e

Branch: refs/heads/1.0.x
Commit: df5ba09ea93b06c5c2080766c1b317dc8fb1949d
Parents: 96a8d84
Author: Anand Mazumdar <an...@apache.org>
Authored: Thu Sep 29 16:38:07 2016 -0700
Committer: Anand Mazumdar <an...@apache.org>
Committed: Mon Oct 3 19:42:04 2016 -0700

----------------------------------------------------------------------
 src/slave/http.cpp  | 5 +++++
 src/slave/slave.cpp | 4 ++++
 src/slave/slave.hpp | 5 +++++
 3 files changed, 14 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/df5ba09e/src/slave/http.cpp
----------------------------------------------------------------------
diff --git a/src/slave/http.cpp b/src/slave/http.cpp
index 86803fe..7ba213a 100644
--- a/src/slave/http.cpp
+++ b/src/slave/http.cpp
@@ -414,6 +414,11 @@ string Slave::Http::EXECUTOR_HELP() {
 
 Future<Response> Slave::Http::executor(const Request& request) const
 {
+  if (!slave->recoveryInfo.reconnect) {
+    CHECK(slave->state == RECOVERING);
+    return ServiceUnavailable("Agent has not finished recovery");
+  }
+
   // TODO(anand): Add metrics for rejected requests.
 
   if (request.method != "POST") {

http://git-wip-us.apache.org/repos/asf/mesos/blob/df5ba09e/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index abec13b..1ee580e 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -4725,6 +4725,10 @@ Future<Nothing> Slave::_recoverContainerizer(
 
 Future<Nothing> Slave::_recover()
 {
+  // Allow HTTP based executors to subscribe after the
+  // containerizer recovery is complete.
+  recoveryInfo.reconnect = true;
+
   foreachvalue (Framework* framework, frameworks) {
     foreachvalue (Executor* executor, framework->executors) {
       // Set up callback for executor termination.

http://git-wip-us.apache.org/repos/asf/mesos/blob/df5ba09e/src/slave/slave.hpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.hpp b/src/slave/slave.hpp
index 10e097a..1975d93 100644
--- a/src/slave/slave.hpp
+++ b/src/slave/slave.hpp
@@ -303,6 +303,11 @@ public:
     // Flag to indicate if recovery, including reconciling
     // (i.e., reconnect/kill) with executors is finished.
     process::Promise<Nothing> recovered;
+
+    // Flag to indicate that HTTP based executors can
+    // subscribe with the agent. We allow them to subscribe
+    // after the agent recovers the containerizer.
+    bool reconnect = false;
   } recoveryInfo;
 
   // TODO(benh): Clang requires members to be public in order to take


[3/3] mesos git commit: Added MESOS-6274 to 1.0.2 CHANGELOG.

Posted by an...@apache.org.
Added MESOS-6274 to 1.0.2 CHANGELOG.


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/c76daadd
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/c76daadd
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/c76daadd

Branch: refs/heads/1.0.x
Commit: c76daaddbac78bd37dca07a230a8a2bb46b2c46b
Parents: df5ba09
Author: Anand Mazumdar <an...@apache.org>
Authored: Mon Oct 3 18:25:12 2016 -0700
Committer: Anand Mazumdar <an...@apache.org>
Committed: Mon Oct 3 19:42:31 2016 -0700

----------------------------------------------------------------------
 CHANGELOG | 2 ++
 1 file changed, 2 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/c76daadd/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index ca9192d..ae4d1e9 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -18,6 +18,8 @@ All Issues:
     * [MESOS-6246] - Libprocess links will not generate an ExitedEvent if the socket creation fails.
     * [MESOS-6299] - Master doesn't remove task from pending when it is invalid.
     * [MESOS-6026] - Tasks mistakenly marked as FAILED due to race b/w sendExecutorTerminatedStatusUpdate() and _statusUpdate().
+    * [MESOS-6274] - Agent should not allow HTTP based executors to subscribe
+      before containerizer recovery is done.
 
 
 Release Notes - Mesos - Version 1.0.1


[2/3] mesos git commit: Added `RecoveryInfo` struct to the agent.

Posted by an...@apache.org.
Added `RecoveryInfo` struct to the agent.

This struct would contain all the recovery-related metadata
on the agent from now on. Eventually, we would add component-specific
recovery information to this struct, e.g., the executors
can now subscribe again with the agent etc.

Review: https://reviews.apache.org/r/52407/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/96a8d84d
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/96a8d84d
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/96a8d84d

Branch: refs/heads/1.0.x
Commit: 96a8d84d3fbf36e3b2227a697ceb23780c7d154c
Parents: 837bb4e
Author: Anand Mazumdar <an...@apache.org>
Authored: Thu Sep 29 16:37:53 2016 -0700
Committer: Anand Mazumdar <an...@apache.org>
Committed: Mon Oct 3 19:42:04 2016 -0700

----------------------------------------------------------------------
 src/slave/slave.cpp |  6 +++---
 src/slave/slave.hpp | 12 ++++++++----
 2 files changed, 11 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/96a8d84d/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 09be083..abec13b 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -3192,7 +3192,7 @@ void Slave::reregisterExecutorTimeout()
   }
 
   // Signal the end of recovery.
-  recovered.set(Nothing());
+  recoveryInfo.recovered.set(Nothing());
 }
 
 
@@ -4784,7 +4784,7 @@ Future<Nothing> Slave::_recover()
     // We set 'recovered' flag inside reregisterExecutorTimeout(),
     // so that when the slave re-registers with master it can
     // correctly inform the master about the launched tasks.
-    return recovered.future();
+    return recoveryInfo.recovered.future();
   }
 
   return Nothing();
@@ -4885,7 +4885,7 @@ void Slave::__recover(const Future<Nothing>& future)
     // doesn't happen within a timeout.
   }
 
-  recovered.set(Nothing()); // Signal recovery.
+  recoveryInfo.recovered.set(Nothing()); // Signal recovery.
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/96a8d84d/src/slave/slave.hpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.hpp b/src/slave/slave.hpp
index 95223ec..10e097a 100644
--- a/src/slave/slave.hpp
+++ b/src/slave/slave.hpp
@@ -297,6 +297,14 @@ public:
     TERMINATING,  // Slave is shutting down.
   } state;
 
+  // Describes information about agent recovery.
+  struct RecoveryInfo
+  {
+    // Flag to indicate if recovery, including reconciling
+    // (i.e., reconnect/kill) with executors is finished.
+    process::Promise<Nothing> recovered;
+  } recoveryInfo;
+
   // TODO(benh): Clang requires members to be public in order to take
   // their address which we do in tests (for things like
   // FUTURE_DISPATCH).
@@ -642,10 +650,6 @@ private:
   // the master.
   process::Timer pingTimer;
 
-  // Flag to indicate if recovery, including reconciling (i.e., reconnect/kill)
-  // with executors is finished.
-  process::Promise<Nothing> recovered;
-
   // Root meta directory containing checkpointed data.
   const std::string metaDir;