You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by al...@apache.org on 2017/03/30 17:35:39 UTC

[2/4] mesos git commit: Enabled pause/resume for general checks.

Enabled pause/resume for general checks.

Review: https://reviews.apache.org/r/57912/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/3a689ab5
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/3a689ab5
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/3a689ab5

Branch: refs/heads/master
Commit: 3a689ab552a9ff23dd912e0178d3c5af393f7e84
Parents: 5c9ce37
Author: Gastón Kleiman <ga...@mesosphere.io>
Authored: Thu Mar 30 13:41:14 2017 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Thu Mar 30 19:34:24 2017 +0200

----------------------------------------------------------------------
 src/checks/checker.cpp            | 52 ++++++++++++++++++++++++++++++++--
 src/checks/checker.hpp            |  7 ++---
 src/launcher/default_executor.cpp | 22 +++++++++-----
 src/launcher/executor.cpp         |  4 +--
 4 files changed, 69 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/3a689ab5/src/checks/checker.cpp
----------------------------------------------------------------------
diff --git a/src/checks/checker.cpp b/src/checks/checker.cpp
index 1664acd..d1e9083 100644
--- a/src/checks/checker.cpp
+++ b/src/checks/checker.cpp
@@ -127,6 +127,9 @@ public:
       const Option<pid_t>& _taskPid,
       const std::vector<std::string>& _namespaces);
 
+  void pause();
+  void resume();
+
   virtual ~CheckerProcess() {}
 
 protected:
@@ -167,6 +170,7 @@ private:
   Option<lambda::function<pid_t(const lambda::function<int()>&)>> clone;
 
   CheckStatusInfo previousCheckStatus;
+  bool paused;
 };
 
 
@@ -208,9 +212,15 @@ Checker::~Checker()
 }
 
 
-void Checker::stop()
+void Checker::pause()
+{
+  dispatch(process.get(), &CheckerProcess::pause);
+}
+
+
+void Checker::resume()
 {
-  terminate(process.get(), true);
+  dispatch(process.get(), &CheckerProcess::resume);
 }
 
 
@@ -225,7 +235,8 @@ CheckerProcess::CheckerProcess(
     updateCallback(_callback),
     taskId(_taskId),
     taskPid(_taskPid),
-    namespaces(_namespaces)
+    namespaces(_namespaces),
+    paused(false)
 {
   Try<Duration> create = Duration::create(check.delay_seconds());
   CHECK_SOME(create);
@@ -286,6 +297,10 @@ void CheckerProcess::finalize()
 
 void CheckerProcess::performCheck()
 {
+  if (paused) {
+    return;
+  }
+
   Stopwatch stopwatch;
   stopwatch.start();
 
@@ -314,16 +329,47 @@ void CheckerProcess::performCheck()
 
 void CheckerProcess::scheduleNext(const Duration& duration)
 {
+  CHECK(!paused);
+
   VLOG(1) << "Scheduling check for task '" << taskId << "' in " << duration;
 
   delay(duration, self(), &Self::performCheck);
 }
 
 
+void CheckerProcess::pause()
+{
+  if (!paused) {
+    VLOG(1) << "Checking for task '" << taskId << "' paused";
+
+    paused = true;
+  }
+}
+
+
+void CheckerProcess::resume()
+{
+  if (paused) {
+    VLOG(1) << "Checking for task '" << taskId << "' resumed";
+
+    paused = false;
+
+    // Schedule a check immediately.
+    scheduleNext(Duration::zero());
+  }
+}
+
 void CheckerProcess::processCheckResult(
     const Stopwatch& stopwatch,
     const CheckStatusInfo& result)
 {
+  // `Checker` might have been paused while performing the check.
+  if (paused) {
+    LOG(INFO) << "Ignoring " << check.type() << " check result for"
+              << " task '" << taskId << "': checking is paused";
+    return;
+  }
+
   VLOG(1) << "Performed " << check.type() << " check"
           << " for task '" << taskId << "' in " << stopwatch.elapsed();
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/3a689ab5/src/checks/checker.hpp
----------------------------------------------------------------------
diff --git a/src/checks/checker.hpp b/src/checks/checker.hpp
index e8af316..1521b9c 100644
--- a/src/checks/checker.hpp
+++ b/src/checks/checker.hpp
@@ -68,10 +68,9 @@ public:
   Checker(const Checker&) = delete;
   Checker& operator=(const Checker&) = delete;
 
-  /**
-   * Immediately stops checking. Any in-flight checks are dropped.
-   */
-  void stop();
+  // Idempotent helpers for pausing and resuming checking.
+  void pause();
+  void resume();
 
 private:
   explicit Checker(process::Owned<CheckerProcess> process);

http://git-wip-us.apache.org/repos/asf/mesos/blob/3a689ab5/src/launcher/default_executor.cpp
----------------------------------------------------------------------
diff --git a/src/launcher/default_executor.cpp b/src/launcher/default_executor.cpp
index 606fd9c..79785fc 100644
--- a/src/launcher/default_executor.cpp
+++ b/src/launcher/default_executor.cpp
@@ -163,8 +163,12 @@ public:
       }
     }
 
-    // Pause all health checks.
+    // Pause all checks and health checks.
     foreachvalue (Owned<Container> container, containers) {
+      if (container->checker.isSome()) {
+        container->checker->get()->pause();
+      }
+
       if (container->healthChecker.isSome()) {
         container->healthChecker->get()->pause();
       }
@@ -193,8 +197,12 @@ public:
           wait(containers.keys());
         }
 
-        // Resume all health checks.
+        // Resume all checks and health checks.
         foreachvalue (Owned<Container> container, containers) {
+          if (container->checker.isSome()) {
+            container->checker->get()->resume();
+          }
+
           if (container->healthChecker.isSome()) {
             container->healthChecker->get()->resume();
           }
@@ -738,11 +746,11 @@ protected:
       deserialize<agent::Response>(contentType, response->body);
     CHECK_SOME(waitResponse);
 
-    // If there is an associated checker with the task, stop it to
-    // avoid sending check updates after a terminal status update.
+    // If the task is checked, pause the associated checker to avoid
+    // sending check updates after a terminal status update.
     if (container->checker.isSome()) {
       CHECK_NOTNULL(container->checker->get());
-      container->checker->get()->stop();
+      container->checker->get()->pause();
       container->checker = None();
     }
 
@@ -931,13 +939,13 @@ protected:
     CHECK(!container->killing);
     container->killing = true;
 
-    // If the task is checked, stop the associated checker.
+    // If the task is checked, pause the associated checker.
     //
     // TODO(alexr): Once we support `TASK_KILLING` in this executor,
     // consider continuing checking the task after sending `TASK_KILLING`.
     if (container->checker.isSome()) {
       CHECK_NOTNULL(container->checker->get());
-      container->checker->get()->stop();
+      container->checker->get()->pause();
       container->checker = None();
     }
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/3a689ab5/src/launcher/executor.cpp
----------------------------------------------------------------------
diff --git a/src/launcher/executor.cpp b/src/launcher/executor.cpp
index 8bd266e..bc69beb 100644
--- a/src/launcher/executor.cpp
+++ b/src/launcher/executor.cpp
@@ -731,7 +731,7 @@ private:
 
       // Stop checking the task.
       if (checker.get() != nullptr) {
-        checker->stop();
+        checker->pause();
       }
 
       // Stop health checking the task.
@@ -776,7 +776,7 @@ private:
 
     // Stop checking the task.
     if (checker.get() != nullptr) {
-      checker->stop();
+      checker->pause();
     }
 
     // Stop health checking the task.