You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by al...@apache.org on 2017/03/30 17:35:39 UTC
[2/4] mesos git commit: Enabled pause/resume for general checks.
Enabled pause/resume for general checks.
Review: https://reviews.apache.org/r/57912/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/3a689ab5
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/3a689ab5
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/3a689ab5
Branch: refs/heads/master
Commit: 3a689ab552a9ff23dd912e0178d3c5af393f7e84
Parents: 5c9ce37
Author: Gastón Kleiman <ga...@mesosphere.io>
Authored: Thu Mar 30 13:41:14 2017 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Thu Mar 30 19:34:24 2017 +0200
----------------------------------------------------------------------
src/checks/checker.cpp | 52 ++++++++++++++++++++++++++++++++--
src/checks/checker.hpp | 7 ++---
src/launcher/default_executor.cpp | 22 +++++++++-----
src/launcher/executor.cpp | 4 +--
4 files changed, 69 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/3a689ab5/src/checks/checker.cpp
----------------------------------------------------------------------
diff --git a/src/checks/checker.cpp b/src/checks/checker.cpp
index 1664acd..d1e9083 100644
--- a/src/checks/checker.cpp
+++ b/src/checks/checker.cpp
@@ -127,6 +127,9 @@ public:
const Option<pid_t>& _taskPid,
const std::vector<std::string>& _namespaces);
+ void pause();
+ void resume();
+
virtual ~CheckerProcess() {}
protected:
@@ -167,6 +170,7 @@ private:
Option<lambda::function<pid_t(const lambda::function<int()>&)>> clone;
CheckStatusInfo previousCheckStatus;
+ bool paused;
};
@@ -208,9 +212,15 @@ Checker::~Checker()
}
-void Checker::stop()
+void Checker::pause()
+{
+ dispatch(process.get(), &CheckerProcess::pause);
+}
+
+
+void Checker::resume()
{
- terminate(process.get(), true);
+ dispatch(process.get(), &CheckerProcess::resume);
}
@@ -225,7 +235,8 @@ CheckerProcess::CheckerProcess(
updateCallback(_callback),
taskId(_taskId),
taskPid(_taskPid),
- namespaces(_namespaces)
+ namespaces(_namespaces),
+ paused(false)
{
Try<Duration> create = Duration::create(check.delay_seconds());
CHECK_SOME(create);
@@ -286,6 +297,10 @@ void CheckerProcess::finalize()
void CheckerProcess::performCheck()
{
+ if (paused) {
+ return;
+ }
+
Stopwatch stopwatch;
stopwatch.start();
@@ -314,16 +329,47 @@ void CheckerProcess::performCheck()
void CheckerProcess::scheduleNext(const Duration& duration)
{
+ CHECK(!paused);
+
VLOG(1) << "Scheduling check for task '" << taskId << "' in " << duration;
delay(duration, self(), &Self::performCheck);
}
+void CheckerProcess::pause()
+{
+ if (!paused) {
+ VLOG(1) << "Checking for task '" << taskId << "' paused";
+
+ paused = true;
+ }
+}
+
+
+void CheckerProcess::resume()
+{
+ if (paused) {
+ VLOG(1) << "Checking for task '" << taskId << "' resumed";
+
+ paused = false;
+
+ // Schedule a check immediately.
+ scheduleNext(Duration::zero());
+ }
+}
+
void CheckerProcess::processCheckResult(
const Stopwatch& stopwatch,
const CheckStatusInfo& result)
{
+ // `Checker` might have been paused while performing the check.
+ if (paused) {
+ LOG(INFO) << "Ignoring " << check.type() << " check result for"
+ << " task '" << taskId << "': checking is paused";
+ return;
+ }
+
VLOG(1) << "Performed " << check.type() << " check"
<< " for task '" << taskId << "' in " << stopwatch.elapsed();
http://git-wip-us.apache.org/repos/asf/mesos/blob/3a689ab5/src/checks/checker.hpp
----------------------------------------------------------------------
diff --git a/src/checks/checker.hpp b/src/checks/checker.hpp
index e8af316..1521b9c 100644
--- a/src/checks/checker.hpp
+++ b/src/checks/checker.hpp
@@ -68,10 +68,9 @@ public:
Checker(const Checker&) = delete;
Checker& operator=(const Checker&) = delete;
- /**
- * Immediately stops checking. Any in-flight checks are dropped.
- */
- void stop();
+ // Idempotent helpers for pausing and resuming checking.
+ void pause();
+ void resume();
private:
explicit Checker(process::Owned<CheckerProcess> process);
http://git-wip-us.apache.org/repos/asf/mesos/blob/3a689ab5/src/launcher/default_executor.cpp
----------------------------------------------------------------------
diff --git a/src/launcher/default_executor.cpp b/src/launcher/default_executor.cpp
index 606fd9c..79785fc 100644
--- a/src/launcher/default_executor.cpp
+++ b/src/launcher/default_executor.cpp
@@ -163,8 +163,12 @@ public:
}
}
- // Pause all health checks.
+ // Pause all checks and health checks.
foreachvalue (Owned<Container> container, containers) {
+ if (container->checker.isSome()) {
+ container->checker->get()->pause();
+ }
+
if (container->healthChecker.isSome()) {
container->healthChecker->get()->pause();
}
@@ -193,8 +197,12 @@ public:
wait(containers.keys());
}
- // Resume all health checks.
+ // Resume all checks and health checks.
foreachvalue (Owned<Container> container, containers) {
+ if (container->checker.isSome()) {
+ container->checker->get()->resume();
+ }
+
if (container->healthChecker.isSome()) {
container->healthChecker->get()->resume();
}
@@ -738,11 +746,11 @@ protected:
deserialize<agent::Response>(contentType, response->body);
CHECK_SOME(waitResponse);
- // If there is an associated checker with the task, stop it to
- // avoid sending check updates after a terminal status update.
+ // If the task is checked, pause the associated checker to avoid
+ // sending check updates after a terminal status update.
if (container->checker.isSome()) {
CHECK_NOTNULL(container->checker->get());
- container->checker->get()->stop();
+ container->checker->get()->pause();
container->checker = None();
}
@@ -931,13 +939,13 @@ protected:
CHECK(!container->killing);
container->killing = true;
- // If the task is checked, stop the associated checker.
+ // If the task is checked, pause the associated checker.
//
// TODO(alexr): Once we support `TASK_KILLING` in this executor,
// consider continuing checking the task after sending `TASK_KILLING`.
if (container->checker.isSome()) {
CHECK_NOTNULL(container->checker->get());
- container->checker->get()->stop();
+ container->checker->get()->pause();
container->checker = None();
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/3a689ab5/src/launcher/executor.cpp
----------------------------------------------------------------------
diff --git a/src/launcher/executor.cpp b/src/launcher/executor.cpp
index 8bd266e..bc69beb 100644
--- a/src/launcher/executor.cpp
+++ b/src/launcher/executor.cpp
@@ -731,7 +731,7 @@ private:
// Stop checking the task.
if (checker.get() != nullptr) {
- checker->stop();
+ checker->pause();
}
// Stop health checking the task.
@@ -776,7 +776,7 @@ private:
// Stop checking the task.
if (checker.get() != nullptr) {
- checker->stop();
+ checker->pause();
}
// Stop health checking the task.