You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by al...@apache.org on 2016/08/28 15:04:01 UTC
[01/10] mesos git commit: Removed the health check binary from
libprocess.
Repository: mesos
Updated Branches:
refs/heads/master 537584cf2 -> 037a346a2
Removed the health check binary from libprocess.
With https://reviews.apache.org/r/49389/ and
https://reviews.apache.org/r/49555/, both command and
docker executors use health checks via the library. Hence
the mesos-health-check binary is not necessary anymore.
Review: https://reviews.apache.org/r/50657/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/c09bb378
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/c09bb378
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/c09bb378
Branch: refs/heads/master
Commit: c09bb378c4b31c3a28accd01b6d6522372ced1ab
Parents: b16e5fa
Author: haosdent huang <ha...@gmail.com>
Authored: Fri Aug 26 16:32:20 2016 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Sun Aug 28 16:31:54 2016 +0200
----------------------------------------------------------------------
3rdparty/libprocess/cmake/Process3rdpartyConfigure.cmake | 5 -----
1 file changed, 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/c09bb378/3rdparty/libprocess/cmake/Process3rdpartyConfigure.cmake
----------------------------------------------------------------------
diff --git a/3rdparty/libprocess/cmake/Process3rdpartyConfigure.cmake b/3rdparty/libprocess/cmake/Process3rdpartyConfigure.cmake
index 497f610..26f95da 100644
--- a/3rdparty/libprocess/cmake/Process3rdpartyConfigure.cmake
+++ b/3rdparty/libprocess/cmake/Process3rdpartyConfigure.cmake
@@ -152,11 +152,6 @@ set(
MESOS_FETCHER mesos-fetcher
CACHE STRING "Target for fetcher")
-# Define target for health-check.
-set(
- MESOS_HEALTH_CHECK mesos-health-check
- CACHE STRING "Target for healt-check")
-
# Define target for master.
set(
MESOS_MASTER mesos-master
[04/10] mesos git commit: Removed the health check binary.
Posted by al...@apache.org.
Removed the health check binary.
With https://reviews.apache.org/r/49389/ and
https://reviews.apache.org/r/49555/, both command and
docker executors use health checks via the library. Hence
the mesos-health-check binary is not necessary anymore.
Review: https://reviews.apache.org/r/49556/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/f19a6aa0
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/f19a6aa0
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/f19a6aa0
Branch: refs/heads/master
Commit: f19a6aa07d418b7469745248b5b1c23ccf155615
Parents: c09bb37
Author: haosdent huang <ha...@gmail.com>
Authored: Fri Aug 26 16:32:58 2016 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Sun Aug 28 16:31:55 2016 +0200
----------------------------------------------------------------------
src/CMakeLists.txt | 1 -
src/Makefile.am | 5 -
src/health-check/CMakeLists.txt | 46 ---------
src/health-check/main.cpp | 176 -----------------------------------
4 files changed, 228 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/f19a6aa0/src/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index ccd9892..b14132a 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -516,7 +516,6 @@ target_link_libraries(${MESOS_PROTOBUF_TARGET} ${AGENT_LIBS})
##############################
add_subdirectory(cli/)
add_subdirectory(docker/)
-add_subdirectory(health-check/)
add_subdirectory(launcher/)
add_subdirectory(local/)
add_subdirectory(master/)
http://git-wip-us.apache.org/repos/asf/mesos/blob/f19a6aa0/src/Makefile.am
----------------------------------------------------------------------
diff --git a/src/Makefile.am b/src/Makefile.am
index b577b42..a307482 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1356,11 +1356,6 @@ mesos_network_helper_CPPFLAGS = $(MESOS_CPPFLAGS)
mesos_network_helper_LDADD = libmesos.la $(LDADD)
endif
-pkglibexec_PROGRAMS += mesos-health-check
-mesos_health_check_SOURCES = health-check/main.cpp
-mesos_health_check_CPPFLAGS = $(MESOS_CPPFLAGS)
-mesos_health_check_LDADD = libmesos.la $(LDADD)
-
pkglibexec_PROGRAMS += mesos-usage
mesos_usage_SOURCES = usage/main.cpp
mesos_usage_CPPFLAGS = $(MESOS_CPPFLAGS)
http://git-wip-us.apache.org/repos/asf/mesos/blob/f19a6aa0/src/health-check/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/health-check/CMakeLists.txt b/src/health-check/CMakeLists.txt
deleted file mode 100644
index 69e9566..0000000
--- a/src/health-check/CMakeLists.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# MESOS HEALTH SOURCE.
-######################
-set(HEALTH_EXECUTABLE_SRC
- ${HEALTH_EXECUTABLE_SRC}
- main.cpp
- )
-
-# INCLUDE DIRECTIVES FOR MESOS HEALTH CHECK EXECUTABLE (generates, e.g.,
-# -I/path/to/thing on Linux).
-########################################################################
-include_directories(${AGENT_INCLUDE_DIRS})
-
-# LINKING LIBRARIES BY DIRECTORY (might generate, e.g., -L/path/to/thing on
-# Linux).
-###########################################################################
-link_directories(${AGENT_LIB_DIRS})
-
-# THE MESOS HELATH CHECK EXECUTABLE.
-####################################
-add_executable(${MESOS_HEALTH_CHECK} ${HEALTH_EXECUTABLE_SRC})
-
-# ADD LINKER FLAGS (generates, e.g., -lglog on Linux).
-######################################################
-target_link_libraries(${MESOS_HEALTH_CHECK} ${AGENT_LIBS} ${MESOS_TARGET})
-
-
-# ADD BINARY DEPENDENCIES (tells CMake what to compile/build first).
-####################################################################
-add_dependencies(${MESOS_HEALTH_CHECK} ${MESOS_TARGET})
http://git-wip-us.apache.org/repos/asf/mesos/blob/f19a6aa0/src/health-check/main.cpp
----------------------------------------------------------------------
diff --git a/src/health-check/main.cpp b/src/health-check/main.cpp
deleted file mode 100644
index 5346e30..0000000
--- a/src/health-check/main.cpp
+++ /dev/null
@@ -1,176 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <signal.h>
-#include <stdio.h>
-#include <string.h>
-#ifndef __WINDOWS__
-#include <unistd.h>
-#endif // __WINDOWS__
-
-#include <iostream>
-#include <string>
-
-#include <mesos/mesos.hpp>
-
-#include <process/future.hpp>
-#include <process/pid.hpp>
-#include <process/process.hpp>
-#include <process/protobuf.hpp>
-
-#include <stout/flags.hpp>
-#include <stout/json.hpp>
-#include <stout/option.hpp>
-#include <stout/path.hpp>
-#include <stout/protobuf.hpp>
-
-#include "health-check/health_checker.hpp"
-
-using namespace mesos;
-
-using std::cout;
-using std::cerr;
-using std::endl;
-using std::string;
-
-using process::UPID;
-
-class Flags : public virtual flags::FlagsBase
-{
-public:
- Flags()
- {
- add(&Flags::health_check_json,
- "health_check_json",
- "JSON describing health check to perform");
-
- add(&Flags::executor,
- "executor",
- "Executor UPID to send health check messages to");
-
- add(&Flags::task_id,
- "task_id",
- "Task ID that this health check process is checking");
- }
-
- Option<string> health_check_json;
- Option<UPID> executor;
- Option<string> task_id;
-};
-
-
-void usage(const char* argv0, const flags::FlagsBase& flags)
-{
- cerr << "Usage: " << Path(argv0).basename() << " [...]" << endl
- << endl
- << "Supported options:" << endl
- << flags.usage();
-}
-
-
-int main(int argc, char** argv)
-{
- GOOGLE_PROTOBUF_VERIFY_VERSION;
-
- Flags flags;
-
- Try<flags::Warnings> load = flags.load(None(), argc, argv);
-
- if (load.isError()) {
- cerr << flags.usage(load.error()) << endl;
- return EXIT_FAILURE;
- }
-
- if (flags.help) {
- cout << flags.usage() << endl;
- return EXIT_SUCCESS;
- }
-
- // Log any flag warnings.
- foreach (const flags::Warning& warning, load->warnings) {
- LOG(WARNING) << warning.message;
- }
-
- if (flags.health_check_json.isNone()) {
- cerr << flags.usage("Expected JSON with health check description") << endl;
- return EXIT_FAILURE;
- }
-
- Try<JSON::Object> parse =
- JSON::parse<JSON::Object>(flags.health_check_json.get());
-
- if (parse.isError()) {
- cerr << flags.usage("Failed to parse --health_check_json: " + parse.error())
- << endl;
- return EXIT_FAILURE;
- }
-
- Try<HealthCheck> check = protobuf::parse<HealthCheck>(parse.get());
-
- if (check.isError()) {
- cerr << flags.usage("Failed to parse --health_check_json: " + check.error())
- << endl;
- return EXIT_SUCCESS;
- }
-
- if (flags.executor.isNone()) {
- cerr << flags.usage("Missing required option --executor") << endl;
- return EXIT_FAILURE;
- }
-
- if (check.get().has_http() && check.get().has_command()) {
- cerr << flags.usage("Both 'http' and 'command' health check requested")
- << endl;
- return EXIT_FAILURE;
- }
-
- if (!check.get().has_http() && !check.get().has_command()) {
- cerr << flags.usage("Expecting one of 'http' or 'command' health check")
- << endl;
- return EXIT_FAILURE;
- }
-
- if (flags.task_id.isNone()) {
- cerr << flags.usage("Missing required option --task_id") << endl;
- return EXIT_FAILURE;
- }
-
- TaskID taskID;
- taskID.set_value(flags.task_id.get());
-
- mesos::internal::health::HealthCheckerProcess process(
- check.get(),
- flags.executor.get(),
- taskID);
-
- process::spawn(&process);
-
- process::Future<Nothing> checking =
- process::dispatch(
- process, &mesos::internal::health::HealthCheckerProcess::healthCheck);
-
- checking.await();
-
- process::terminate(process);
- process::wait(process);
-
- if (checking.isFailed()) {
- LOG(WARNING) << "Health check failed " << checking.failure();
- return EXIT_FAILURE;
- }
-
- return EXIT_SUCCESS;
-}
[05/10] mesos git commit: Refactored `_commandHealthCheck` in
HealthChecker.
Posted by al...@apache.org.
Refactored `_commandHealthCheck` in HealthChecker.
* Remove blocking `Future::await` call.
* Adjust the level of some logs.
* Adjust style.
* Change the interface of health check handlers to
`Future<Nothing>` to make error handling easier.
Review: https://reviews.apache.org/r/51069/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/7380d130
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/7380d130
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/7380d130
Branch: refs/heads/master
Commit: 7380d130a4b03fd110acbcb0636f8615b8f28cee
Parents: f19a6aa
Author: haosdent huang <ha...@gmail.com>
Authored: Fri Aug 26 16:33:12 2016 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Sun Aug 28 16:42:41 2016 +0200
----------------------------------------------------------------------
src/health-check/health_checker.cpp | 123 ++++++++++++++++++-------------
src/health-check/health_checker.hpp | 10 ++-
2 files changed, 78 insertions(+), 55 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/7380d130/src/health-check/health_checker.cpp
----------------------------------------------------------------------
diff --git a/src/health-check/health_checker.cpp b/src/health-check/health_checker.cpp
index 1a2a6df..097117a 100644
--- a/src/health-check/health_checker.cpp
+++ b/src/health-check/health_checker.cpp
@@ -29,6 +29,7 @@
#include <mesos/mesos.hpp>
+#include <process/collect.hpp>
#include <process/delay.hpp>
#include <process/subprocess.hpp>
@@ -45,6 +46,7 @@
using process::delay;
using process::Clock;
+using process::Failure;
using process::Future;
using process::NO_SETSID;
using process::Owned;
@@ -116,9 +118,9 @@ HealthCheckerProcess::HealthCheckerProcess(
Future<Nothing> HealthCheckerProcess::healthCheck()
{
- VLOG(2) << "Health checks starting in "
- << Seconds(check.delay_seconds()) << ", grace period "
- << Seconds(check.grace_period_seconds());
+ VLOG(1) << "Health check starting in "
+ << Seconds(check.delay_seconds()) << ", grace period "
+ << Seconds(check.grace_period_seconds());
startTime = Clock::now();
@@ -137,7 +139,8 @@ void HealthCheckerProcess::failure(const string& message)
}
consecutiveFailures++;
- VLOG(1) << "#" << consecutiveFailures << " check failed: " << message;
+ LOG(WARNING) << "Health check failed " << consecutiveFailures
+ << " times consecutively: " << message;
bool killTask = consecutiveFailures >= check.consecutive_failures();
@@ -163,7 +166,7 @@ void HealthCheckerProcess::failure(const string& message)
void HealthCheckerProcess::success()
{
- VLOG(1) << "Check passed";
+ VLOG(1) << HealthCheck::Type_Name(check.type()) << " health check passed";
// Send a healthy status update on the first success,
// and on the first success following failure(s).
@@ -182,30 +185,49 @@ void HealthCheckerProcess::success()
void HealthCheckerProcess::_healthCheck()
{
+ Future<Nothing> checkResult;
+
switch (check.type()) {
case HealthCheck::COMMAND: {
- _commandHealthCheck();
- return;
+ checkResult = _commandHealthCheck();
+ break;
}
case HealthCheck::HTTP: {
- _httpHealthCheck();
- return;
+ checkResult = _httpHealthCheck();
+ break;
}
case HealthCheck::TCP: {
- _tcpHealthCheck();
- return;
+ checkResult = _tcpHealthCheck();
+ break;
}
default: {
UNREACHABLE();
}
}
+
+ checkResult.onAny(defer(self(), &Self::__healthCheck, lambda::_1));
+}
+
+
+void HealthCheckerProcess::__healthCheck(const Future<Nothing>& future)
+{
+ if (future.isReady()) {
+ success();
+ return;
+ }
+
+ string message = HealthCheck::Type_Name(check.type()) +
+ " health check failed: " +
+ (future.isFailed() ? future.failure() : "discarded");
+
+ failure(message);
}
-void HealthCheckerProcess::_commandHealthCheck()
+Future<Nothing> HealthCheckerProcess::_commandHealthCheck()
{
CHECK_EQ(HealthCheck::COMMAND, check.type());
CHECK(check.has_command());
@@ -220,11 +242,11 @@ void HealthCheckerProcess::_commandHealthCheck()
}
// Launch the subprocess.
- Option<Try<Subprocess>> external = None();
+ Try<Subprocess> external = Error("Not launched");
if (command.shell()) {
// Use the shell variant.
- VLOG(2) << "Launching health command '" << command.value() << "'";
+ VLOG(1) << "Launching command health check '" << command.value() << "'";
external = subprocess(
command.value(),
@@ -240,7 +262,7 @@ void HealthCheckerProcess::_commandHealthCheck()
argv.push_back(arg);
}
- VLOG(2) << "Launching health command [" << command.value() << ", "
+ VLOG(1) << "Launching command health check [" << command.value() << ", "
<< strings::join(", ", argv) << "]";
external = subprocess(
@@ -254,72 +276,69 @@ void HealthCheckerProcess::_commandHealthCheck()
environment);
}
- CHECK_SOME(external);
-
- if (external.get().isError()) {
- failure("Error creating subprocess for healthcheck: " +
- external.get().error());
- return;
+ if (external.isError()) {
+ return Failure("Failed to create subprocess: " + external.error());
}
- pid_t commandPid = external.get().get().pid();
+ pid_t commandPid = external->pid();
+ Duration timeout = Seconds(check.timeout_seconds());
- Future<Option<int>> status = external.get().get().status();
- status.await(Seconds(check.timeout_seconds()));
+ return external->status()
+ .after(timeout, [timeout, commandPid](Future<Option<int>> future) {
+ future.discard();
- if (!status.isReady()) {
- string msg = "Command check failed with reason: ";
- if (status.isFailed()) {
- msg += "failed with error: " + status.failure();
- } else if (status.isDiscarded()) {
- msg += "status future discarded";
- } else {
- msg += "status still pending after timeout " +
- stringify(Seconds(check.timeout_seconds()));
- }
+ if (commandPid != -1) {
+ // Cleanup the external command process.
+ VLOG(1) << "Killing the command health check process " << commandPid;
- if (commandPid != -1) {
- // Cleanup the external command process.
- os::killtree(commandPid, SIGKILL);
- VLOG(1) << "Kill health check command " << commandPid;
- }
+ os::killtree(commandPid, SIGKILL);
+ }
- failure(msg);
- return;
- }
+ return Failure(
+ "Command has not returned after " + stringify(timeout) +
+ "; aborting");
+ })
+ .then([](const Option<int>& status) -> Future<Nothing> {
+ if (status.isNone()) {
+ return Failure("Failed to reap the command process");
+ }
- int statusCode = status.get().get();
- if (statusCode != 0) {
- string message = "Health command check " + WSTRINGIFY(statusCode);
- failure(message);
- } else {
- success();
- }
+ int statusCode = status.get();
+ if (statusCode != 0) {
+ return Failure("Command returned " + WSTRINGIFY(statusCode));
+ }
+
+ return Nothing();
+ });
}
-void HealthCheckerProcess::_httpHealthCheck()
+Future<Nothing> HealthCheckerProcess::_httpHealthCheck()
{
CHECK_EQ(HealthCheck::HTTP, check.type());
CHECK(check.has_http());
promise.fail("HTTP health check is not supported");
+
+ return Nothing();
}
-void HealthCheckerProcess::_tcpHealthCheck()
+Future<Nothing> HealthCheckerProcess::_tcpHealthCheck()
{
CHECK_EQ(HealthCheck::TCP, check.type());
CHECK(check.has_tcp());
promise.fail("TCP health check is not supported");
+
+ return Nothing();
}
void HealthCheckerProcess::reschedule()
{
VLOG(1) << "Rescheduling health check in "
- << Seconds(check.interval_seconds());
+ << Seconds(check.interval_seconds());
delay(Seconds(check.interval_seconds()), self(), &Self::_healthCheck);
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/7380d130/src/health-check/health_checker.hpp
----------------------------------------------------------------------
diff --git a/src/health-check/health_checker.hpp b/src/health-check/health_checker.hpp
index b4548f3..83cedfb 100644
--- a/src/health-check/health_checker.hpp
+++ b/src/health-check/health_checker.hpp
@@ -76,9 +76,13 @@ private:
void _healthCheck();
- void _commandHealthCheck();
- void _httpHealthCheck();
- void _tcpHealthCheck();
+ void __healthCheck(const process::Future<Nothing>& future);
+
+ process::Future<Nothing> _commandHealthCheck();
+
+ process::Future<Nothing> _httpHealthCheck();
+
+ process::Future<Nothing> _tcpHealthCheck();
void reschedule();
[09/10] mesos git commit: Entered appropriate task's namespaces
during health checking.
Posted by al...@apache.org.
Entered appropriate task's namespaces during health checking.
Review: https://reviews.apache.org/r/51379/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/bb084fb8
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/bb084fb8
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/bb084fb8
Branch: refs/heads/master
Commit: bb084fb84cb59ed8eb1a554b17141d09ff0ed262
Parents: d2f8bfa
Author: haosdent huang <ha...@gmail.com>
Authored: Fri Aug 26 16:34:41 2016 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Sun Aug 28 16:42:58 2016 +0200
----------------------------------------------------------------------
src/docker/executor.cpp | 15 ++++++-
src/health-check/health_checker.cpp | 73 ++++++++++++++++++++++++++++----
src/health-check/health_checker.hpp | 25 ++++++++++-
src/launcher/executor.cpp | 16 ++++++-
4 files changed, 117 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/bb084fb8/src/docker/executor.cpp
----------------------------------------------------------------------
diff --git a/src/docker/executor.cpp b/src/docker/executor.cpp
index 5508819..f5981b0 100644
--- a/src/docker/executor.cpp
+++ b/src/docker/executor.cpp
@@ -181,6 +181,8 @@ public:
inspect = docker->inspect(containerName, DOCKER_INSPECT_DELAY)
.then(defer(self(), [=](const Docker::Container& container) {
if (!killed) {
+ containerPid = container.pid;
+
TaskStatus status;
status.mutable_task_id()->CopyFrom(taskId.get());
status.set_state(TASK_RUNNING);
@@ -510,10 +512,20 @@ private:
strings::join(" ", commandArguments));
}
+ vector<string> namespaces;
+ if (healthCheck.type() == HealthCheck::HTTP ||
+ healthCheck.type() == HealthCheck::TCP) {
+ // Make sure HTTP and TCP health checks are run
+ // from the container's network namespace.
+ namespaces.push_back("net");
+ }
+
Try<Owned<health::HealthChecker>> _checker = health::HealthChecker::create(
healthCheck,
self(),
- task.task_id());
+ task.task_id(),
+ containerPid,
+ namespaces);
if (_checker.isError()) {
// TODO(gilbert): Consider ABORT and return a TASK_FAILED here.
@@ -555,6 +567,7 @@ private:
Option<TaskID> taskId;
Owned<health::HealthChecker> checker;
Option<NetworkInfo> containerNetworkInfo;
+ Option<pid_t> containerPid;
};
http://git-wip-us.apache.org/repos/asf/mesos/blob/bb084fb8/src/health-check/health_checker.cpp
----------------------------------------------------------------------
diff --git a/src/health-check/health_checker.cpp b/src/health-check/health_checker.cpp
index e5e7b94..f373df1 100644
--- a/src/health-check/health_checker.cpp
+++ b/src/health-check/health_checker.cpp
@@ -46,6 +46,10 @@
#include "common/status_utils.hpp"
+#ifdef __linux__
+#include "linux/ns.hpp"
+#endif
+
using process::delay;
using process::Clock;
using process::Failure;
@@ -73,10 +77,40 @@ static const string DEFAULT_HTTP_SCHEME = "http";
static const string DEFAULT_DOMAIN = "127.0.0.1";
+#ifdef __linux__
+pid_t cloneWithSetns(
+ const lambda::function<int()>& func,
+ Option<pid_t> taskPid,
+ const vector<string>& namespaces)
+{
+ return process::defaultClone([=]() -> int {
+ if (taskPid.isSome()) {
+ foreach (const string& ns, namespaces) {
+ Try<Nothing> setns = ns::setns(taskPid.get(), ns);
+ if (setns.isError()) {
+ // This effectively aborts the health check.
+ LOG(FATAL) << "Failed to enter the " << ns << " namespace of "
+ << "task (pid: '" << taskPid.get() << "'): "
+ << setns.error();
+ }
+
+ VLOG(1) << "Entered the " << ns << " namespace of "
+ << "task (pid: '" << taskPid.get() << "') successfully";
+ }
+ }
+
+ return func();
+ });
+}
+#endif
+
+
Try<Owned<HealthChecker>> HealthChecker::create(
const HealthCheck& check,
const UPID& executor,
- const TaskID& taskID)
+ const TaskID& taskID,
+ Option<pid_t> taskPid,
+ const vector<string>& namespaces)
{
// Validate the 'HealthCheck' protobuf.
Option<Error> error = validation::healthCheck(check);
@@ -87,7 +121,9 @@ Try<Owned<HealthChecker>> HealthChecker::create(
Owned<HealthCheckerProcess> process(new HealthCheckerProcess(
check,
executor,
- taskID));
+ taskID,
+ taskPid,
+ namespaces));
return Owned<HealthChecker>(new HealthChecker(process));
}
@@ -117,13 +153,24 @@ Future<Nothing> HealthChecker::healthCheck()
HealthCheckerProcess::HealthCheckerProcess(
const HealthCheck& _check,
const UPID& _executor,
- const TaskID& _taskID)
+ const TaskID& _taskID,
+ Option<pid_t> _taskPid,
+ const vector<string>& _namespaces)
: ProcessBase(process::ID::generate("health-checker")),
check(_check),
initializing(true),
executor(_executor),
taskID(_taskID),
- consecutiveFailures(0) {}
+ taskPid(_taskPid),
+ namespaces(_namespaces),
+ consecutiveFailures(0)
+{
+#ifdef __linux__
+ if (!namespaces.empty()) {
+ clone = lambda::bind(&cloneWithSetns, lambda::_1, taskPid, namespaces);
+ }
+#endif
+}
Future<Nothing> HealthCheckerProcess::healthCheck()
@@ -264,7 +311,8 @@ Future<Nothing> HealthCheckerProcess::_commandHealthCheck()
Subprocess::FD(STDERR_FILENO),
Subprocess::FD(STDERR_FILENO),
NO_SETSID,
- environment);
+ environment,
+ clone);
} else {
// Use the exec variant.
vector<string> argv;
@@ -283,7 +331,8 @@ Future<Nothing> HealthCheckerProcess::_commandHealthCheck()
Subprocess::FD(STDERR_FILENO),
NO_SETSID,
nullptr,
- environment);
+ environment,
+ clone);
}
if (external.isError()) {
@@ -353,7 +402,11 @@ Future<Nothing> HealthCheckerProcess::_httpHealthCheck()
argv,
Subprocess::PATH("/dev/null"),
Subprocess::PIPE(),
- Subprocess::PIPE());
+ Subprocess::PIPE(),
+ NO_SETSID,
+ nullptr,
+ None(),
+ clone);
if (s.isError()) {
return Failure("Failed to create the curl subprocess: " + s.error());
@@ -461,7 +514,11 @@ Future<Nothing> HealthCheckerProcess::_tcpHealthCheck()
argv,
Subprocess::PATH("/dev/null"),
Subprocess::PIPE(),
- Subprocess::PIPE());
+ Subprocess::PIPE(),
+ NO_SETSID,
+ nullptr,
+ None(),
+ clone);
if (s.isError()) {
return Failure("Failed to create the bash subprocess: " + s.error());
http://git-wip-us.apache.org/repos/asf/mesos/blob/bb084fb8/src/health-check/health_checker.hpp
----------------------------------------------------------------------
diff --git a/src/health-check/health_checker.hpp b/src/health-check/health_checker.hpp
index 52acdef..392b4d5 100644
--- a/src/health-check/health_checker.hpp
+++ b/src/health-check/health_checker.hpp
@@ -19,6 +19,7 @@
#include <string>
#include <tuple>
+#include <vector>
#include <mesos/mesos.hpp>
@@ -43,10 +44,25 @@ class HealthCheckerProcess;
class HealthChecker
{
public:
+ /**
+ * Attempts to create a `HealthChecker` object.
+ *
+ * @param check The protobuf message definition of health check.
+ * @param executor The executor UPID to which health check results will be
+ * reported.
+ * @param taskID The TaskID of the target task.
+ * @param taskPid The target task's pid used to enter the specified
+ * namespaces.
+ * @param namespaces The namespaces to enter prior to performing a single
+ * health check.
+ * @return A `HealthChecker` object or an error if `create` fails.
+ */
static Try<process::Owned<HealthChecker>> create(
const HealthCheck& check,
const process::UPID& executor,
- const TaskID& taskID);
+ const TaskID& taskID,
+ Option<pid_t> taskPid,
+ const std::vector<std::string>& namespaces);
~HealthChecker();
@@ -65,7 +81,9 @@ public:
HealthCheckerProcess(
const HealthCheck& _check,
const process::UPID& _executor,
- const TaskID& _taskID);
+ const TaskID& _taskID,
+ Option<pid_t> _taskPid,
+ const std::vector<std::string>& _namespaces);
virtual ~HealthCheckerProcess() {}
@@ -104,6 +122,9 @@ private:
bool initializing;
process::UPID executor;
TaskID taskID;
+ Option<pid_t> taskPid;
+ std::vector<std::string> namespaces;
+ Option<lambda::function<pid_t(const lambda::function<int()>&)>> clone;
uint32_t consecutiveFailures;
process::Time startTime;
};
http://git-wip-us.apache.org/repos/asf/mesos/blob/bb084fb8/src/launcher/executor.cpp
----------------------------------------------------------------------
diff --git a/src/launcher/executor.cpp b/src/launcher/executor.cpp
index 71ede1e..5370634 100644
--- a/src/launcher/executor.cpp
+++ b/src/launcher/executor.cpp
@@ -418,11 +418,25 @@ protected:
cout << "Forked command at " << pid << endl;
if (task->has_health_check()) {
+ vector<string> namespaces;
+ if (rootfs.isSome() &&
+ task->health_check().type() == HealthCheck::COMMAND) {
+ // Make sure command health checks are run from the task's mount
+ // namespace. Otherwise if rootfs is specified the command binary
+ // may not be available in the executor.
+ //
+ // NOTE: The command executor shares the network namespace
+ // with its task, hence no need to enter it explicitly.
+ namespaces.push_back("mnt");
+ }
+
Try<Owned<health::HealthChecker>> _checker =
health::HealthChecker::create(
task->health_check(),
self(),
- task->task_id());
+ task->task_id(),
+ pid,
+ namespaces);
if (_checker.isError()) {
// TODO(gilbert): Consider ABORT and return a TASK_FAILED here.
[03/10] mesos git commit: Unshared the mount namespace when launching
mesos-containerizer.
Posted by al...@apache.org.
Unshared the mount namespace when launching mesos-containerizer.
When launching a task from the mesos-executor, ensure that the mount
namespace is not shared between the task and the executor if the task
specifies rootfs. Otherwise, `pivot_root` in the mesos-containerizer
binary would affect the mount namespace of the executor and possibly
prevent it from accessing some binaries or libraries.
Review: https://reviews.apache.org/r/51266/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/564eda0a
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/564eda0a
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/564eda0a
Branch: refs/heads/master
Commit: 564eda0a5bbb74a9a6b2b8dfdc4f34c0a213d3c6
Parents: 537584c
Author: haosdent huang <ha...@gmail.com>
Authored: Fri Aug 26 16:31:07 2016 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Sun Aug 28 16:31:54 2016 +0200
----------------------------------------------------------------------
src/launcher/posix/executor.cpp | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/564eda0a/src/launcher/posix/executor.cpp
----------------------------------------------------------------------
diff --git a/src/launcher/posix/executor.cpp b/src/launcher/posix/executor.cpp
index 50b9b30..7c40ebe 100644
--- a/src/launcher/posix/executor.cpp
+++ b/src/launcher/posix/executor.cpp
@@ -60,6 +60,9 @@ pid_t launchTaskPosix(
const Option<string>& sandboxDirectory,
const Option<string>& workingDirectory)
{
+ // Prepare the flags to pass to the launch process.
+ MesosContainerizerLaunch::Flags launchFlags;
+
if (rootfs.isSome()) {
// The command executor is responsible for chrooting into the
// root filesystem and changing the user before exec-ing the
@@ -73,14 +76,16 @@ pid_t launchTaskPosix(
} else if (_user.get() != "root") {
ABORT("The command executor requires root with rootfs");
}
+
+ // Ensure that mount namespace of the executor is not affected by
+ // changes in its task's namespace induced by calling `pivot_root`
+ // as part of the task setup in mesos-containerizer binary.
+ launchFlags.unshare_namespace_mnt = true;
#else
ABORT("Not expecting root volume with non-linux platform");
#endif // __linux__
}
- // Prepare the flags to pass to the launch process.
- MesosContainerizerLaunch::Flags launchFlags;
-
launchFlags.command = JSON::protobuf(command);
if (rootfs.isSome()) {
[10/10] mesos git commit: Updated the CHANGELOG with HTTP(S) and TCP
health checks summary.
Posted by al...@apache.org.
Updated the CHANGELOG with HTTP(S) and TCP health checks summary.
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/037a346a
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/037a346a
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/037a346a
Branch: refs/heads/master
Commit: 037a346a205ad7bdba99d771855f8caeea835d4a
Parents: bb084fb
Author: Alexander Rukletsov <al...@apache.org>
Authored: Sun Aug 28 16:27:42 2016 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Sun Aug 28 16:43:21 2016 +0200
----------------------------------------------------------------------
CHANGELOG | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/037a346a/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index 587d843..eadf546 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,8 +2,20 @@ Release Notes - Mesos - Version 1.1.0 (WIP)
--------------------------------------------
This release contains the following new features:
-Deprecations:
+ * [MESOS-2533] - Support for HTTP and HTTPS health checks. Executors may now
+ use the updated `HealthCheck` protobuf to implement HTTP(S) health checks.
+ Both default executors (command and docker) leverage the `curl` binary for
+ sending HTTP(S) requests and connect to `127.0.0.1`, hence a task must
+ listen on all interfaces. On Linux, for BRIDGE and USER modes, the docker
+ executor enters the task's network namespace.
+
+ * [MESOS-3567] - Support for TCP health checks. Executors may now use the
+ updated `HealthCheck` protobuf to implement TCP health checks. Both default
+ executors (command and docker) connect to `127.0.0.1`, hence a task must
+ listen on all interfaces. On Linux, for BRIDGE and USER modes, the docker
+ executor enters the task's network namespace.
+Deprecations:
* The following metrics are deprecated and will be removed in Mesos 1.4:
master/slave_shutdowns_scheduled, master/slave_shutdowns_canceled, and
slave_shutdowns_completed. As of Mesos 1.1.0, these metrics will always be
@@ -11,6 +23,8 @@ Deprecations:
master/slave_unreachable_scheduled, master/slave_unreachable_canceled,
and master/slave_unreachable_completed.
+ * [MESOS-5955] - Health check binary "mesos-health-check" is removed.
+
Release Notes - Mesos - Version 1.0.1
--------------------------------------------
[06/10] mesos git commit: Moved `process::internal::defaultClone` to
`process::`.
Posted by al...@apache.org.
Moved `process::internal::defaultClone` to `process::`.
Review: https://reviews.apache.org/r/51378/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/d2f8bfab
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/d2f8bfab
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/d2f8bfab
Branch: refs/heads/master
Commit: d2f8bfabe48bee5533c30afaecbcf76ef081248f
Parents: 1d00492
Author: haosdent huang <ha...@gmail.com>
Authored: Fri Aug 26 16:34:01 2016 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Sun Aug 28 16:42:57 2016 +0200
----------------------------------------------------------------------
.../include/process/posix/subprocess.hpp | 33 ++++++++++----------
1 file changed, 17 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/d2f8bfab/3rdparty/libprocess/include/process/posix/subprocess.hpp
----------------------------------------------------------------------
diff --git a/3rdparty/libprocess/include/process/posix/subprocess.hpp b/3rdparty/libprocess/include/process/posix/subprocess.hpp
index a871fe4..57f71d1 100644
--- a/3rdparty/libprocess/include/process/posix/subprocess.hpp
+++ b/3rdparty/libprocess/include/process/posix/subprocess.hpp
@@ -51,6 +51,23 @@ namespace process {
using InputFileDescriptors = Subprocess::IO::InputFileDescriptors;
using OutputFileDescriptors = Subprocess::IO::OutputFileDescriptors;
+
+inline pid_t defaultClone(const lambda::function<int()>& func)
+{
+ pid_t pid = ::fork();
+ if (pid == -1) {
+ return -1;
+ } else if (pid == 0) {
+ // Child.
+ ::exit(func());
+ UNREACHABLE();
+ } else {
+ // Parent.
+ return pid;
+ }
+}
+
+
namespace internal {
// This function will invoke `os::close` on all specified file
@@ -100,22 +117,6 @@ inline Try<Nothing> cloexec(
}
-inline pid_t defaultClone(const lambda::function<int()>& func)
-{
- pid_t pid = ::fork();
- if (pid == -1) {
- return -1;
- } else if (pid == 0) {
- // Child.
- ::exit(func());
- UNREACHABLE();
- } else {
- // Parent.
- return pid;
- }
-}
-
-
inline void signalHandler(int signal)
{
// Send SIGKILL to every process in the process group of the
[08/10] mesos git commit: Added support for TCP health checks.
Posted by al...@apache.org.
Added support for TCP health checks.
Review: https://reviews.apache.org/r/49360/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/1d004920
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/1d004920
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/1d004920
Branch: refs/heads/master
Commit: 1d00492086a3b8298c2f23fd5196d31d0bfef73d
Parents: 2b80207
Author: haosdent huang <ha...@gmail.com>
Authored: Fri Aug 26 16:33:52 2016 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Sun Aug 28 16:42:57 2016 +0200
----------------------------------------------------------------------
src/health-check/health_checker.cpp | 80 +++++++++++++++++++++++++++++++-
src/health-check/health_checker.hpp | 6 +++
2 files changed, 85 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/1d004920/src/health-check/health_checker.cpp
----------------------------------------------------------------------
diff --git a/src/health-check/health_checker.cpp b/src/health-check/health_checker.cpp
index 4dd573b..e5e7b94 100644
--- a/src/health-check/health_checker.cpp
+++ b/src/health-check/health_checker.cpp
@@ -444,7 +444,85 @@ Future<Nothing> HealthCheckerProcess::_tcpHealthCheck()
CHECK_EQ(HealthCheck::TCP, check.type());
CHECK(check.has_tcp());
- promise.fail("TCP health check is not supported");
+ const HealthCheck::TCPCheckInfo& tcp = check.tcp();
+
+ VLOG(1) << "Launching TCP health check at port '" << tcp.port() << "'";
+
+ // TODO(haosdent): Replace `bash` with a tiny binary to support
+ // TCP health check with half-open.
+ const vector<string> argv = {
+ "bash",
+ "-c",
+ "</dev/tcp/" + DEFAULT_DOMAIN + "/" + stringify(tcp.port())
+ };
+
+ Try<Subprocess> s = subprocess(
+ "bash",
+ argv,
+ Subprocess::PATH("/dev/null"),
+ Subprocess::PIPE(),
+ Subprocess::PIPE());
+
+ if (s.isError()) {
+ return Failure("Failed to create the bash subprocess: " + s.error());
+ }
+
+ pid_t bashPid = s->pid();
+ Duration timeout = Seconds(check.timeout_seconds());
+
+ return await(
+ s->status(),
+ process::io::read(s->out().get()),
+ process::io::read(s->err().get()))
+ .after(timeout,
+ [timeout, bashPid](Future<tuple<Future<Option<int>>,
+ Future<string>,
+ Future<string>>> future) {
+ future.discard();
+
+ if (bashPid != -1) {
+ // Cleanup the bash process.
+ VLOG(1) << "Killing the TCP health check process " << bashPid;
+
+ os::killtree(bashPid, SIGKILL);
+ }
+
+ return Failure(
+ "bash has not returned after " + stringify(timeout) + "; aborting");
+ })
+ .then(defer(self(), &Self::__tcpHealthCheck, lambda::_1));
+}
+
+
+Future<Nothing> HealthCheckerProcess::__tcpHealthCheck(
+ const tuple<
+ Future<Option<int>>,
+ Future<string>,
+ Future<string>>& t)
+{
+ Future<Option<int>> status = std::get<0>(t);
+ if (!status.isReady()) {
+ return Failure(
+ "Failed to get the exit status of the bash process: " +
+ (status.isFailed() ? status.failure() : "discarded"));
+ }
+
+ if (status->isNone()) {
+ return Failure("Failed to reap the bash process");
+ }
+
+ int statusCode = status->get();
+ if (statusCode != 0) {
+ Future<string> error = std::get<2>(t);
+ if (!error.isReady()) {
+ return Failure("bash returned " + WSTRINGIFY(statusCode) +
+ "; reading stderr failed: " +
+ (error.isFailed() ? error.failure() : "discarded"));
+ }
+
+ return Failure("bash returned " + WSTRINGIFY(statusCode) + ": " +
+ error.get());
+ }
return Nothing();
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/1d004920/src/health-check/health_checker.hpp
----------------------------------------------------------------------
diff --git a/src/health-check/health_checker.hpp b/src/health-check/health_checker.hpp
index 83a3384..52acdef 100644
--- a/src/health-check/health_checker.hpp
+++ b/src/health-check/health_checker.hpp
@@ -91,6 +91,12 @@ private:
process::Future<Nothing> _tcpHealthCheck();
+ process::Future<Nothing> __tcpHealthCheck(
+ const std::tuple<
+ process::Future<Option<int>>,
+ process::Future<std::string>,
+ process::Future<std::string>>& t);
+
void reschedule();
process::Promise<Nothing> promise;
[02/10] mesos git commit: Updated the docker executor to use health
checks via the library.
Posted by al...@apache.org.
Updated the docker executor to use health checks via the library.
We updated the command executor to use health checks via library in
https://reviews.apache.org/r/49389/. This patch updates the docker
executor for consistency.
Review: https://reviews.apache.org/r/49555/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/b16e5fa0
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/b16e5fa0
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/b16e5fa0
Branch: refs/heads/master
Commit: b16e5fa04575aa632f5e3a55c1d6320cb09add89
Parents: 564eda0
Author: haosdent huang <ha...@gmail.com>
Authored: Fri Aug 26 16:31:32 2016 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Sun Aug 28 16:31:54 2016 +0200
----------------------------------------------------------------------
src/docker/executor.cpp | 73 ++++++++++++++------------------------------
1 file changed, 23 insertions(+), 50 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/b16e5fa0/src/docker/executor.cpp
----------------------------------------------------------------------
diff --git a/src/docker/executor.cpp b/src/docker/executor.cpp
index 8d679cd..5508819 100644
--- a/src/docker/executor.cpp
+++ b/src/docker/executor.cpp
@@ -84,14 +84,13 @@ public:
const string& sandboxDirectory,
const string& mappedDirectory,
const Duration& shutdownGracePeriod,
- const string& healthCheckDir,
+ const string& launcherDir,
const map<string, string>& taskEnvironment)
: ProcessBase(ID::generate("docker-executor")),
killed(false),
killedByHealthCheck(false),
terminated(false),
- healthPid(-1),
- healthCheckDir(healthCheckDir),
+ launcherDir(launcherDir),
docker(docker),
containerName(containerName),
sandboxDirectory(sandboxDirectory),
@@ -340,17 +339,6 @@ private:
inspect
.onAny(defer(self(), &Self::_killTask, _taskId, gracePeriod));
}
-
- // Cleanup health check process.
- //
- // TODO(bmahler): Consider doing this after the task has been
- // reaped, since a framework may be interested in health
- // information while the task is being killed (consider a
- // task that takes 30 minutes to be cleanly killed).
- if (healthPid != -1) {
- os::killtree(healthPid, SIGKILL);
- healthPid = -1;
- }
}
void _killTask(const TaskID& taskId_, const Duration& gracePeriod)
@@ -522,41 +510,26 @@ private:
strings::join(" ", commandArguments));
}
- JSON::Object json = JSON::protobuf(healthCheck);
-
- const string path = path::join(healthCheckDir, "mesos-health-check");
+ Try<Owned<health::HealthChecker>> _checker = health::HealthChecker::create(
+ healthCheck,
+ self(),
+ task.task_id());
- // Launch the subprocess using 'exec' style so that quotes can
- // be properly handled.
- vector<string> checkerArguments;
- checkerArguments.push_back(path);
- checkerArguments.push_back("--executor=" + stringify(self()));
- checkerArguments.push_back("--health_check_json=" + stringify(json));
- checkerArguments.push_back("--task_id=" + task.task_id().value());
-
- cout << "Launching health check process: "
- << strings::join(" ", checkerArguments) << endl;
-
- Try<Subprocess> healthProcess =
- process::subprocess(
- path,
- checkerArguments,
- // Intentionally not sending STDIN to avoid health check
- // commands that expect STDIN input to block.
- Subprocess::PATH("/dev/null"),
- Subprocess::FD(STDOUT_FILENO),
- Subprocess::FD(STDERR_FILENO));
+ if (_checker.isError()) {
+ // TODO(gilbert): Consider ABORT and return a TASK_FAILED here.
+ cerr << "Failed to create health checker: "
+ << _checker.error() << endl;
+ } else {
+ checker = _checker.get();
- if (healthProcess.isError()) {
- cerr << "Unable to launch health process: "
- << healthProcess.error() << endl;
- return;
+ checker->healthCheck()
+ .onAny([](const Future<Nothing>& future) {
+ // Only possible to be a failure.
+ if (future.isFailed()) {
+ cerr << "Health check failed:" << future.failure() << endl;
+ }
+ });
}
-
- healthPid = healthProcess.get().pid();
-
- cout << "Health check process launched at pid: "
- << stringify(healthPid) << endl;
}
// TODO(alexr): Introduce a state enum and document transitions,
@@ -565,8 +538,7 @@ private:
bool killedByHealthCheck;
bool terminated;
- pid_t healthPid;
- string healthCheckDir;
+ string launcherDir;
Owned<Docker> docker;
string containerName;
string sandboxDirectory;
@@ -581,6 +553,7 @@ private:
Option<ExecutorDriver*> driver;
Option<FrameworkInfo> frameworkInfo;
Option<TaskID> taskId;
+ Owned<health::HealthChecker> checker;
Option<NetworkInfo> containerNetworkInfo;
};
@@ -594,7 +567,7 @@ public:
const string& sandboxDirectory,
const string& mappedDirectory,
const Duration& shutdownGracePeriod,
- const string& healthCheckDir,
+ const string& launcherDir,
const map<string, string>& taskEnvironment)
{
process = Owned<DockerExecutorProcess>(new DockerExecutorProcess(
@@ -603,7 +576,7 @@ public:
sandboxDirectory,
mappedDirectory,
shutdownGracePeriod,
- healthCheckDir,
+ launcherDir,
taskEnvironment));
spawn(process.get());
[07/10] mesos git commit: Added support for HTTP/HTTPS health checks.
Posted by al...@apache.org.
Added support for HTTP/HTTPS health checks.
Review: https://reviews.apache.org/r/36816/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/2b80207d
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/2b80207d
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/2b80207d
Branch: refs/heads/master
Commit: 2b80207d1f05aa9dcb5747d161de84aada41643c
Parents: 7380d13
Author: haosdent huang <ha...@gmail.com>
Authored: Fri Aug 26 16:33:41 2016 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Sun Aug 28 16:42:57 2016 +0200
----------------------------------------------------------------------
src/health-check/health_checker.cpp | 117 ++++++++++++++++++++++++++++++-
src/health-check/health_checker.hpp | 7 ++
2 files changed, 123 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/2b80207d/src/health-check/health_checker.cpp
----------------------------------------------------------------------
diff --git a/src/health-check/health_checker.cpp b/src/health-check/health_checker.cpp
index 097117a..4dd573b 100644
--- a/src/health-check/health_checker.cpp
+++ b/src/health-check/health_checker.cpp
@@ -31,6 +31,8 @@
#include <process/collect.hpp>
#include <process/delay.hpp>
+#include <process/http.hpp>
+#include <process/io.hpp>
#include <process/subprocess.hpp>
#include <stout/duration.hpp>
@@ -57,12 +59,20 @@ using process::UPID;
using std::map;
using std::string;
+using std::tuple;
using std::vector;
namespace mesos {
namespace internal {
namespace health {
+static const string DEFAULT_HTTP_SCHEME = "http";
+
+// Use '127.0.0.1' instead of 'localhost', because the hosts
+// file in some container images may not contain 'localhost'.
+static const string DEFAULT_DOMAIN = "127.0.0.1";
+
+
Try<Owned<HealthChecker>> HealthChecker::create(
const HealthCheck& check,
const UPID& executor,
@@ -318,7 +328,112 @@ Future<Nothing> HealthCheckerProcess::_httpHealthCheck()
CHECK_EQ(HealthCheck::HTTP, check.type());
CHECK(check.has_http());
- promise.fail("HTTP health check is not supported");
+ const HealthCheck::HTTPCheckInfo& http = check.http();
+
+ const string scheme = http.has_scheme() ? http.scheme() : DEFAULT_HTTP_SCHEME;
+ const string path = http.has_path() ? http.path() : "";
+ const string url = scheme + "://" + DEFAULT_DOMAIN + ":" +
+ stringify(http.port()) + path;
+
+ VLOG(1) << "Launching HTTP health check '" << url << "'";
+
+ const vector<string> argv = {
+ "curl",
+ "-s", // Don't show progress meter or error messages.
+ "-S", // Makes curl show an error message if it fails.
+ "-L", // Follows HTTP 3xx redirects.
+ "-k", // Ignores SSL validation when scheme is https.
+ "-w", "%{http_code}", // Displays HTTP response code on stdout.
+ "-o", "/dev/null", // Ignores output.
+ url
+ };
+
+ Try<Subprocess> s = subprocess(
+ "curl",
+ argv,
+ Subprocess::PATH("/dev/null"),
+ Subprocess::PIPE(),
+ Subprocess::PIPE());
+
+ if (s.isError()) {
+ return Failure("Failed to create the curl subprocess: " + s.error());
+ }
+
+ pid_t curlPid = s->pid();
+ Duration timeout = Seconds(check.timeout_seconds());
+
+ return await(
+ s->status(),
+ process::io::read(s->out().get()),
+ process::io::read(s->err().get()))
+ .after(timeout,
+ [timeout, curlPid](Future<tuple<Future<Option<int>>,
+ Future<string>,
+ Future<string>>> future) {
+ future.discard();
+
+ if (curlPid != -1) {
+ // Cleanup the curl process.
+ VLOG(1) << "Killing the HTTP health check process " << curlPid;
+
+ os::killtree(curlPid, SIGKILL);
+ }
+
+ return Failure(
+ "curl has not returned after " + stringify(timeout) + "; aborting");
+ })
+ .then(defer(self(), &Self::__httpHealthCheck, lambda::_1));
+}
+
+
+Future<Nothing> HealthCheckerProcess::__httpHealthCheck(
+ const tuple<
+ Future<Option<int>>,
+ Future<string>,
+ Future<string>>& t)
+{
+ Future<Option<int>> status = std::get<0>(t);
+ if (!status.isReady()) {
+ return Failure(
+ "Failed to get the exit status of the curl process: " +
+ (status.isFailed() ? status.failure() : "discarded"));
+ }
+
+ if (status->isNone()) {
+ return Failure("Failed to reap the curl process");
+ }
+
+ int statusCode = status->get();
+ if (statusCode != 0) {
+ Future<string> error = std::get<2>(t);
+ if (!error.isReady()) {
+ return Failure("curl returned " + WSTRINGIFY(statusCode) +
+ "; reading stderr failed: " +
+ (error.isFailed() ? error.failure() : "discarded"));
+ }
+
+ return Failure("curl returned " + WSTRINGIFY(statusCode) + ": " +
+ error.get());
+ }
+
+ Future<string> output = std::get<1>(t);
+ if (!output.isReady()) {
+ return Failure("Failed to read stdout from curl: " +
+ (output.isFailed() ? output.failure() : "discarded"));
+ }
+
+ // Parse the output and get the HTTP response code.
+ Try<int> code = numify<int>(output.get());
+ if (code.isError()) {
+ return Failure("Unexpected output from curl: " + output.get());
+ }
+
+ if (code.get() < process::http::Status::OK ||
+ code.get() >= process::http::Status::BAD_REQUEST) {
+ return Failure(
+ "Unexpected HTTP response code: " +
+ process::http::Status::string(code.get()));
+ }
return Nothing();
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/2b80207d/src/health-check/health_checker.hpp
----------------------------------------------------------------------
diff --git a/src/health-check/health_checker.hpp b/src/health-check/health_checker.hpp
index 83cedfb..83a3384 100644
--- a/src/health-check/health_checker.hpp
+++ b/src/health-check/health_checker.hpp
@@ -18,6 +18,7 @@
#define __HEALTH_CHECKER_HPP__
#include <string>
+#include <tuple>
#include <mesos/mesos.hpp>
@@ -82,6 +83,12 @@ private:
process::Future<Nothing> _httpHealthCheck();
+ process::Future<Nothing> __httpHealthCheck(
+ const std::tuple<
+ process::Future<Option<int>>,
+ process::Future<std::string>,
+ process::Future<std::string>>& t);
+
process::Future<Nothing> _tcpHealthCheck();
void reschedule();