You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by al...@apache.org on 2016/08/28 15:04:01 UTC

[01/10] mesos git commit: Removed the health check binary from libprocess.

Repository: mesos
Updated Branches:
  refs/heads/master 537584cf2 -> 037a346a2


Removed the health check binary from libprocess.

With https://reviews.apache.org/r/49389/ and
https://reviews.apache.org/r/49555/, both command and
docker executors use health checks via the library. Hence
the mesos-health-check binary is not necessary anymore.

Review: https://reviews.apache.org/r/50657/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/c09bb378
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/c09bb378
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/c09bb378

Branch: refs/heads/master
Commit: c09bb378c4b31c3a28accd01b6d6522372ced1ab
Parents: b16e5fa
Author: haosdent huang <ha...@gmail.com>
Authored: Fri Aug 26 16:32:20 2016 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Sun Aug 28 16:31:54 2016 +0200

----------------------------------------------------------------------
 3rdparty/libprocess/cmake/Process3rdpartyConfigure.cmake | 5 -----
 1 file changed, 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/c09bb378/3rdparty/libprocess/cmake/Process3rdpartyConfigure.cmake
----------------------------------------------------------------------
diff --git a/3rdparty/libprocess/cmake/Process3rdpartyConfigure.cmake b/3rdparty/libprocess/cmake/Process3rdpartyConfigure.cmake
index 497f610..26f95da 100644
--- a/3rdparty/libprocess/cmake/Process3rdpartyConfigure.cmake
+++ b/3rdparty/libprocess/cmake/Process3rdpartyConfigure.cmake
@@ -152,11 +152,6 @@ set(
   MESOS_FETCHER mesos-fetcher
   CACHE STRING "Target for fetcher")
 
-# Define target for health-check.
-set(
-  MESOS_HEALTH_CHECK mesos-health-check
-  CACHE STRING "Target for healt-check")
-
 # Define target for master.
 set(
   MESOS_MASTER mesos-master


[04/10] mesos git commit: Removed the health check binary.

Posted by al...@apache.org.
Removed the health check binary.

With https://reviews.apache.org/r/49389/ and
https://reviews.apache.org/r/49555/, both command and
docker executors use health checks via the library. Hence
the mesos-health-check binary is not necessary anymore.

Review: https://reviews.apache.org/r/49556/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/f19a6aa0
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/f19a6aa0
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/f19a6aa0

Branch: refs/heads/master
Commit: f19a6aa07d418b7469745248b5b1c23ccf155615
Parents: c09bb37
Author: haosdent huang <ha...@gmail.com>
Authored: Fri Aug 26 16:32:58 2016 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Sun Aug 28 16:31:55 2016 +0200

----------------------------------------------------------------------
 src/CMakeLists.txt              |   1 -
 src/Makefile.am                 |   5 -
 src/health-check/CMakeLists.txt |  46 ---------
 src/health-check/main.cpp       | 176 -----------------------------------
 4 files changed, 228 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/f19a6aa0/src/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index ccd9892..b14132a 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -516,7 +516,6 @@ target_link_libraries(${MESOS_PROTOBUF_TARGET} ${AGENT_LIBS})
 ##############################
 add_subdirectory(cli/)
 add_subdirectory(docker/)
-add_subdirectory(health-check/)
 add_subdirectory(launcher/)
 add_subdirectory(local/)
 add_subdirectory(master/)

http://git-wip-us.apache.org/repos/asf/mesos/blob/f19a6aa0/src/Makefile.am
----------------------------------------------------------------------
diff --git a/src/Makefile.am b/src/Makefile.am
index b577b42..a307482 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1356,11 +1356,6 @@ mesos_network_helper_CPPFLAGS = $(MESOS_CPPFLAGS)
 mesos_network_helper_LDADD = libmesos.la $(LDADD)
 endif
 
-pkglibexec_PROGRAMS += mesos-health-check
-mesos_health_check_SOURCES = health-check/main.cpp
-mesos_health_check_CPPFLAGS = $(MESOS_CPPFLAGS)
-mesos_health_check_LDADD = libmesos.la $(LDADD)
-
 pkglibexec_PROGRAMS += mesos-usage
 mesos_usage_SOURCES = usage/main.cpp
 mesos_usage_CPPFLAGS = $(MESOS_CPPFLAGS)

http://git-wip-us.apache.org/repos/asf/mesos/blob/f19a6aa0/src/health-check/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/health-check/CMakeLists.txt b/src/health-check/CMakeLists.txt
deleted file mode 100644
index 69e9566..0000000
--- a/src/health-check/CMakeLists.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# MESOS HEALTH SOURCE.
-######################
-set(HEALTH_EXECUTABLE_SRC
-  ${HEALTH_EXECUTABLE_SRC}
-  main.cpp
-  )
-
-# INCLUDE DIRECTIVES FOR MESOS HEALTH CHECK EXECUTABLE (generates, e.g.,
-# -I/path/to/thing on Linux).
-########################################################################
-include_directories(${AGENT_INCLUDE_DIRS})
-
-# LINKING LIBRARIES BY DIRECTORY (might generate, e.g., -L/path/to/thing on
-# Linux).
-###########################################################################
-link_directories(${AGENT_LIB_DIRS})
-
-# THE MESOS HELATH CHECK EXECUTABLE.
-####################################
-add_executable(${MESOS_HEALTH_CHECK} ${HEALTH_EXECUTABLE_SRC})
-
-# ADD LINKER FLAGS (generates, e.g., -lglog on Linux).
-######################################################
-target_link_libraries(${MESOS_HEALTH_CHECK} ${AGENT_LIBS} ${MESOS_TARGET})
-
-
-# ADD BINARY DEPENDENCIES (tells CMake what to compile/build first).
-####################################################################
-add_dependencies(${MESOS_HEALTH_CHECK} ${MESOS_TARGET})

http://git-wip-us.apache.org/repos/asf/mesos/blob/f19a6aa0/src/health-check/main.cpp
----------------------------------------------------------------------
diff --git a/src/health-check/main.cpp b/src/health-check/main.cpp
deleted file mode 100644
index 5346e30..0000000
--- a/src/health-check/main.cpp
+++ /dev/null
@@ -1,176 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <signal.h>
-#include <stdio.h>
-#include <string.h>
-#ifndef __WINDOWS__
-#include <unistd.h>
-#endif // __WINDOWS__
-
-#include <iostream>
-#include <string>
-
-#include <mesos/mesos.hpp>
-
-#include <process/future.hpp>
-#include <process/pid.hpp>
-#include <process/process.hpp>
-#include <process/protobuf.hpp>
-
-#include <stout/flags.hpp>
-#include <stout/json.hpp>
-#include <stout/option.hpp>
-#include <stout/path.hpp>
-#include <stout/protobuf.hpp>
-
-#include "health-check/health_checker.hpp"
-
-using namespace mesos;
-
-using std::cout;
-using std::cerr;
-using std::endl;
-using std::string;
-
-using process::UPID;
-
-class Flags : public virtual flags::FlagsBase
-{
-public:
-  Flags()
-  {
-    add(&Flags::health_check_json,
-        "health_check_json",
-        "JSON describing health check to perform");
-
-    add(&Flags::executor,
-        "executor",
-        "Executor UPID to send health check messages to");
-
-    add(&Flags::task_id,
-        "task_id",
-        "Task ID that this health check process is checking");
-  }
-
-  Option<string> health_check_json;
-  Option<UPID> executor;
-  Option<string> task_id;
-};
-
-
-void usage(const char* argv0, const flags::FlagsBase& flags)
-{
-  cerr << "Usage: " << Path(argv0).basename() << " [...]" << endl
-       << endl
-       << "Supported options:" << endl
-       << flags.usage();
-}
-
-
-int main(int argc, char** argv)
-{
-  GOOGLE_PROTOBUF_VERIFY_VERSION;
-
-  Flags flags;
-
-  Try<flags::Warnings> load = flags.load(None(), argc, argv);
-
-  if (load.isError()) {
-    cerr << flags.usage(load.error()) << endl;
-    return EXIT_FAILURE;
-  }
-
-  if (flags.help) {
-    cout << flags.usage() << endl;
-    return EXIT_SUCCESS;
-  }
-
-  // Log any flag warnings.
-  foreach (const flags::Warning& warning, load->warnings) {
-    LOG(WARNING) << warning.message;
-  }
-
-  if (flags.health_check_json.isNone()) {
-    cerr << flags.usage("Expected JSON with health check description") << endl;
-    return EXIT_FAILURE;
-  }
-
-  Try<JSON::Object> parse =
-    JSON::parse<JSON::Object>(flags.health_check_json.get());
-
-  if (parse.isError()) {
-    cerr << flags.usage("Failed to parse --health_check_json: " + parse.error())
-         << endl;
-    return EXIT_FAILURE;
-  }
-
-  Try<HealthCheck> check = protobuf::parse<HealthCheck>(parse.get());
-
-  if (check.isError()) {
-    cerr << flags.usage("Failed to parse --health_check_json: " + check.error())
-         << endl;
-    return EXIT_SUCCESS;
-  }
-
-  if (flags.executor.isNone()) {
-    cerr << flags.usage("Missing required option --executor") << endl;
-    return EXIT_FAILURE;
-  }
-
-  if (check.get().has_http() && check.get().has_command()) {
-    cerr << flags.usage("Both 'http' and 'command' health check requested")
-         << endl;
-    return EXIT_FAILURE;
-  }
-
-  if (!check.get().has_http() && !check.get().has_command()) {
-    cerr << flags.usage("Expecting one of 'http' or 'command' health check")
-         << endl;
-    return EXIT_FAILURE;
-  }
-
-  if (flags.task_id.isNone()) {
-    cerr << flags.usage("Missing required option --task_id") << endl;
-    return EXIT_FAILURE;
-  }
-
-  TaskID taskID;
-  taskID.set_value(flags.task_id.get());
-
-  mesos::internal::health::HealthCheckerProcess process(
-    check.get(),
-    flags.executor.get(),
-    taskID);
-
-  process::spawn(&process);
-
-  process::Future<Nothing> checking =
-    process::dispatch(
-      process, &mesos::internal::health::HealthCheckerProcess::healthCheck);
-
-  checking.await();
-
-  process::terminate(process);
-  process::wait(process);
-
-  if (checking.isFailed()) {
-    LOG(WARNING) << "Health check failed " << checking.failure();
-    return EXIT_FAILURE;
-  }
-
-  return EXIT_SUCCESS;
-}


[05/10] mesos git commit: Refactored `_commandHealthCheck` in HealthChecker.

Posted by al...@apache.org.
Refactored `_commandHealthCheck` in HealthChecker.

* Remove blocking `Future::await` call.
* Adjust the level of some logs.
* Adjust style.
* Change the interface of health check handlers to
  `Future<Nothing>` to make error handling easier.

Review: https://reviews.apache.org/r/51069/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/7380d130
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/7380d130
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/7380d130

Branch: refs/heads/master
Commit: 7380d130a4b03fd110acbcb0636f8615b8f28cee
Parents: f19a6aa
Author: haosdent huang <ha...@gmail.com>
Authored: Fri Aug 26 16:33:12 2016 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Sun Aug 28 16:42:41 2016 +0200

----------------------------------------------------------------------
 src/health-check/health_checker.cpp | 123 ++++++++++++++++++-------------
 src/health-check/health_checker.hpp |  10 ++-
 2 files changed, 78 insertions(+), 55 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/7380d130/src/health-check/health_checker.cpp
----------------------------------------------------------------------
diff --git a/src/health-check/health_checker.cpp b/src/health-check/health_checker.cpp
index 1a2a6df..097117a 100644
--- a/src/health-check/health_checker.cpp
+++ b/src/health-check/health_checker.cpp
@@ -29,6 +29,7 @@
 
 #include <mesos/mesos.hpp>
 
+#include <process/collect.hpp>
 #include <process/delay.hpp>
 #include <process/subprocess.hpp>
 
@@ -45,6 +46,7 @@
 
 using process::delay;
 using process::Clock;
+using process::Failure;
 using process::Future;
 using process::NO_SETSID;
 using process::Owned;
@@ -116,9 +118,9 @@ HealthCheckerProcess::HealthCheckerProcess(
 
 Future<Nothing> HealthCheckerProcess::healthCheck()
 {
-  VLOG(2) << "Health checks starting in "
-    << Seconds(check.delay_seconds()) << ", grace period "
-    << Seconds(check.grace_period_seconds());
+  VLOG(1) << "Health check starting in "
+          << Seconds(check.delay_seconds()) << ", grace period "
+          << Seconds(check.grace_period_seconds());
 
   startTime = Clock::now();
 
@@ -137,7 +139,8 @@ void HealthCheckerProcess::failure(const string& message)
   }
 
   consecutiveFailures++;
-  VLOG(1) << "#" << consecutiveFailures << " check failed: " << message;
+  LOG(WARNING) << "Health check failed " << consecutiveFailures
+               << " times consecutively: " << message;
 
   bool killTask = consecutiveFailures >= check.consecutive_failures();
 
@@ -163,7 +166,7 @@ void HealthCheckerProcess::failure(const string& message)
 
 void HealthCheckerProcess::success()
 {
-  VLOG(1) << "Check passed";
+  VLOG(1) << HealthCheck::Type_Name(check.type()) << " health check passed";
 
   // Send a healthy status update on the first success,
   // and on the first success following failure(s).
@@ -182,30 +185,49 @@ void HealthCheckerProcess::success()
 
 void HealthCheckerProcess::_healthCheck()
 {
+  Future<Nothing> checkResult;
+
   switch (check.type()) {
     case HealthCheck::COMMAND: {
-      _commandHealthCheck();
-      return;
+      checkResult = _commandHealthCheck();
+      break;
     }
 
     case HealthCheck::HTTP: {
-      _httpHealthCheck();
-      return;
+      checkResult = _httpHealthCheck();
+      break;
     }
 
     case HealthCheck::TCP: {
-      _tcpHealthCheck();
-      return;
+      checkResult = _tcpHealthCheck();
+      break;
     }
 
     default: {
       UNREACHABLE();
     }
   }
+
+  checkResult.onAny(defer(self(), &Self::__healthCheck, lambda::_1));
+}
+
+
+void HealthCheckerProcess::__healthCheck(const Future<Nothing>& future)
+{
+  if (future.isReady()) {
+    success();
+    return;
+  }
+
+  string message = HealthCheck::Type_Name(check.type()) +
+                   " health check failed: " +
+                   (future.isFailed() ? future.failure() : "discarded");
+
+  failure(message);
 }
 
 
-void HealthCheckerProcess::_commandHealthCheck()
+Future<Nothing> HealthCheckerProcess::_commandHealthCheck()
 {
   CHECK_EQ(HealthCheck::COMMAND, check.type());
   CHECK(check.has_command());
@@ -220,11 +242,11 @@ void HealthCheckerProcess::_commandHealthCheck()
   }
 
   // Launch the subprocess.
-  Option<Try<Subprocess>> external = None();
+  Try<Subprocess> external = Error("Not launched");
 
   if (command.shell()) {
     // Use the shell variant.
-    VLOG(2) << "Launching health command '" << command.value() << "'";
+    VLOG(1) << "Launching command health check '" << command.value() << "'";
 
     external = subprocess(
         command.value(),
@@ -240,7 +262,7 @@ void HealthCheckerProcess::_commandHealthCheck()
       argv.push_back(arg);
     }
 
-    VLOG(2) << "Launching health command [" << command.value() << ", "
+    VLOG(1) << "Launching command health check [" << command.value() << ", "
             << strings::join(", ", argv) << "]";
 
     external = subprocess(
@@ -254,72 +276,69 @@ void HealthCheckerProcess::_commandHealthCheck()
         environment);
   }
 
-  CHECK_SOME(external);
-
-  if (external.get().isError()) {
-    failure("Error creating subprocess for healthcheck: " +
-            external.get().error());
-    return;
+  if (external.isError()) {
+    return Failure("Failed to create subprocess: " + external.error());
   }
 
-  pid_t commandPid = external.get().get().pid();
+  pid_t commandPid = external->pid();
+  Duration timeout = Seconds(check.timeout_seconds());
 
-  Future<Option<int>> status = external.get().get().status();
-  status.await(Seconds(check.timeout_seconds()));
+  return external->status()
+    .after(timeout, [timeout, commandPid](Future<Option<int>> future) {
+      future.discard();
 
-  if (!status.isReady()) {
-    string msg = "Command check failed with reason: ";
-    if (status.isFailed()) {
-      msg += "failed with error: " + status.failure();
-    } else if (status.isDiscarded()) {
-      msg += "status future discarded";
-    } else {
-      msg += "status still pending after timeout " +
-             stringify(Seconds(check.timeout_seconds()));
-    }
+      if (commandPid != -1) {
+        // Cleanup the external command process.
+        VLOG(1) << "Killing the command health check process " << commandPid;
 
-    if (commandPid != -1) {
-      // Cleanup the external command process.
-      os::killtree(commandPid, SIGKILL);
-      VLOG(1) << "Kill health check command " << commandPid;
-    }
+        os::killtree(commandPid, SIGKILL);
+      }
 
-    failure(msg);
-    return;
-  }
+      return Failure(
+          "Command has not returned after " + stringify(timeout) +
+          "; aborting");
+    })
+    .then([](const Option<int>& status) -> Future<Nothing> {
+      if (status.isNone()) {
+        return Failure("Failed to reap the command process");
+      }
 
-  int statusCode = status.get().get();
-  if (statusCode != 0) {
-    string message = "Health command check " + WSTRINGIFY(statusCode);
-    failure(message);
-  } else {
-    success();
-  }
+      int statusCode = status.get();
+      if (statusCode != 0) {
+        return Failure("Command returned " + WSTRINGIFY(statusCode));
+      }
+
+      return Nothing();
+    });
 }
 
 
-void HealthCheckerProcess::_httpHealthCheck()
+Future<Nothing> HealthCheckerProcess::_httpHealthCheck()
 {
   CHECK_EQ(HealthCheck::HTTP, check.type());
   CHECK(check.has_http());
 
   promise.fail("HTTP health check is not supported");
+
+  return Nothing();
 }
 
 
-void HealthCheckerProcess::_tcpHealthCheck()
+Future<Nothing> HealthCheckerProcess::_tcpHealthCheck()
 {
   CHECK_EQ(HealthCheck::TCP, check.type());
   CHECK(check.has_tcp());
 
   promise.fail("TCP health check is not supported");
+
+  return Nothing();
 }
 
 
 void HealthCheckerProcess::reschedule()
 {
   VLOG(1) << "Rescheduling health check in "
-    << Seconds(check.interval_seconds());
+          << Seconds(check.interval_seconds());
 
   delay(Seconds(check.interval_seconds()), self(), &Self::_healthCheck);
 }

http://git-wip-us.apache.org/repos/asf/mesos/blob/7380d130/src/health-check/health_checker.hpp
----------------------------------------------------------------------
diff --git a/src/health-check/health_checker.hpp b/src/health-check/health_checker.hpp
index b4548f3..83cedfb 100644
--- a/src/health-check/health_checker.hpp
+++ b/src/health-check/health_checker.hpp
@@ -76,9 +76,13 @@ private:
 
   void _healthCheck();
 
-  void _commandHealthCheck();
-  void _httpHealthCheck();
-  void _tcpHealthCheck();
+  void __healthCheck(const process::Future<Nothing>& future);
+
+  process::Future<Nothing> _commandHealthCheck();
+
+  process::Future<Nothing> _httpHealthCheck();
+
+  process::Future<Nothing> _tcpHealthCheck();
 
   void reschedule();
 


[09/10] mesos git commit: Entered appropriate task's namespaces during health checking.

Posted by al...@apache.org.
Entered appropriate task's namespaces during health checking.

Review: https://reviews.apache.org/r/51379/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/bb084fb8
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/bb084fb8
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/bb084fb8

Branch: refs/heads/master
Commit: bb084fb84cb59ed8eb1a554b17141d09ff0ed262
Parents: d2f8bfa
Author: haosdent huang <ha...@gmail.com>
Authored: Fri Aug 26 16:34:41 2016 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Sun Aug 28 16:42:58 2016 +0200

----------------------------------------------------------------------
 src/docker/executor.cpp             | 15 ++++++-
 src/health-check/health_checker.cpp | 73 ++++++++++++++++++++++++++++----
 src/health-check/health_checker.hpp | 25 ++++++++++-
 src/launcher/executor.cpp           | 16 ++++++-
 4 files changed, 117 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/bb084fb8/src/docker/executor.cpp
----------------------------------------------------------------------
diff --git a/src/docker/executor.cpp b/src/docker/executor.cpp
index 5508819..f5981b0 100644
--- a/src/docker/executor.cpp
+++ b/src/docker/executor.cpp
@@ -181,6 +181,8 @@ public:
     inspect = docker->inspect(containerName, DOCKER_INSPECT_DELAY)
       .then(defer(self(), [=](const Docker::Container& container) {
         if (!killed) {
+          containerPid = container.pid;
+
           TaskStatus status;
           status.mutable_task_id()->CopyFrom(taskId.get());
           status.set_state(TASK_RUNNING);
@@ -510,10 +512,20 @@ private:
           strings::join(" ", commandArguments));
     }
 
+    vector<string> namespaces;
+    if (healthCheck.type() == HealthCheck::HTTP ||
+        healthCheck.type() == HealthCheck::TCP) {
+      // Make sure HTTP and TCP health checks are run
+      // from the container's network namespace.
+      namespaces.push_back("net");
+    }
+
     Try<Owned<health::HealthChecker>> _checker = health::HealthChecker::create(
         healthCheck,
         self(),
-        task.task_id());
+        task.task_id(),
+        containerPid,
+        namespaces);
 
     if (_checker.isError()) {
       // TODO(gilbert): Consider ABORT and return a TASK_FAILED here.
@@ -555,6 +567,7 @@ private:
   Option<TaskID> taskId;
   Owned<health::HealthChecker> checker;
   Option<NetworkInfo> containerNetworkInfo;
+  Option<pid_t> containerPid;
 };
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/bb084fb8/src/health-check/health_checker.cpp
----------------------------------------------------------------------
diff --git a/src/health-check/health_checker.cpp b/src/health-check/health_checker.cpp
index e5e7b94..f373df1 100644
--- a/src/health-check/health_checker.cpp
+++ b/src/health-check/health_checker.cpp
@@ -46,6 +46,10 @@
 
 #include "common/status_utils.hpp"
 
+#ifdef __linux__
+#include "linux/ns.hpp"
+#endif
+
 using process::delay;
 using process::Clock;
 using process::Failure;
@@ -73,10 +77,40 @@ static const string DEFAULT_HTTP_SCHEME = "http";
 static const string DEFAULT_DOMAIN = "127.0.0.1";
 
 
+#ifdef __linux__
+pid_t cloneWithSetns(
+    const lambda::function<int()>& func,
+    Option<pid_t> taskPid,
+    const vector<string>& namespaces)
+{
+  return process::defaultClone([=]() -> int {
+    if (taskPid.isSome()) {
+      foreach (const string& ns, namespaces) {
+        Try<Nothing> setns = ns::setns(taskPid.get(), ns);
+        if (setns.isError()) {
+          // This effectively aborts the health check.
+          LOG(FATAL) << "Failed to enter the " << ns << " namespace of "
+                     << "task (pid: '" << taskPid.get() << "'): "
+                     << setns.error();
+        }
+
+        VLOG(1) << "Entered the " << ns << " namespace of "
+                << "task (pid: '" << taskPid.get() << "') successfully";
+      }
+    }
+
+    return func();
+  });
+}
+#endif
+
+
 Try<Owned<HealthChecker>> HealthChecker::create(
     const HealthCheck& check,
     const UPID& executor,
-    const TaskID& taskID)
+    const TaskID& taskID,
+    Option<pid_t> taskPid,
+    const vector<string>& namespaces)
 {
   // Validate the 'HealthCheck' protobuf.
   Option<Error> error = validation::healthCheck(check);
@@ -87,7 +121,9 @@ Try<Owned<HealthChecker>> HealthChecker::create(
   Owned<HealthCheckerProcess> process(new HealthCheckerProcess(
       check,
       executor,
-      taskID));
+      taskID,
+      taskPid,
+      namespaces));
 
   return Owned<HealthChecker>(new HealthChecker(process));
 }
@@ -117,13 +153,24 @@ Future<Nothing> HealthChecker::healthCheck()
 HealthCheckerProcess::HealthCheckerProcess(
     const HealthCheck& _check,
     const UPID& _executor,
-    const TaskID& _taskID)
+    const TaskID& _taskID,
+    Option<pid_t> _taskPid,
+    const vector<string>& _namespaces)
   : ProcessBase(process::ID::generate("health-checker")),
     check(_check),
     initializing(true),
     executor(_executor),
     taskID(_taskID),
-    consecutiveFailures(0) {}
+    taskPid(_taskPid),
+    namespaces(_namespaces),
+    consecutiveFailures(0)
+{
+#ifdef __linux__
+  if (!namespaces.empty()) {
+    clone = lambda::bind(&cloneWithSetns, lambda::_1, taskPid, namespaces);
+  }
+#endif
+}
 
 
 Future<Nothing> HealthCheckerProcess::healthCheck()
@@ -264,7 +311,8 @@ Future<Nothing> HealthCheckerProcess::_commandHealthCheck()
         Subprocess::FD(STDERR_FILENO),
         Subprocess::FD(STDERR_FILENO),
         NO_SETSID,
-        environment);
+        environment,
+        clone);
   } else {
     // Use the exec variant.
     vector<string> argv;
@@ -283,7 +331,8 @@ Future<Nothing> HealthCheckerProcess::_commandHealthCheck()
         Subprocess::FD(STDERR_FILENO),
         NO_SETSID,
         nullptr,
-        environment);
+        environment,
+        clone);
   }
 
   if (external.isError()) {
@@ -353,7 +402,11 @@ Future<Nothing> HealthCheckerProcess::_httpHealthCheck()
       argv,
       Subprocess::PATH("/dev/null"),
       Subprocess::PIPE(),
-      Subprocess::PIPE());
+      Subprocess::PIPE(),
+      NO_SETSID,
+      nullptr,
+      None(),
+      clone);
 
   if (s.isError()) {
     return Failure("Failed to create the curl subprocess: " + s.error());
@@ -461,7 +514,11 @@ Future<Nothing> HealthCheckerProcess::_tcpHealthCheck()
       argv,
       Subprocess::PATH("/dev/null"),
       Subprocess::PIPE(),
-      Subprocess::PIPE());
+      Subprocess::PIPE(),
+      NO_SETSID,
+      nullptr,
+      None(),
+      clone);
 
   if (s.isError()) {
     return Failure("Failed to create the bash subprocess: " + s.error());

http://git-wip-us.apache.org/repos/asf/mesos/blob/bb084fb8/src/health-check/health_checker.hpp
----------------------------------------------------------------------
diff --git a/src/health-check/health_checker.hpp b/src/health-check/health_checker.hpp
index 52acdef..392b4d5 100644
--- a/src/health-check/health_checker.hpp
+++ b/src/health-check/health_checker.hpp
@@ -19,6 +19,7 @@
 
 #include <string>
 #include <tuple>
+#include <vector>
 
 #include <mesos/mesos.hpp>
 
@@ -43,10 +44,25 @@ class HealthCheckerProcess;
 class HealthChecker
 {
 public:
+  /**
+   * Attempts to create a `HealthChecker` object.
+   *
+   * @param check The protobuf message definition of health check.
+   * @param executor The executor UPID to which health check results will be
+   *     reported.
+   * @param taskID The TaskID of the target task.
+   * @param taskPid The target task's pid used to enter the specified
+   *     namespaces.
+   * @param namespaces The namespaces to enter prior performing a single health
+   *     check.
+   * @return A `HealthChecker` object or an error if `create` fails.
+   */
   static Try<process::Owned<HealthChecker>> create(
       const HealthCheck& check,
       const process::UPID& executor,
-      const TaskID& taskID);
+      const TaskID& taskID,
+      Option<pid_t> taskPid,
+      const std::vector<std::string>& namespaces);
 
   ~HealthChecker();
 
@@ -65,7 +81,9 @@ public:
   HealthCheckerProcess(
       const HealthCheck& _check,
       const process::UPID& _executor,
-      const TaskID& _taskID);
+      const TaskID& _taskID,
+      Option<pid_t> _taskPid,
+      const std::vector<std::string>& _namespaces);
 
   virtual ~HealthCheckerProcess() {}
 
@@ -104,6 +122,9 @@ private:
   bool initializing;
   process::UPID executor;
   TaskID taskID;
+  Option<pid_t> taskPid;
+  std::vector<std::string> namespaces;
+  Option<lambda::function<pid_t(const lambda::function<int()>&)>> clone;
   uint32_t consecutiveFailures;
   process::Time startTime;
 };

http://git-wip-us.apache.org/repos/asf/mesos/blob/bb084fb8/src/launcher/executor.cpp
----------------------------------------------------------------------
diff --git a/src/launcher/executor.cpp b/src/launcher/executor.cpp
index 71ede1e..5370634 100644
--- a/src/launcher/executor.cpp
+++ b/src/launcher/executor.cpp
@@ -418,11 +418,25 @@ protected:
     cout << "Forked command at " << pid << endl;
 
     if (task->has_health_check()) {
+      vector<string> namespaces;
+      if (rootfs.isSome() &&
+          task->health_check().type() == HealthCheck::COMMAND) {
+        // Make sure command health checks are run from the task's mount
+        // namespace. Otherwise if rootfs is specified the command binary
+        // may not be available in the executor.
+        //
+        // NOTE: The command executor shares the network namespace
+        // with its task, hence no need to enter it explicitly.
+        namespaces.push_back("mnt");
+      }
+
       Try<Owned<health::HealthChecker>> _checker =
         health::HealthChecker::create(
             task->health_check(),
             self(),
-            task->task_id());
+            task->task_id(),
+            pid,
+            namespaces);
 
       if (_checker.isError()) {
         // TODO(gilbert): Consider ABORT and return a TASK_FAILED here.


[03/10] mesos git commit: Unshared the mount namespace when launching mesos-containerizer.

Posted by al...@apache.org.
Unshared the mount namespace when launching mesos-containerizer.

When launching a task from the mesos-executor, ensure that the mount
namespace is not shared between the task and the executor if the task
specifies rootfs. Otherwise, `pivot_root` in the mesos-containerizer
binary would affect the mount namespace of the executor and possibly
prevent it from accessing some binaries or libraries.

Review: https://reviews.apache.org/r/51266/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/564eda0a
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/564eda0a
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/564eda0a

Branch: refs/heads/master
Commit: 564eda0a5bbb74a9a6b2b8dfdc4f34c0a213d3c6
Parents: 537584c
Author: haosdent huang <ha...@gmail.com>
Authored: Fri Aug 26 16:31:07 2016 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Sun Aug 28 16:31:54 2016 +0200

----------------------------------------------------------------------
 src/launcher/posix/executor.cpp | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/564eda0a/src/launcher/posix/executor.cpp
----------------------------------------------------------------------
diff --git a/src/launcher/posix/executor.cpp b/src/launcher/posix/executor.cpp
index 50b9b30..7c40ebe 100644
--- a/src/launcher/posix/executor.cpp
+++ b/src/launcher/posix/executor.cpp
@@ -60,6 +60,9 @@ pid_t launchTaskPosix(
     const Option<string>& sandboxDirectory,
     const Option<string>& workingDirectory)
 {
+  // Prepare the flags to pass to the launch process.
+  MesosContainerizerLaunch::Flags launchFlags;
+
   if (rootfs.isSome()) {
     // The command executor is responsible for chrooting into the
     // root filesystem and changing the user before exec-ing the
@@ -73,14 +76,16 @@ pid_t launchTaskPosix(
     } else if (_user.get() != "root") {
       ABORT("The command executor requires root with rootfs");
     }
+
+    // Ensure that the mount namespace of the executor is not affected by
+    // changes in its task's namespace induced by calling `pivot_root`
+    // as part of the task setup in the mesos-containerizer binary.
+    launchFlags.unshare_namespace_mnt = true;
 #else
     ABORT("Not expecting root volume with non-linux platform");
 #endif // __linux__
   }
 
-  // Prepare the flags to pass to the launch process.
-  MesosContainerizerLaunch::Flags launchFlags;
-
   launchFlags.command = JSON::protobuf(command);
 
   if (rootfs.isSome()) {


[10/10] mesos git commit: Updated the CHANGELOG with HTTP(S) and TCP health checks summary.

Posted by al...@apache.org.
Updated the CHANGELOG with HTTP(S) and TCP health checks summary.


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/037a346a
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/037a346a
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/037a346a

Branch: refs/heads/master
Commit: 037a346a205ad7bdba99d771855f8caeea835d4a
Parents: bb084fb
Author: Alexander Rukletsov <al...@apache.org>
Authored: Sun Aug 28 16:27:42 2016 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Sun Aug 28 16:43:21 2016 +0200

----------------------------------------------------------------------
 CHANGELOG | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/037a346a/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index 587d843..eadf546 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,8 +2,20 @@ Release Notes - Mesos - Version 1.1.0 (WIP)
 --------------------------------------------
 This release contains the following new features:
 
-Deprecations:
+  * [MESOS-2533] - Support for HTTP and HTTPS health checks. Executors may now
+    use the updated `HealthCheck` protobuf to implement HTTP(S) health checks.
+    Both default executors (command and docker) leverage `curl` binary for
+    sending HTTP(S) requests and connect to `127.0.0.1`, hence a task must
+    listen on all interfaces. On Linux, for BRIDGE and USER modes, docker
+    executor enters the task's network namespace.
+
+  * [MESOS-3567] - Support for TCP health checks. Executors may now use the
+    updated `HealthCheck` protobuf to implement TCP health checks. Both default
+    executors (command and docker) connect to `127.0.0.1`, hence a task must
+    listen on all interfaces. On Linux, for BRIDGE and USER modes, docker
+    executor enters the task's network namespace.
 
+Deprecations:
   * The following metrics are deprecated and will be removed in Mesos 1.4:
     master/slave_shutdowns_scheduled, master/slave_shutdowns_canceled, and
     slave_shutdowns_completed. As of Mesos 1.1.0, these metrics will always be
@@ -11,6 +23,8 @@ Deprecations:
     master/slave_unreachable_scheduled, master/slave_unreachable_canceled,
     and master/slave_unreachable_completed.
 
+  * [MESOS-5955] - Health check binary "mesos-health-check" is removed.
+
 
 Release Notes - Mesos - Version 1.0.1
 --------------------------------------------


[06/10] mesos git commit: Moved `process::internal::defaultClone` to `process::`.

Posted by al...@apache.org.
Moved `process::internal::defaultClone` to `process::`.

Review: https://reviews.apache.org/r/51378/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/d2f8bfab
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/d2f8bfab
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/d2f8bfab

Branch: refs/heads/master
Commit: d2f8bfabe48bee5533c30afaecbcf76ef081248f
Parents: 1d00492
Author: haosdent huang <ha...@gmail.com>
Authored: Fri Aug 26 16:34:01 2016 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Sun Aug 28 16:42:57 2016 +0200

----------------------------------------------------------------------
 .../include/process/posix/subprocess.hpp        | 33 ++++++++++----------
 1 file changed, 17 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/d2f8bfab/3rdparty/libprocess/include/process/posix/subprocess.hpp
----------------------------------------------------------------------
diff --git a/3rdparty/libprocess/include/process/posix/subprocess.hpp b/3rdparty/libprocess/include/process/posix/subprocess.hpp
index a871fe4..57f71d1 100644
--- a/3rdparty/libprocess/include/process/posix/subprocess.hpp
+++ b/3rdparty/libprocess/include/process/posix/subprocess.hpp
@@ -51,6 +51,23 @@ namespace process {
 using InputFileDescriptors = Subprocess::IO::InputFileDescriptors;
 using OutputFileDescriptors = Subprocess::IO::OutputFileDescriptors;
 
+
+inline pid_t defaultClone(const lambda::function<int()>& func)
+{
+  pid_t pid = ::fork();
+  if (pid == -1) {
+    return -1;
+  } else if (pid == 0) {
+    // Child.
+    ::exit(func());
+    UNREACHABLE();
+  } else {
+    // Parent.
+    return pid;
+  }
+}
+
+
 namespace internal {
 
 // This function will invoke `os::close` on all specified file
@@ -100,22 +117,6 @@ inline Try<Nothing> cloexec(
 }
 
 
-inline pid_t defaultClone(const lambda::function<int()>& func)
-{
-  pid_t pid = ::fork();
-  if (pid == -1) {
-    return -1;
-  } else if (pid == 0) {
-    // Child.
-    ::exit(func());
-    UNREACHABLE();
-  } else {
-    // Parent.
-    return pid;
-  }
-}
-
-
 inline void signalHandler(int signal)
 {
   // Send SIGKILL to every process in the process group of the


[08/10] mesos git commit: Added support for TCP health checks.

Posted by al...@apache.org.
Added support for TCP health checks.

Review: https://reviews.apache.org/r/49360/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/1d004920
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/1d004920
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/1d004920

Branch: refs/heads/master
Commit: 1d00492086a3b8298c2f23fd5196d31d0bfef73d
Parents: 2b80207
Author: haosdent huang <ha...@gmail.com>
Authored: Fri Aug 26 16:33:52 2016 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Sun Aug 28 16:42:57 2016 +0200

----------------------------------------------------------------------
 src/health-check/health_checker.cpp | 80 +++++++++++++++++++++++++++++++-
 src/health-check/health_checker.hpp |  6 +++
 2 files changed, 85 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/1d004920/src/health-check/health_checker.cpp
----------------------------------------------------------------------
diff --git a/src/health-check/health_checker.cpp b/src/health-check/health_checker.cpp
index 4dd573b..e5e7b94 100644
--- a/src/health-check/health_checker.cpp
+++ b/src/health-check/health_checker.cpp
@@ -444,7 +444,85 @@ Future<Nothing> HealthCheckerProcess::_tcpHealthCheck()
   CHECK_EQ(HealthCheck::TCP, check.type());
   CHECK(check.has_tcp());
 
-  promise.fail("TCP health check is not supported");
+  const HealthCheck::TCPCheckInfo& tcp = check.tcp();
+
+  VLOG(1) << "Launching TCP health check at port '" << tcp.port() << "'";
+
+  // TODO(haosdent): Replace `bash` with a tiny binary to support
+  // TCP health check with half-open.
+  const vector<string> argv = {
+    "bash",
+    "-c",
+    "</dev/tcp/" + DEFAULT_DOMAIN + "/" + stringify(tcp.port())
+  };
+
+  Try<Subprocess> s = subprocess(
+      "bash",
+      argv,
+      Subprocess::PATH("/dev/null"),
+      Subprocess::PIPE(),
+      Subprocess::PIPE());
+
+  if (s.isError()) {
+    return Failure("Failed to create the bash subprocess: " + s.error());
+  }
+
+  pid_t bashPid = s->pid();
+  Duration timeout = Seconds(check.timeout_seconds());
+
+  return await(
+      s->status(),
+      process::io::read(s->out().get()),
+      process::io::read(s->err().get()))
+    .after(timeout,
+      [timeout, bashPid](Future<tuple<Future<Option<int>>,
+                                      Future<string>,
+                                      Future<string>>> future) {
+      future.discard();
+
+      if (bashPid != -1) {
+        // Cleanup the bash process.
+        VLOG(1) << "Killing the TCP health check process " << bashPid;
+
+        os::killtree(bashPid, SIGKILL);
+      }
+
+      return Failure(
+          "bash has not returned after " + stringify(timeout) + "; aborting");
+    })
+    .then(defer(self(), &Self::__tcpHealthCheck, lambda::_1));
+}
+
+
+Future<Nothing> HealthCheckerProcess::__tcpHealthCheck(
+    const tuple<
+        Future<Option<int>>,
+        Future<string>,
+        Future<string>>& t)
+{
+  Future<Option<int>> status = std::get<0>(t);
+  if (!status.isReady()) {
+    return Failure(
+        "Failed to get the exit status of the bash process: " +
+        (status.isFailed() ? status.failure() : "discarded"));
+  }
+
+  if (status->isNone()) {
+    return Failure("Failed to reap the bash process");
+  }
+
+  int statusCode = status->get();
+  if (statusCode != 0) {
+    Future<string> error = std::get<2>(t);
+    if (!error.isReady()) {
+      return Failure("bash returned " + WSTRINGIFY(statusCode) +
+                     "; reading stderr failed: " +
+                     (error.isFailed() ? error.failure() : "discarded"));
+    }
+
+    return Failure("bash returned " + WSTRINGIFY(statusCode) + ": " +
+                   error.get());
+  }
 
   return Nothing();
 }

http://git-wip-us.apache.org/repos/asf/mesos/blob/1d004920/src/health-check/health_checker.hpp
----------------------------------------------------------------------
diff --git a/src/health-check/health_checker.hpp b/src/health-check/health_checker.hpp
index 83a3384..52acdef 100644
--- a/src/health-check/health_checker.hpp
+++ b/src/health-check/health_checker.hpp
@@ -91,6 +91,12 @@ private:
 
   process::Future<Nothing> _tcpHealthCheck();
 
+  process::Future<Nothing> __tcpHealthCheck(
+      const std::tuple<
+          process::Future<Option<int>>,
+          process::Future<std::string>,
+          process::Future<std::string>>& t);
+
   void reschedule();
 
   process::Promise<Nothing> promise;


[02/10] mesos git commit: Updated the docker executor to use health checks via the library.

Posted by al...@apache.org.
Updated the docker executor to use health checks via the library.

We updated the command executor to use health checks via library in
https://reviews.apache.org/r/49389/. This patch updates the docker
executor for consistency.

Review: https://reviews.apache.org/r/49555/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/b16e5fa0
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/b16e5fa0
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/b16e5fa0

Branch: refs/heads/master
Commit: b16e5fa04575aa632f5e3a55c1d6320cb09add89
Parents: 564eda0
Author: haosdent huang <ha...@gmail.com>
Authored: Fri Aug 26 16:31:32 2016 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Sun Aug 28 16:31:54 2016 +0200

----------------------------------------------------------------------
 src/docker/executor.cpp | 73 ++++++++++++++------------------------------
 1 file changed, 23 insertions(+), 50 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/b16e5fa0/src/docker/executor.cpp
----------------------------------------------------------------------
diff --git a/src/docker/executor.cpp b/src/docker/executor.cpp
index 8d679cd..5508819 100644
--- a/src/docker/executor.cpp
+++ b/src/docker/executor.cpp
@@ -84,14 +84,13 @@ public:
       const string& sandboxDirectory,
       const string& mappedDirectory,
       const Duration& shutdownGracePeriod,
-      const string& healthCheckDir,
+      const string& launcherDir,
       const map<string, string>& taskEnvironment)
     : ProcessBase(ID::generate("docker-executor")),
       killed(false),
       killedByHealthCheck(false),
       terminated(false),
-      healthPid(-1),
-      healthCheckDir(healthCheckDir),
+      launcherDir(launcherDir),
       docker(docker),
       containerName(containerName),
       sandboxDirectory(sandboxDirectory),
@@ -340,17 +339,6 @@ private:
       inspect
         .onAny(defer(self(), &Self::_killTask, _taskId, gracePeriod));
     }
-
-    // Cleanup health check process.
-    //
-    // TODO(bmahler): Consider doing this after the task has been
-    // reaped, since a framework may be interested in health
-    // information while the task is being killed (consider a
-    // task that takes 30 minutes to be cleanly killed).
-    if (healthPid != -1) {
-      os::killtree(healthPid, SIGKILL);
-      healthPid = -1;
-    }
   }
 
   void _killTask(const TaskID& taskId_, const Duration& gracePeriod)
@@ -522,41 +510,26 @@ private:
           strings::join(" ", commandArguments));
     }
 
-    JSON::Object json = JSON::protobuf(healthCheck);
-
-    const string path = path::join(healthCheckDir, "mesos-health-check");
+    Try<Owned<health::HealthChecker>> _checker = health::HealthChecker::create(
+        healthCheck,
+        self(),
+        task.task_id());
 
-    // Launch the subprocess using 'exec' style so that quotes can
-    // be properly handled.
-    vector<string> checkerArguments;
-    checkerArguments.push_back(path);
-    checkerArguments.push_back("--executor=" + stringify(self()));
-    checkerArguments.push_back("--health_check_json=" + stringify(json));
-    checkerArguments.push_back("--task_id=" + task.task_id().value());
-
-    cout << "Launching health check process: "
-         << strings::join(" ", checkerArguments) << endl;
-
-    Try<Subprocess> healthProcess =
-      process::subprocess(
-        path,
-        checkerArguments,
-        // Intentionally not sending STDIN to avoid health check
-        // commands that expect STDIN input to block.
-        Subprocess::PATH("/dev/null"),
-        Subprocess::FD(STDOUT_FILENO),
-        Subprocess::FD(STDERR_FILENO));
+    if (_checker.isError()) {
+      // TODO(gilbert): Consider ABORT and return a TASK_FAILED here.
+      cerr << "Failed to create health checker: "
+           << _checker.error() << endl;
+    } else {
+      checker = _checker.get();
 
-    if (healthProcess.isError()) {
-      cerr << "Unable to launch health process: "
-           << healthProcess.error() << endl;
-      return;
+      checker->healthCheck()
+        .onAny([](const Future<Nothing>& future) {
+          // Only possible to be a failure.
+          if (future.isFailed()) {
+            cerr << "Health check failed:" << future.failure() << endl;
+          }
+        });
     }
-
-    healthPid = healthProcess.get().pid();
-
-    cout << "Health check process launched at pid: "
-         << stringify(healthPid) << endl;
   }
 
   // TODO(alexr): Introduce a state enum and document transitions,
@@ -565,8 +538,7 @@ private:
   bool killedByHealthCheck;
   bool terminated;
 
-  pid_t healthPid;
-  string healthCheckDir;
+  string launcherDir;
   Owned<Docker> docker;
   string containerName;
   string sandboxDirectory;
@@ -581,6 +553,7 @@ private:
   Option<ExecutorDriver*> driver;
   Option<FrameworkInfo> frameworkInfo;
   Option<TaskID> taskId;
+  Owned<health::HealthChecker> checker;
   Option<NetworkInfo> containerNetworkInfo;
 };
 
@@ -594,7 +567,7 @@ public:
       const string& sandboxDirectory,
       const string& mappedDirectory,
       const Duration& shutdownGracePeriod,
-      const string& healthCheckDir,
+      const string& launcherDir,
       const map<string, string>& taskEnvironment)
   {
     process = Owned<DockerExecutorProcess>(new DockerExecutorProcess(
@@ -603,7 +576,7 @@ public:
         sandboxDirectory,
         mappedDirectory,
         shutdownGracePeriod,
-        healthCheckDir,
+        launcherDir,
         taskEnvironment));
 
     spawn(process.get());


[07/10] mesos git commit: Added support for HTTP/HTTPS health checks.

Posted by al...@apache.org.
Added support for HTTP/HTTPS health checks.

Review: https://reviews.apache.org/r/36816/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/2b80207d
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/2b80207d
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/2b80207d

Branch: refs/heads/master
Commit: 2b80207d1f05aa9dcb5747d161de84aada41643c
Parents: 7380d13
Author: haosdent huang <ha...@gmail.com>
Authored: Fri Aug 26 16:33:41 2016 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Sun Aug 28 16:42:57 2016 +0200

----------------------------------------------------------------------
 src/health-check/health_checker.cpp | 117 ++++++++++++++++++++++++++++++-
 src/health-check/health_checker.hpp |   7 ++
 2 files changed, 123 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/2b80207d/src/health-check/health_checker.cpp
----------------------------------------------------------------------
diff --git a/src/health-check/health_checker.cpp b/src/health-check/health_checker.cpp
index 097117a..4dd573b 100644
--- a/src/health-check/health_checker.cpp
+++ b/src/health-check/health_checker.cpp
@@ -31,6 +31,8 @@
 
 #include <process/collect.hpp>
 #include <process/delay.hpp>
+#include <process/http.hpp>
+#include <process/io.hpp>
 #include <process/subprocess.hpp>
 
 #include <stout/duration.hpp>
@@ -57,12 +59,20 @@ using process::UPID;
 
 using std::map;
 using std::string;
+using std::tuple;
 using std::vector;
 
 namespace mesos {
 namespace internal {
 namespace health {
 
+static const string DEFAULT_HTTP_SCHEME = "http";
+
+// Use '127.0.0.1' instead of 'localhost', because the hosts
+// file in some container images may not contain 'localhost'.
+static const string DEFAULT_DOMAIN = "127.0.0.1";
+
+
 Try<Owned<HealthChecker>> HealthChecker::create(
     const HealthCheck& check,
     const UPID& executor,
@@ -318,7 +328,112 @@ Future<Nothing> HealthCheckerProcess::_httpHealthCheck()
   CHECK_EQ(HealthCheck::HTTP, check.type());
   CHECK(check.has_http());
 
-  promise.fail("HTTP health check is not supported");
+  const HealthCheck::HTTPCheckInfo& http = check.http();
+
+  const string scheme = http.has_scheme() ? http.scheme() : DEFAULT_HTTP_SCHEME;
+  const string path = http.has_path() ? http.path() : "";
+  const string url = scheme + "://" + DEFAULT_DOMAIN + ":" +
+                     stringify(http.port()) + path;
+
+  VLOG(1) << "Launching HTTP health check '" << url << "'";
+
+  const vector<string> argv = {
+    "curl",
+    "-s",                 // Don't show progress meter or error messages.
+    "-S",                 // Makes curl show an error message if it fails.
+    "-L",                 // Follows HTTP 3xx redirects.
+    "-k",                 // Ignores SSL validation when scheme is https.
+    "-w", "%{http_code}", // Displays HTTP response code on stdout.
+    "-o", "/dev/null",    // Ignores output.
+    url
+  };
+
+  Try<Subprocess> s = subprocess(
+      "curl",
+      argv,
+      Subprocess::PATH("/dev/null"),
+      Subprocess::PIPE(),
+      Subprocess::PIPE());
+
+  if (s.isError()) {
+    return Failure("Failed to create the curl subprocess: " + s.error());
+  }
+
+  pid_t curlPid = s->pid();
+  Duration timeout = Seconds(check.timeout_seconds());
+
+  return await(
+      s->status(),
+      process::io::read(s->out().get()),
+      process::io::read(s->err().get()))
+    .after(timeout,
+      [timeout, curlPid](Future<tuple<Future<Option<int>>,
+                                      Future<string>,
+                                      Future<string>>> future) {
+      future.discard();
+
+      if (curlPid != -1) {
+        // Cleanup the curl process.
+        VLOG(1) << "Killing the HTTP health check process " << curlPid;
+
+        os::killtree(curlPid, SIGKILL);
+      }
+
+      return Failure(
+          "curl has not returned after " + stringify(timeout) + "; aborting");
+    })
+    .then(defer(self(), &Self::__httpHealthCheck, lambda::_1));
+}
+
+
+Future<Nothing> HealthCheckerProcess::__httpHealthCheck(
+    const tuple<
+        Future<Option<int>>,
+        Future<string>,
+        Future<string>>& t)
+{
+  Future<Option<int>> status = std::get<0>(t);
+  if (!status.isReady()) {
+    return Failure(
+        "Failed to get the exit status of the curl process: " +
+        (status.isFailed() ? status.failure() : "discarded"));
+  }
+
+  if (status->isNone()) {
+    return Failure("Failed to reap the curl process");
+  }
+
+  int statusCode = status->get();
+  if (statusCode != 0) {
+    Future<string> error = std::get<2>(t);
+    if (!error.isReady()) {
+      return Failure("curl returned " + WSTRINGIFY(statusCode) +
+                     "; reading stderr failed: " +
+                     (error.isFailed() ? error.failure() : "discarded"));
+    }
+
+    return Failure("curl returned " + WSTRINGIFY(statusCode) + ": " +
+                   error.get());
+  }
+
+  Future<string> output = std::get<1>(t);
+  if (!output.isReady()) {
+    return Failure("Failed to read stdout from curl: " +
+                   (output.isFailed() ? output.failure() : "discarded"));
+  }
+
+  // Parse the output and get the HTTP response code.
+  Try<int> code = numify<int>(output.get());
+  if (code.isError()) {
+    return Failure("Unexpected output from curl: " + output.get());
+  }
+
+  if (code.get() < process::http::Status::OK ||
+      code.get() >= process::http::Status::BAD_REQUEST) {
+    return Failure(
+        "Unexpected HTTP response code: " +
+        process::http::Status::string(code.get()));
+  }
 
   return Nothing();
 }

http://git-wip-us.apache.org/repos/asf/mesos/blob/2b80207d/src/health-check/health_checker.hpp
----------------------------------------------------------------------
diff --git a/src/health-check/health_checker.hpp b/src/health-check/health_checker.hpp
index 83cedfb..83a3384 100644
--- a/src/health-check/health_checker.hpp
+++ b/src/health-check/health_checker.hpp
@@ -18,6 +18,7 @@
 #define __HEALTH_CHECKER_HPP__
 
 #include <string>
+#include <tuple>
 
 #include <mesos/mesos.hpp>
 
@@ -82,6 +83,12 @@ private:
 
   process::Future<Nothing> _httpHealthCheck();
 
+  process::Future<Nothing> __httpHealthCheck(
+      const std::tuple<
+          process::Future<Option<int>>,
+          process::Future<std::string>,
+          process::Future<std::string>>& t);
+
   process::Future<Nothing> _tcpHealthCheck();
 
   void reschedule();