You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by al...@apache.org on 2017/03/30 17:35:41 UTC

[4/4] mesos git commit: Improved log/failure messages in the (health)checker libraries.

Improved log/failure messages in the (health)checker libraries.

- Make log/failure messages consistent across both libraries.
- Task and container IDs are user generated and can contain spaces, so
  we have to wrap them in single quotes.
- Remove the redundant task IDs from 'Failure' messages.

Review: https://reviews.apache.org/r/57854/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/5c9ce378
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/5c9ce378
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/5c9ce378

Branch: refs/heads/master
Commit: 5c9ce378f627cbd4c2ed16f1e342dde16d5ee939
Parents: 080e1b7
Author: Gastón Kleiman <ga...@mesosphere.io>
Authored: Thu Mar 30 12:58:38 2017 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Thu Mar 30 19:34:24 2017 +0200

----------------------------------------------------------------------
 src/checks/checker.cpp        |  60 +++++++-------
 src/checks/health_checker.cpp | 166 ++++++++++++++++++++-----------------
 src/tests/check_tests.cpp     |   6 +-
 3 files changed, 121 insertions(+), 111 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/5c9ce378/src/checks/checker.cpp
----------------------------------------------------------------------
diff --git a/src/checks/checker.cpp b/src/checks/checker.cpp
index 3f2d8d8..1664acd 100644
--- a/src/checks/checker.cpp
+++ b/src/checks/checker.cpp
@@ -102,13 +102,12 @@ static pid_t cloneWithSetns(
         Try<Nothing> setns = ns::setns(taskPid.get(), ns);
         if (setns.isError()) {
           // This effectively aborts the check.
-          LOG(FATAL) << "Failed to enter the " << ns << " namespace of "
-                     << "task (pid: '" << taskPid.get() << "'): "
-                     << setns.error();
+          LOG(FATAL) << "Failed to enter the " << ns << " namespace of task"
+                     << " (pid: " << taskPid.get() << "): " << setns.error();
         }
 
-        VLOG(1) << "Entered the " << ns << " namespace of "
-                << "task (pid: '" << taskPid.get() << "') successfully";
+        VLOG(1) << "Entered the " << ns << " namespace of task"
+                << " (pid: " << taskPid.get() << ") successfully";
       }
     }
 
@@ -272,7 +271,7 @@ CheckerProcess::CheckerProcess(
 
 void CheckerProcess::initialize()
 {
-  VLOG(1) << "Check configuration for task " << taskId << ":"
+  VLOG(1) << "Check configuration for task '" << taskId << "':"
           << " '" << jsonify(JSON::Protobuf(check)) << "'";
 
   scheduleNext(checkDelay);
@@ -281,7 +280,7 @@ void CheckerProcess::initialize()
 
 void CheckerProcess::finalize()
 {
-  LOG(INFO) << "Checking for task " << taskId << " stopped";
+  LOG(INFO) << "Checking for task '" << taskId << "' stopped";
 }
 
 
@@ -315,7 +314,7 @@ void CheckerProcess::performCheck()
 
 void CheckerProcess::scheduleNext(const Duration& duration)
 {
-  VLOG(1) << "Scheduling check for task " << taskId << " in " << duration;
+  VLOG(1) << "Scheduling check for task '" << taskId << "' in " << duration;
 
   delay(duration, self(), &Self::performCheck);
 }
@@ -325,8 +324,8 @@ void CheckerProcess::processCheckResult(
     const Stopwatch& stopwatch,
     const CheckStatusInfo& result)
 {
-  VLOG(1) << "Performed " << check.type() << " check for task " << taskId
-          << " in " << stopwatch.elapsed();
+  VLOG(1) << "Performed " << check.type() << " check"
+          << " for task '" << taskId << "' in " << stopwatch.elapsed();
 
   // Trigger the callback if check info changes.
   if (result != previousCheckStatus) {
@@ -360,8 +359,8 @@ Future<int> CheckerProcess::commandCheck()
 
   if (command.shell()) {
     // Use the shell variant.
-    VLOG(1) << "Launching command check '" << command.value() << "'"
-            << " for task " << taskId;
+    VLOG(1) << "Launching COMMAND check '" << command.value() << "'"
+            << " for task '" << taskId << "'";
 
     s = process::subprocess(
         command.value(),
@@ -375,8 +374,8 @@ Future<int> CheckerProcess::commandCheck()
     vector<string> argv(
         std::begin(command.arguments()), std::end(command.arguments()));
 
-    VLOG(1) << "Launching command check [" << command.value() << ", "
-            << strings::join(", ", argv) << "] for task " << taskId;
+    VLOG(1) << "Launching COMMAND check [" << command.value() << ", "
+            << strings::join(", ", argv) << "] for task '" << taskId << "'";
 
     s = process::subprocess(
         command.value(),
@@ -407,14 +406,13 @@ Future<int> CheckerProcess::commandCheck()
 
       if (commandPid != -1) {
         // Cleanup the external command process.
-        VLOG(1) << "Killing the command check process " << commandPid
-                << " for task " << _taskId;
+        VLOG(1) << "Killing the COMMAND check process '" << commandPid
+                << "' for task '" << _taskId << "'";
 
         os::killtree(commandPid, SIGKILL);
       }
 
-      return Failure(
-          "Command timed out after " + stringify(timeout) + "; aborting");
+      return Failure("Command timed out after " + stringify(timeout));
     })
     .then([](const Option<int>& exitCode) -> Future<int> {
       if (exitCode.isNone()) {
@@ -441,15 +439,16 @@ void CheckerProcess::processCommandCheckResult(
   // see MESOS-7242.
   if (result.isReady() && WIFEXITED(result.get())) {
     const int exitCode = WEXITSTATUS(result.get());
-    VLOG(1) << check.type() << " check for task "
-            << taskId << " returned " << exitCode;
+    VLOG(1) << check.type() << " check for task '"
+            << taskId << "' returned: " << exitCode;
 
     checkStatusInfo.mutable_command()->set_exit_code(
         static_cast<int32_t>(exitCode));
   } else {
     // Check's status is currently not available, which may indicate a change
     // that should be reported as an empty `CheckStatusInfo.Command` message.
-    LOG(WARNING) << "Check for task " << taskId << " failed: "
+    LOG(WARNING) << check.type() << " check for task '" << taskId
+                 << "' failed: "
                  << (result.isFailed() ? result.failure() : "discarded");
 
     checkStatusInfo.mutable_command();
@@ -471,7 +470,7 @@ Future<int> CheckerProcess::httpCheck()
   const string url = scheme + "://" + DEFAULT_DOMAIN + ":" +
                      stringify(http.port()) + path;
 
-  VLOG(1) << "Launching HTTP check '" << url << "' for task " << taskId;
+  VLOG(1) << "Launching HTTP check '" << url << "' for task '" << taskId << "'";
 
   const vector<string> argv = {
     HTTP_CHECK_COMMAND,
@@ -522,14 +521,14 @@ Future<int> CheckerProcess::httpCheck()
       if (curlPid != -1) {
         // Cleanup the HTTP_CHECK_COMMAND process.
         VLOG(1) << "Killing the HTTP check process " << curlPid
-                << " for task " << _taskId;
+                << " for task '" << _taskId << "'";
 
         os::killtree(curlPid, SIGKILL);
       }
 
       return Failure(
           string(HTTP_CHECK_COMMAND) + " timed out after " +
-          stringify(timeout) + "; aborting");
+          stringify(timeout));
     })
     .then(defer(self(), &Self::_httpCheck, lambda::_1));
 }
@@ -595,15 +594,15 @@ void CheckerProcess::processHttpCheckResult(
   checkStatusInfo.set_type(check.type());
 
   if (result.isReady()) {
-    VLOG(1) << check.type() << " check for task "
-            << taskId << " returned " << result.get();
+    VLOG(1) << check.type() << " check for task '"
+            << taskId << "' returned: " << result.get();
 
     checkStatusInfo.mutable_http()->set_status_code(
         static_cast<uint32_t>(result.get()));
   } else {
     // Check's status is currently not available, which may indicate a change
     // that should be reported as an empty `CheckStatusInfo.Http` message.
-    LOG(WARNING) << "Check for task " << taskId << " failed: "
+    LOG(WARNING) << "Check for task '" << taskId << "' failed: "
                  << (result.isFailed() ? result.failure() : "discarded");
 
     checkStatusInfo.mutable_http();
@@ -623,7 +622,7 @@ Option<Error> checkInfo(const CheckInfo& checkInfo)
   switch (checkInfo.type()) {
     case CheckInfo::COMMAND: {
       if (!checkInfo.has_command()) {
-        return Error("Expecting 'command' to be set for command check");
+        return Error("Expecting 'command' to be set for COMMAND check");
       }
 
       const CommandInfo& command = checkInfo.command().command();
@@ -656,8 +655,7 @@ Option<Error> checkInfo(const CheckInfo& checkInfo)
 
       if (http.has_path() && !strings::startsWith(http.path(), '/')) {
         return Error(
-            "The path '" + http.path() +
-            "' of HTTP  check must start with '/'");
+            "The path '" + http.path() + "' of HTTP check must start with '/'");
       }
 
       break;
@@ -696,7 +694,7 @@ Option<Error> checkStatusInfo(const CheckStatusInfo& checkStatusInfo)
     case CheckInfo::COMMAND: {
       if (!checkStatusInfo.has_command()) {
         return Error(
-            "Expecting 'command' to be set for command check's status");
+            "Expecting 'command' to be set for COMMAND check's status");
       }
       break;
     }

http://git-wip-us.apache.org/repos/asf/mesos/blob/5c9ce378/src/checks/health_checker.cpp
----------------------------------------------------------------------
diff --git a/src/checks/health_checker.cpp b/src/checks/health_checker.cpp
index b768c5b..769278c 100644
--- a/src/checks/health_checker.cpp
+++ b/src/checks/health_checker.cpp
@@ -116,13 +116,12 @@ static pid_t cloneWithSetns(
         Try<Nothing> setns = ns::setns(taskPid.get(), ns);
         if (setns.isError()) {
           // This effectively aborts the health check.
-          LOG(FATAL) << "Failed to enter the " << ns << " namespace of "
-                     << "task (pid: '" << taskPid.get() << "'): "
-                     << setns.error();
+          LOG(FATAL) << "Failed to enter the " << ns << " namespace of task"
+                     << " (pid: '" << taskPid.get() << "'): " << setns.error();
         }
 
-        VLOG(1) << "Entered the " << ns << " namespace of "
-                << "task (pid: '" << taskPid.get() << "') successfully";
+        VLOG(1) << "Entered the " << ns << " namespace of task"
+                << " (pid: '" << taskPid.get() << "') successfully";
       }
     }
 
@@ -269,7 +268,7 @@ HealthCheckerProcess::HealthCheckerProcess(
 
 void HealthCheckerProcess::initialize()
 {
-  VLOG(1) << "Health check configuration:"
+  VLOG(1) << "Health check configuration for task '" << taskId << "':"
           << " '" << jsonify(JSON::Protobuf(check)) << "'";
 
   startTime = Clock::now();
@@ -283,14 +282,17 @@ void HealthCheckerProcess::failure(const string& message)
   if (initializing &&
       checkGracePeriod.secs() > 0 &&
       (Clock::now() - startTime) <= checkGracePeriod) {
-    LOG(INFO) << "Ignoring failure as health check still in grace period";
+    LOG(INFO) << "Ignoring failure of "
+              << HealthCheck::Type_Name(check.type()) << " health check for"
+              << " task '" << taskId << "': still in grace period";
     scheduleNext(checkInterval);
     return;
   }
 
   consecutiveFailures++;
-  LOG(WARNING) << "Health check failed " << consecutiveFailures
-               << " times consecutively: " << message;
+  LOG(WARNING) << HealthCheck::Type_Name(check.type())
+               << " health check for task '" << taskId << "' failed "
+               << consecutiveFailures << " times consecutively: " << message;
 
   bool killTask = consecutiveFailures >= check.consecutive_failures();
 
@@ -314,7 +316,8 @@ void HealthCheckerProcess::failure(const string& message)
 
 void HealthCheckerProcess::success()
 {
-  VLOG(1) << HealthCheck::Type_Name(check.type()) << " health check passed";
+  VLOG(1) << HealthCheck::Type_Name(check.type()) << " health check for task '"
+          << taskId << "' passed";
 
   // Send a healthy status update on the first success,
   // and on the first success following failure(s).
@@ -376,20 +379,22 @@ void HealthCheckerProcess::processCheckResult(
 {
   // `HealthChecker` might have been paused while performing the check.
   if (paused) {
-    LOG(INFO) << "Ignoring health check result of task " + stringify(taskId) +
-                 " (health checking is paused)";
+    LOG(INFO) << "Ignoring " << HealthCheck::Type_Name(check.type())
+              << " health check result for task '" << taskId
+              << "': health checking is paused";
     return;
   }
 
   if (future.isDiscarded()) {
-    LOG(INFO) << HealthCheck::Type_Name(check.type()) +
-                 " health check of task " + stringify(taskId) + " discarded";
+    LOG(INFO) << HealthCheck::Type_Name(check.type()) << " health check for"
+              << " task '" << taskId << "' discarded";
     scheduleNext(checkInterval);
     return;
   }
 
   VLOG(1) << "Performed " << HealthCheck::Type_Name(check.type())
-          << " health check in " << stopwatch.elapsed();
+          << " health check for task '" << taskId << "' in "
+          << stopwatch.elapsed();
 
   if (future.isReady()) {
     success();
@@ -397,7 +402,8 @@ void HealthCheckerProcess::processCheckResult(
   }
 
   string message = HealthCheck::Type_Name(check.type()) +
-                   " health check failed: " + future.failure();
+                   " health check for task '" + stringify(taskId) +
+                   "' failed: " + future.failure();
 
   failure(message);
 }
@@ -422,7 +428,8 @@ Future<Nothing> HealthCheckerProcess::commandHealthCheck()
 
   if (command.shell()) {
     // Use the shell variant.
-    VLOG(1) << "Launching command health check '" << command.value() << "'";
+    VLOG(1) << "Launching COMMAND health check '" << command.value() << "'"
+            << " for task '" << taskId << "'";
 
     external = subprocess(
         command.value(),
@@ -438,8 +445,8 @@ Future<Nothing> HealthCheckerProcess::commandHealthCheck()
       argv.push_back(arg);
     }
 
-    VLOG(1) << "Launching command health check [" << command.value() << ", "
-            << strings::join(", ", argv) << "]";
+    VLOG(1) << "Launching COMMAND health check [" << command.value() << ", "
+            << strings::join(", ", argv) << "] for task '" << taskId << "'";
 
     external = subprocess(
         command.value(),
@@ -456,24 +463,27 @@ Future<Nothing> HealthCheckerProcess::commandHealthCheck()
     return Failure("Failed to create subprocess: " + external.error());
   }
 
+  // TODO(alexr): Use lambda named captures for
+  // these cached values once they are available.
   pid_t commandPid = external->pid();
   const Duration timeout = checkTimeout;
+  const TaskID _taskId = taskId;
 
   return external->status()
     .after(
         timeout,
-        [timeout, commandPid](Future<Option<int>> future) {
+        [timeout, commandPid, _taskId](Future<Option<int>> future) {
       future.discard();
 
       if (commandPid != -1) {
         // Cleanup the external command process.
-        VLOG(1) << "Killing the command health check process " << commandPid;
+        VLOG(1) << "Killing the COMMAND health check process '" << commandPid
+                << "' for task '" << _taskId << "'";
 
         os::killtree(commandPid, SIGKILL);
       }
 
-      return Failure(
-          "Command timed out after " + stringify(timeout) + "; aborting");
+      return Failure("Command timed out after " + stringify(timeout));
     })
     .then([](const Option<int>& status) -> Future<Nothing> {
       if (status.isNone()) {
@@ -482,7 +492,7 @@ Future<Nothing> HealthCheckerProcess::commandHealthCheck()
 
       int statusCode = status.get();
       if (statusCode != 0) {
-        return Failure("Command returned " + WSTRINGIFY(statusCode));
+        return Failure("Command returned: " + WSTRINGIFY(statusCode));
       }
 
       return Nothing();
@@ -497,7 +507,7 @@ Future<Nothing> HealthCheckerProcess::nestedCommandHealthCheck()
   CHECK(check.has_command());
   CHECK_SOME(agentURL);
 
-  VLOG(1) << "Launching command health check of task " << stringify(taskId);
+  VLOG(1) << "Launching COMMAND health check for task '" << taskId << "'";
 
   // We don't want recoverable errors, e.g., the agent responding with
   // HTTP status code 503, to trigger a health check failure.
@@ -526,11 +536,12 @@ Future<Nothing> HealthCheckerProcess::nestedCommandHealthCheck()
                        {"Content-Type", stringify(ContentType::PROTOBUF)}};
 
     process::http::request(request, false)
-      .onFailed(defer(self(), [this, promise](const string& failure) {
-        LOG(WARNING) << "Connection to remove the nested container "
-                     << stringify(previousCheckContainerId.get())
-                     << " used for the command health check of task "
-                     << stringify(taskId) << " failed: " << failure;
+      .onFailed(defer(self(),
+                      [this, promise](const string& failure) {
+        LOG(WARNING) << "Connection to remove the nested container '"
+                     << previousCheckContainerId.get()
+                     << "' used for the COMMAND health check for task '"
+                     << taskId << "' failed: " << failure;
 
         // Something went wrong while sending the request, we treat this
         // as a transient failure and discard the promise.
@@ -539,14 +550,12 @@ Future<Nothing> HealthCheckerProcess::nestedCommandHealthCheck()
       .onReady(defer(self(), [this, promise](const Response& response) {
         if (response.code != process::http::Status::OK) {
           // The agent was unable to remove the health check container,
-          // we treat this as a transient failure and discard the
-          // promise.
+          // we treat this as a transient failure and discard the promise.
           LOG(WARNING) << "Received '" << response.status << "' ("
-                       << response.body
-                       << ") while removing the nested container "
-                       << stringify(previousCheckContainerId.get())
-                       << " used for the COMMAND health check for task"
-                       << stringify(taskId);
+                       << response.body << ") while removing the nested"
+                       << " container '" << previousCheckContainerId.get()
+                       << "' used for the COMMAND health check for task '"
+                       << taskId << "'";
 
           promise->discard();
         }
@@ -612,8 +621,8 @@ void HealthCheckerProcess::__nestedCommandHealthCheck(
                      {"Message-Accept", stringify(ContentType::PROTOBUF)},
                      {"Content-Type", stringify(ContentType::PROTOBUF)}};
 
-  // TODO(alexr): Use lambda named captures for
-  // these cached values once they are available.
+  // TODO(alexr): Use a lambda named capture for
+  // this cached value once it is available.
   const Duration timeout = checkTimeout;
 
   auto checkTimedOut = std::make_shared<bool>(false);
@@ -637,8 +646,7 @@ void HealthCheckerProcess::__nestedCommandHealthCheck(
 
       *checkTimedOut = true;
 
-      return Failure(
-          "Command timed out after " + stringify(timeout) + "; aborting");
+      return Failure("Command timed out after " + stringify(timeout));
     }))
     .onFailed(defer(self(),
                     &Self::nestedCommandHealthCheckFailure,
@@ -664,27 +672,21 @@ void HealthCheckerProcess::___nestedCommandHealthCheck(
     // The agent was unable to launch the health check container, we
     // treat this as a transient failure.
     LOG(WARNING) << "Received '" << launchResponse.status << "' ("
-                 << launchResponse.body << ") while launching command health "
-                 << "check of task " << stringify(taskId);
+                 << launchResponse.body << ") while launching COMMAND health"
+                 << " check for task '" << taskId << "'";
 
     promise->discard();
     return;
   }
 
-  // We need to make a copy so that the lambdas can capture it.
-  const TaskID _taskId = taskId;
-
   waitNestedContainer(checkContainerId)
-    .onFailed([_taskId, promise](const string& failure) {
+    .onFailed([promise](const string& failure) {
       promise->fail(
-          "Unable to get the exit code of command health check of task " +
-          stringify(_taskId) + ": " + failure);
+          "Unable to get the exit code: " + failure);
     })
-    .onReady([_taskId, promise](const Option<int>& status) -> void {
+    .onReady([promise](const Option<int>& status) -> void {
       if (status.isNone()) {
-        promise->fail(
-            "Unable to get the exit code of command health check of task " +
-            stringify(_taskId));
+        promise->fail("Unable to get the exit code");
       // TODO(gkleiman): Make sure that the following block works on Windows.
       } else if (WIFSIGNALED(status.get()) &&
                  WTERMSIG(status.get()) == SIGKILL) {
@@ -693,9 +695,7 @@ void HealthCheckerProcess::___nestedCommandHealthCheck(
         // the result.
         promise->discard();
       } else if (status.get() != 0) {
-        promise->fail(
-            "Command health check of task " + stringify(_taskId) +
-            " returned " + WSTRINGIFY(status.get()));
+        promise->fail("Command returned: " + WSTRINGIFY(status.get()));
       } else {
         promise->set(Nothing());
       }
@@ -769,10 +769,10 @@ Future<Option<int>> HealthCheckerProcess::waitNestedContainer(
                      {"Content-Type", stringify(ContentType::PROTOBUF)}};
 
   return process::http::request(request, false)
-    .repair([this](const Future<Response>& future) {
+    .repair([containerId](const Future<Response>& future) {
       return Failure(
-          "Connection to wait for a health check of task " +
-          stringify(taskId) + " failed: " + future.failure());
+          "Connection to wait for health check container '" +
+          stringify(containerId) + "' failed: " + future.failure());
     })
     .then(defer(self(),
                 &Self::_waitNestedContainer, containerId, lambda::_1));
@@ -786,7 +786,8 @@ Future<Option<int>> HealthCheckerProcess::_waitNestedContainer(
   if (httpResponse.code != process::http::Status::OK) {
     return Failure(
         "Received '" + httpResponse.status + "' (" + httpResponse.body +
-        ") while waiting on health check of task " + stringify(taskId));
+        ") while waiting on health check container '" +
+        stringify(containerId) + "'");
   }
 
   Try<agent::Response> response =
@@ -814,7 +815,8 @@ Future<Nothing> HealthCheckerProcess::httpHealthCheck()
   const string url = scheme + "://" + DEFAULT_DOMAIN + ":" +
                      stringify(http.port()) + path;
 
-  VLOG(1) << "Launching HTTP health check '" << url << "'";
+  VLOG(1) << "Launching HTTP health check '" << url << "'"
+          << " for task '" << taskId << "'";
 
   const vector<string> argv = {
     HTTP_CHECK_COMMAND,
@@ -845,8 +847,11 @@ Future<Nothing> HealthCheckerProcess::httpHealthCheck()
         " subprocess: " + s.error());
   }
 
+  // TODO(alexr): Use lambda named captures for
+  // these cached values once they are available.
   pid_t curlPid = s->pid();
   const Duration timeout = checkTimeout;
+  const TaskID _taskId = taskId;
 
   return await(
       s->status(),
@@ -854,21 +859,22 @@ Future<Nothing> HealthCheckerProcess::httpHealthCheck()
       process::io::read(s->err().get()))
     .after(
         timeout,
-        [timeout, curlPid](Future<tuple<Future<Option<int>>,
-                                        Future<string>,
-                                        Future<string>>> future) {
+        [timeout, curlPid, _taskId](Future<tuple<Future<Option<int>>,
+                                                 Future<string>,
+                                                 Future<string>>> future) {
       future.discard();
 
       if (curlPid != -1) {
         // Cleanup the HTTP_CHECK_COMMAND process.
-        VLOG(1) << "Killing the HTTP health check process " << curlPid;
+        VLOG(1) << "Killing the HTTP health check process '" << curlPid
+                << "' for task '" << _taskId << "'";
 
         os::killtree(curlPid, SIGKILL);
       }
 
       return Failure(
           string(HTTP_CHECK_COMMAND) + " timed out after " +
-          stringify(timeout) + "; aborting");
+          stringify(timeout));
     })
     .then(defer(self(), &Self::_httpHealthCheck, lambda::_1));
 }
@@ -943,7 +949,8 @@ Future<Nothing> HealthCheckerProcess::tcpHealthCheck()
 
   const HealthCheck::TCPCheckInfo& tcp = check.tcp();
 
-  VLOG(1) << "Launching TCP health check at port '" << tcp.port() << "'";
+  VLOG(1) << "Launching TCP health check for task '" << taskId << "' at port"
+          << tcp.port();
 
   const string tcpConnectPath = path::join(launcherDir, TCP_CHECK_COMMAND);
 
@@ -971,8 +978,11 @@ Future<Nothing> HealthCheckerProcess::tcpHealthCheck()
         " subprocess: " + s.error());
   }
 
+  // TODO(alexr): Use lambda named captures for
+  // these cached values once they are available.
   pid_t tcpConnectPid = s->pid();
   const Duration timeout = checkTimeout;
+  const TaskID _taskId = taskId;
 
   return await(
       s->status(),
@@ -980,21 +990,22 @@ Future<Nothing> HealthCheckerProcess::tcpHealthCheck()
       process::io::read(s->err().get()))
     .after(
         timeout,
-        [timeout, tcpConnectPid](Future<tuple<Future<Option<int>>,
-                                              Future<string>,
-                                              Future<string>>> future) {
+        [timeout, tcpConnectPid, _taskId](Future<tuple<Future<Option<int>>,
+                                                       Future<string>,
+                                                       Future<string>>> future)
+    {
       future.discard();
 
       if (tcpConnectPid != -1) {
         // Cleanup the TCP_CHECK_COMMAND process.
-        VLOG(1) << "Killing the TCP health check process " << tcpConnectPid;
+        VLOG(1) << "Killing the TCP health check process " << tcpConnectPid
+                << " for task '" << _taskId << "'";
 
         os::killtree(tcpConnectPid, SIGKILL);
       }
 
       return Failure(
-          string(TCP_CHECK_COMMAND) + " timed out after " +
-          stringify(timeout) + "; aborting");
+          string(TCP_CHECK_COMMAND) + " timed out after " + stringify(timeout));
     })
     .then(defer(self(), &Self::_tcpHealthCheck, lambda::_1));
 }
@@ -1041,7 +1052,8 @@ void HealthCheckerProcess::scheduleNext(const Duration& duration)
 {
   CHECK(!paused);
 
-  VLOG(1) << "Scheduling health check in " << duration;
+  VLOG(1) << "Scheduling health check for task '" << taskId << "' in "
+          << duration;
 
   delay(duration, self(), &Self::performSingleCheck);
 }
@@ -1050,7 +1062,7 @@ void HealthCheckerProcess::scheduleNext(const Duration& duration)
 void HealthCheckerProcess::pause()
 {
   if (!paused) {
-    VLOG(1) << "Health checking paused";
+    VLOG(1) << "Health checking for task '" << taskId << "' paused";
 
     paused = true;
   }
@@ -1060,7 +1072,7 @@ void HealthCheckerProcess::pause()
 void HealthCheckerProcess::resume()
 {
   if (paused) {
-    VLOG(1) << "Health checking resumed";
+    VLOG(1) << "Health checking for task '" << taskId << "' resumed";
 
     paused = false;
 
@@ -1081,7 +1093,7 @@ Option<Error> healthCheck(const HealthCheck& check)
   switch (check.type()) {
     case HealthCheck::COMMAND: {
       if (!check.has_command()) {
-        return Error("Expecting 'command' to be set for command health check");
+        return Error("Expecting 'command' to be set for COMMAND health check");
       }
 
       const CommandInfo& command = check.command();

http://git-wip-us.apache.org/repos/asf/mesos/blob/5c9ce378/src/tests/check_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/check_tests.cpp b/src/tests/check_tests.cpp
index 16f1c07..78ac498 100644
--- a/src/tests/check_tests.cpp
+++ b/src/tests/check_tests.cpp
@@ -1028,7 +1028,7 @@ TEST_F(CheckTest, CheckInfoValidation)
     Option<Error> validate = validation::checkInfo(checkInfo);
     EXPECT_SOME(validate);
     EXPECT_EQ(
-        "Expecting 'command' to be set for command check",
+        "Expecting 'command' to be set for COMMAND check",
         validate->message);
 
     checkInfo.set_type(CheckInfo::HTTP);
@@ -1090,7 +1090,7 @@ TEST_F(CheckTest, CheckInfoValidation)
     validate = validation::checkInfo(checkInfo);
     EXPECT_SOME(validate);
     EXPECT_EQ(
-        "The path 'healthz' of HTTP  check must start with '/'",
+        "The path 'healthz' of HTTP check must start with '/'",
         validate->message);
   }
 
@@ -1160,7 +1160,7 @@ TEST_F(CheckTest, CheckStatusInfoValidation)
     Option<Error> validate = validation::checkStatusInfo(checkStatusInfo);
     EXPECT_SOME(validate);
     EXPECT_EQ(
-        "Expecting 'command' to be set for command check's status",
+        "Expecting 'command' to be set for COMMAND check's status",
         validate->message);
 
     checkStatusInfo.set_type(CheckInfo::HTTP);