You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by al...@apache.org on 2017/03/30 17:35:41 UTC
[4/4] mesos git commit: Improved log/failure messages in the
(health)checker libraries.
Improved log/failure messages in the (health)checker libraries.
- Make log/failure messages consistent across both libraries.
- Task and container IDs are user generated and can contain spaces, so
we have to wrap them in single quotes.
- Remove the redundant task IDs from 'Failure' messages.
Review: https://reviews.apache.org/r/57854/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/5c9ce378
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/5c9ce378
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/5c9ce378
Branch: refs/heads/master
Commit: 5c9ce378f627cbd4c2ed16f1e342dde16d5ee939
Parents: 080e1b7
Author: Gastón Kleiman <ga...@mesosphere.io>
Authored: Thu Mar 30 12:58:38 2017 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Thu Mar 30 19:34:24 2017 +0200
----------------------------------------------------------------------
src/checks/checker.cpp | 60 +++++++-------
src/checks/health_checker.cpp | 166 ++++++++++++++++++++-----------------
src/tests/check_tests.cpp | 6 +-
3 files changed, 121 insertions(+), 111 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/5c9ce378/src/checks/checker.cpp
----------------------------------------------------------------------
diff --git a/src/checks/checker.cpp b/src/checks/checker.cpp
index 3f2d8d8..1664acd 100644
--- a/src/checks/checker.cpp
+++ b/src/checks/checker.cpp
@@ -102,13 +102,12 @@ static pid_t cloneWithSetns(
Try<Nothing> setns = ns::setns(taskPid.get(), ns);
if (setns.isError()) {
// This effectively aborts the check.
- LOG(FATAL) << "Failed to enter the " << ns << " namespace of "
- << "task (pid: '" << taskPid.get() << "'): "
- << setns.error();
+ LOG(FATAL) << "Failed to enter the " << ns << " namespace of task"
+ << " (pid: " << taskPid.get() << "): " << setns.error();
}
- VLOG(1) << "Entered the " << ns << " namespace of "
- << "task (pid: '" << taskPid.get() << "') successfully";
+ VLOG(1) << "Entered the " << ns << " namespace of task"
+ << " (pid: " << taskPid.get() << ") successfully";
}
}
@@ -272,7 +271,7 @@ CheckerProcess::CheckerProcess(
void CheckerProcess::initialize()
{
- VLOG(1) << "Check configuration for task " << taskId << ":"
+ VLOG(1) << "Check configuration for task '" << taskId << "':"
<< " '" << jsonify(JSON::Protobuf(check)) << "'";
scheduleNext(checkDelay);
@@ -281,7 +280,7 @@ void CheckerProcess::initialize()
void CheckerProcess::finalize()
{
- LOG(INFO) << "Checking for task " << taskId << " stopped";
+ LOG(INFO) << "Checking for task '" << taskId << "' stopped";
}
@@ -315,7 +314,7 @@ void CheckerProcess::performCheck()
void CheckerProcess::scheduleNext(const Duration& duration)
{
- VLOG(1) << "Scheduling check for task " << taskId << " in " << duration;
+ VLOG(1) << "Scheduling check for task '" << taskId << "' in " << duration;
delay(duration, self(), &Self::performCheck);
}
@@ -325,8 +324,8 @@ void CheckerProcess::processCheckResult(
const Stopwatch& stopwatch,
const CheckStatusInfo& result)
{
- VLOG(1) << "Performed " << check.type() << " check for task " << taskId
- << " in " << stopwatch.elapsed();
+ VLOG(1) << "Performed " << check.type() << " check"
+ << " for task '" << taskId << "' in " << stopwatch.elapsed();
// Trigger the callback if check info changes.
if (result != previousCheckStatus) {
@@ -360,8 +359,8 @@ Future<int> CheckerProcess::commandCheck()
if (command.shell()) {
// Use the shell variant.
- VLOG(1) << "Launching command check '" << command.value() << "'"
- << " for task " << taskId;
+ VLOG(1) << "Launching COMMAND check '" << command.value() << "'"
+ << " for task '" << taskId << "'";
s = process::subprocess(
command.value(),
@@ -375,8 +374,8 @@ Future<int> CheckerProcess::commandCheck()
vector<string> argv(
std::begin(command.arguments()), std::end(command.arguments()));
- VLOG(1) << "Launching command check [" << command.value() << ", "
- << strings::join(", ", argv) << "] for task " << taskId;
+ VLOG(1) << "Launching COMMAND check [" << command.value() << ", "
+ << strings::join(", ", argv) << "] for task '" << taskId << "'";
s = process::subprocess(
command.value(),
@@ -407,14 +406,13 @@ Future<int> CheckerProcess::commandCheck()
if (commandPid != -1) {
// Cleanup the external command process.
- VLOG(1) << "Killing the command check process " << commandPid
- << " for task " << _taskId;
+ VLOG(1) << "Killing the COMMAND check process '" << commandPid
+ << "' for task '" << _taskId << "'";
os::killtree(commandPid, SIGKILL);
}
- return Failure(
- "Command timed out after " + stringify(timeout) + "; aborting");
+ return Failure("Command timed out after " + stringify(timeout));
})
.then([](const Option<int>& exitCode) -> Future<int> {
if (exitCode.isNone()) {
@@ -441,15 +439,16 @@ void CheckerProcess::processCommandCheckResult(
// see MESOS-7242.
if (result.isReady() && WIFEXITED(result.get())) {
const int exitCode = WEXITSTATUS(result.get());
- VLOG(1) << check.type() << " check for task "
- << taskId << " returned " << exitCode;
+ VLOG(1) << check.type() << " check for task '"
+ << taskId << "' returned: " << exitCode;
checkStatusInfo.mutable_command()->set_exit_code(
static_cast<int32_t>(exitCode));
} else {
// Check's status is currently not available, which may indicate a change
// that should be reported as an empty `CheckStatusInfo.Command` message.
- LOG(WARNING) << "Check for task " << taskId << " failed: "
+ LOG(WARNING) << check.type() << " check for task '" << taskId
+ << "' failed: "
<< (result.isFailed() ? result.failure() : "discarded");
checkStatusInfo.mutable_command();
@@ -471,7 +470,7 @@ Future<int> CheckerProcess::httpCheck()
const string url = scheme + "://" + DEFAULT_DOMAIN + ":" +
stringify(http.port()) + path;
- VLOG(1) << "Launching HTTP check '" << url << "' for task " << taskId;
+ VLOG(1) << "Launching HTTP check '" << url << "' for task '" << taskId << "'";
const vector<string> argv = {
HTTP_CHECK_COMMAND,
@@ -522,14 +521,14 @@ Future<int> CheckerProcess::httpCheck()
if (curlPid != -1) {
// Cleanup the HTTP_CHECK_COMMAND process.
VLOG(1) << "Killing the HTTP check process " << curlPid
- << " for task " << _taskId;
+ << " for task '" << _taskId << "'";
os::killtree(curlPid, SIGKILL);
}
return Failure(
string(HTTP_CHECK_COMMAND) + " timed out after " +
- stringify(timeout) + "; aborting");
+ stringify(timeout));
})
.then(defer(self(), &Self::_httpCheck, lambda::_1));
}
@@ -595,15 +594,15 @@ void CheckerProcess::processHttpCheckResult(
checkStatusInfo.set_type(check.type());
if (result.isReady()) {
- VLOG(1) << check.type() << " check for task "
- << taskId << " returned " << result.get();
+ VLOG(1) << check.type() << " check for task '"
+ << taskId << "' returned: " << result.get();
checkStatusInfo.mutable_http()->set_status_code(
static_cast<uint32_t>(result.get()));
} else {
// Check's status is currently not available, which may indicate a change
// that should be reported as an empty `CheckStatusInfo.Http` message.
- LOG(WARNING) << "Check for task " << taskId << " failed: "
+ LOG(WARNING) << "Check for task '" << taskId << "' failed: "
<< (result.isFailed() ? result.failure() : "discarded");
checkStatusInfo.mutable_http();
@@ -623,7 +622,7 @@ Option<Error> checkInfo(const CheckInfo& checkInfo)
switch (checkInfo.type()) {
case CheckInfo::COMMAND: {
if (!checkInfo.has_command()) {
- return Error("Expecting 'command' to be set for command check");
+ return Error("Expecting 'command' to be set for COMMAND check");
}
const CommandInfo& command = checkInfo.command().command();
@@ -656,8 +655,7 @@ Option<Error> checkInfo(const CheckInfo& checkInfo)
if (http.has_path() && !strings::startsWith(http.path(), '/')) {
return Error(
- "The path '" + http.path() +
- "' of HTTP check must start with '/'");
+ "The path '" + http.path() + "' of HTTP check must start with '/'");
}
break;
@@ -696,7 +694,7 @@ Option<Error> checkStatusInfo(const CheckStatusInfo& checkStatusInfo)
case CheckInfo::COMMAND: {
if (!checkStatusInfo.has_command()) {
return Error(
- "Expecting 'command' to be set for command check's status");
+ "Expecting 'command' to be set for COMMAND check's status");
}
break;
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/5c9ce378/src/checks/health_checker.cpp
----------------------------------------------------------------------
diff --git a/src/checks/health_checker.cpp b/src/checks/health_checker.cpp
index b768c5b..769278c 100644
--- a/src/checks/health_checker.cpp
+++ b/src/checks/health_checker.cpp
@@ -116,13 +116,12 @@ static pid_t cloneWithSetns(
Try<Nothing> setns = ns::setns(taskPid.get(), ns);
if (setns.isError()) {
// This effectively aborts the health check.
- LOG(FATAL) << "Failed to enter the " << ns << " namespace of "
- << "task (pid: '" << taskPid.get() << "'): "
- << setns.error();
+ LOG(FATAL) << "Failed to enter the " << ns << " namespace of task"
+ << " (pid: '" << taskPid.get() << "'): " << setns.error();
}
- VLOG(1) << "Entered the " << ns << " namespace of "
- << "task (pid: '" << taskPid.get() << "') successfully";
+ VLOG(1) << "Entered the " << ns << " namespace of task"
+ << " (pid: '" << taskPid.get() << "') successfully";
}
}
@@ -269,7 +268,7 @@ HealthCheckerProcess::HealthCheckerProcess(
void HealthCheckerProcess::initialize()
{
- VLOG(1) << "Health check configuration:"
+ VLOG(1) << "Health check configuration for task '" << taskId << "':"
<< " '" << jsonify(JSON::Protobuf(check)) << "'";
startTime = Clock::now();
@@ -283,14 +282,17 @@ void HealthCheckerProcess::failure(const string& message)
if (initializing &&
checkGracePeriod.secs() > 0 &&
(Clock::now() - startTime) <= checkGracePeriod) {
- LOG(INFO) << "Ignoring failure as health check still in grace period";
+ LOG(INFO) << "Ignoring failure of "
+ << HealthCheck::Type_Name(check.type()) << " health check for"
+ << " task '" << taskId << "': still in grace period";
scheduleNext(checkInterval);
return;
}
consecutiveFailures++;
- LOG(WARNING) << "Health check failed " << consecutiveFailures
- << " times consecutively: " << message;
+ LOG(WARNING) << HealthCheck::Type_Name(check.type())
+ << " health check for task '" << taskId << "' failed "
+ << consecutiveFailures << " times consecutively: " << message;
bool killTask = consecutiveFailures >= check.consecutive_failures();
@@ -314,7 +316,8 @@ void HealthCheckerProcess::failure(const string& message)
void HealthCheckerProcess::success()
{
- VLOG(1) << HealthCheck::Type_Name(check.type()) << " health check passed";
+ VLOG(1) << HealthCheck::Type_Name(check.type()) << " health check for task '"
+ << taskId << "' passed";
// Send a healthy status update on the first success,
// and on the first success following failure(s).
@@ -376,20 +379,22 @@ void HealthCheckerProcess::processCheckResult(
{
// `HealthChecker` might have been paused while performing the check.
if (paused) {
- LOG(INFO) << "Ignoring health check result of task " + stringify(taskId) +
- " (health checking is paused)";
+ LOG(INFO) << "Ignoring " << HealthCheck::Type_Name(check.type())
+ << " health check result for task '" << taskId
+ << "': health checking is paused";
return;
}
if (future.isDiscarded()) {
- LOG(INFO) << HealthCheck::Type_Name(check.type()) +
- " health check of task " + stringify(taskId) + " discarded";
+ LOG(INFO) << HealthCheck::Type_Name(check.type()) << " health check for"
+ << " task '" << taskId << "' discarded";
scheduleNext(checkInterval);
return;
}
VLOG(1) << "Performed " << HealthCheck::Type_Name(check.type())
- << " health check in " << stopwatch.elapsed();
+ << " health check for task '" << taskId << "' in "
+ << stopwatch.elapsed();
if (future.isReady()) {
success();
@@ -397,7 +402,8 @@ void HealthCheckerProcess::processCheckResult(
}
string message = HealthCheck::Type_Name(check.type()) +
- " health check failed: " + future.failure();
+ " health check for task '" + stringify(taskId) +
+ "' failed: " + future.failure();
failure(message);
}
@@ -422,7 +428,8 @@ Future<Nothing> HealthCheckerProcess::commandHealthCheck()
if (command.shell()) {
// Use the shell variant.
- VLOG(1) << "Launching command health check '" << command.value() << "'";
+ VLOG(1) << "Launching COMMAND health check '" << command.value() << "'"
+ << " for task '" << taskId << "'";
external = subprocess(
command.value(),
@@ -438,8 +445,8 @@ Future<Nothing> HealthCheckerProcess::commandHealthCheck()
argv.push_back(arg);
}
- VLOG(1) << "Launching command health check [" << command.value() << ", "
- << strings::join(", ", argv) << "]";
+ VLOG(1) << "Launching COMMAND health check [" << command.value() << ", "
+ << strings::join(", ", argv) << "] for task '" << taskId << "'";
external = subprocess(
command.value(),
@@ -456,24 +463,27 @@ Future<Nothing> HealthCheckerProcess::commandHealthCheck()
return Failure("Failed to create subprocess: " + external.error());
}
+ // TODO(alexr): Use lambda named captures for
+ // these cached values once they are available.
pid_t commandPid = external->pid();
const Duration timeout = checkTimeout;
+ const TaskID _taskId = taskId;
return external->status()
.after(
timeout,
- [timeout, commandPid](Future<Option<int>> future) {
+ [timeout, commandPid, _taskId](Future<Option<int>> future) {
future.discard();
if (commandPid != -1) {
// Cleanup the external command process.
- VLOG(1) << "Killing the command health check process " << commandPid;
+ VLOG(1) << "Killing the COMMAND health check process '" << commandPid
+ << "' for task '" << _taskId << "'";
os::killtree(commandPid, SIGKILL);
}
- return Failure(
- "Command timed out after " + stringify(timeout) + "; aborting");
+ return Failure("Command timed out after " + stringify(timeout));
})
.then([](const Option<int>& status) -> Future<Nothing> {
if (status.isNone()) {
@@ -482,7 +492,7 @@ Future<Nothing> HealthCheckerProcess::commandHealthCheck()
int statusCode = status.get();
if (statusCode != 0) {
- return Failure("Command returned " + WSTRINGIFY(statusCode));
+ return Failure("Command returned: " + WSTRINGIFY(statusCode));
}
return Nothing();
@@ -497,7 +507,7 @@ Future<Nothing> HealthCheckerProcess::nestedCommandHealthCheck()
CHECK(check.has_command());
CHECK_SOME(agentURL);
- VLOG(1) << "Launching command health check of task " << stringify(taskId);
+ VLOG(1) << "Launching COMMAND health check for task '" << taskId << "'";
// We don't want recoverable errors, e.g., the agent responding with
// HTTP status code 503, to trigger a health check failure.
@@ -526,11 +536,12 @@ Future<Nothing> HealthCheckerProcess::nestedCommandHealthCheck()
{"Content-Type", stringify(ContentType::PROTOBUF)}};
process::http::request(request, false)
- .onFailed(defer(self(), [this, promise](const string& failure) {
- LOG(WARNING) << "Connection to remove the nested container "
- << stringify(previousCheckContainerId.get())
- << " used for the command health check of task "
- << stringify(taskId) << " failed: " << failure;
+ .onFailed(defer(self(),
+ [this, promise](const string& failure) {
+ LOG(WARNING) << "Connection to remove the nested container '"
+ << previousCheckContainerId.get()
+ << "' used for the COMMAND health check for task '"
+ << taskId << "' failed: " << failure;
// Something went wrong while sending the request, we treat this
// as a transient failure and discard the promise.
@@ -539,14 +550,12 @@ Future<Nothing> HealthCheckerProcess::nestedCommandHealthCheck()
.onReady(defer(self(), [this, promise](const Response& response) {
if (response.code != process::http::Status::OK) {
// The agent was unable to remove the health check container,
- // we treat this as a transient failure and discard the
- // promise.
+ // we treat this as a transient failure and discard the promise.
LOG(WARNING) << "Received '" << response.status << "' ("
- << response.body
- << ") while removing the nested container "
- << stringify(previousCheckContainerId.get())
- << " used for the COMMAND health check for task"
- << stringify(taskId);
+ << response.body << ") while removing the nested"
+ << " container '" << previousCheckContainerId.get()
+ << "' used for the COMMAND health check for task '"
+ << taskId << "'";
promise->discard();
}
@@ -612,8 +621,8 @@ void HealthCheckerProcess::__nestedCommandHealthCheck(
{"Message-Accept", stringify(ContentType::PROTOBUF)},
{"Content-Type", stringify(ContentType::PROTOBUF)}};
- // TODO(alexr): Use lambda named captures for
- // these cached values once they are available.
+ // TODO(alexr): Use a lambda named capture for
+ // this cached value once it is available.
const Duration timeout = checkTimeout;
auto checkTimedOut = std::make_shared<bool>(false);
@@ -637,8 +646,7 @@ void HealthCheckerProcess::__nestedCommandHealthCheck(
*checkTimedOut = true;
- return Failure(
- "Command timed out after " + stringify(timeout) + "; aborting");
+ return Failure("Command timed out after " + stringify(timeout));
}))
.onFailed(defer(self(),
&Self::nestedCommandHealthCheckFailure,
@@ -664,27 +672,21 @@ void HealthCheckerProcess::___nestedCommandHealthCheck(
// The agent was unable to launch the health check container, we
// treat this as a transient failure.
LOG(WARNING) << "Received '" << launchResponse.status << "' ("
- << launchResponse.body << ") while launching command health "
- << "check of task " << stringify(taskId);
+ << launchResponse.body << ") while launching COMMAND health"
+ << " check for task '" << taskId << "'";
promise->discard();
return;
}
- // We need to make a copy so that the lambdas can capture it.
- const TaskID _taskId = taskId;
-
waitNestedContainer(checkContainerId)
- .onFailed([_taskId, promise](const string& failure) {
+ .onFailed([promise](const string& failure) {
promise->fail(
- "Unable to get the exit code of command health check of task " +
- stringify(_taskId) + ": " + failure);
+ "Unable to get the exit code: " + failure);
})
- .onReady([_taskId, promise](const Option<int>& status) -> void {
+ .onReady([promise](const Option<int>& status) -> void {
if (status.isNone()) {
- promise->fail(
- "Unable to get the exit code of command health check of task " +
- stringify(_taskId));
+ promise->fail("Unable to get the exit code");
// TODO(gkleiman): Make sure that the following block works on Windows.
} else if (WIFSIGNALED(status.get()) &&
WTERMSIG(status.get()) == SIGKILL) {
@@ -693,9 +695,7 @@ void HealthCheckerProcess::___nestedCommandHealthCheck(
// the result.
promise->discard();
} else if (status.get() != 0) {
- promise->fail(
- "Command health check of task " + stringify(_taskId) +
- " returned " + WSTRINGIFY(status.get()));
+ promise->fail("Command returned: " + WSTRINGIFY(status.get()));
} else {
promise->set(Nothing());
}
@@ -769,10 +769,10 @@ Future<Option<int>> HealthCheckerProcess::waitNestedContainer(
{"Content-Type", stringify(ContentType::PROTOBUF)}};
return process::http::request(request, false)
- .repair([this](const Future<Response>& future) {
+ .repair([containerId](const Future<Response>& future) {
return Failure(
- "Connection to wait for a health check of task " +
- stringify(taskId) + " failed: " + future.failure());
+ "Connection to wait for health check container '" +
+ stringify(containerId) + "' failed: " + future.failure());
})
.then(defer(self(),
&Self::_waitNestedContainer, containerId, lambda::_1));
@@ -786,7 +786,8 @@ Future<Option<int>> HealthCheckerProcess::_waitNestedContainer(
if (httpResponse.code != process::http::Status::OK) {
return Failure(
"Received '" + httpResponse.status + "' (" + httpResponse.body +
- ") while waiting on health check of task " + stringify(taskId));
+ ") while waiting on health check container '" +
+ stringify(containerId) + "'");
}
Try<agent::Response> response =
@@ -814,7 +815,8 @@ Future<Nothing> HealthCheckerProcess::httpHealthCheck()
const string url = scheme + "://" + DEFAULT_DOMAIN + ":" +
stringify(http.port()) + path;
- VLOG(1) << "Launching HTTP health check '" << url << "'";
+ VLOG(1) << "Launching HTTP health check '" << url << "'"
+ << " for task '" << taskId << "'";
const vector<string> argv = {
HTTP_CHECK_COMMAND,
@@ -845,8 +847,11 @@ Future<Nothing> HealthCheckerProcess::httpHealthCheck()
" subprocess: " + s.error());
}
+ // TODO(alexr): Use lambda named captures for
+ // these cached values once they are available.
pid_t curlPid = s->pid();
const Duration timeout = checkTimeout;
+ const TaskID _taskId = taskId;
return await(
s->status(),
@@ -854,21 +859,22 @@ Future<Nothing> HealthCheckerProcess::httpHealthCheck()
process::io::read(s->err().get()))
.after(
timeout,
- [timeout, curlPid](Future<tuple<Future<Option<int>>,
- Future<string>,
- Future<string>>> future) {
+ [timeout, curlPid, _taskId](Future<tuple<Future<Option<int>>,
+ Future<string>,
+ Future<string>>> future) {
future.discard();
if (curlPid != -1) {
// Cleanup the HTTP_CHECK_COMMAND process.
- VLOG(1) << "Killing the HTTP health check process " << curlPid;
+ VLOG(1) << "Killing the HTTP health check process '" << curlPid
+ << "' for task '" << _taskId << "'";
os::killtree(curlPid, SIGKILL);
}
return Failure(
string(HTTP_CHECK_COMMAND) + " timed out after " +
- stringify(timeout) + "; aborting");
+ stringify(timeout));
})
.then(defer(self(), &Self::_httpHealthCheck, lambda::_1));
}
@@ -943,7 +949,8 @@ Future<Nothing> HealthCheckerProcess::tcpHealthCheck()
const HealthCheck::TCPCheckInfo& tcp = check.tcp();
- VLOG(1) << "Launching TCP health check at port '" << tcp.port() << "'";
+ VLOG(1) << "Launching TCP health check for task '" << taskId << "' at port"
+ << tcp.port();
const string tcpConnectPath = path::join(launcherDir, TCP_CHECK_COMMAND);
@@ -971,8 +978,11 @@ Future<Nothing> HealthCheckerProcess::tcpHealthCheck()
" subprocess: " + s.error());
}
+ // TODO(alexr): Use lambda named captures for
+ // these cached values once they are available.
pid_t tcpConnectPid = s->pid();
const Duration timeout = checkTimeout;
+ const TaskID _taskId = taskId;
return await(
s->status(),
@@ -980,21 +990,22 @@ Future<Nothing> HealthCheckerProcess::tcpHealthCheck()
process::io::read(s->err().get()))
.after(
timeout,
- [timeout, tcpConnectPid](Future<tuple<Future<Option<int>>,
- Future<string>,
- Future<string>>> future) {
+ [timeout, tcpConnectPid, _taskId](Future<tuple<Future<Option<int>>,
+ Future<string>,
+ Future<string>>> future)
+ {
future.discard();
if (tcpConnectPid != -1) {
// Cleanup the TCP_CHECK_COMMAND process.
- VLOG(1) << "Killing the TCP health check process " << tcpConnectPid;
+ VLOG(1) << "Killing the TCP health check process " << tcpConnectPid
+ << " for task '" << _taskId << "'";
os::killtree(tcpConnectPid, SIGKILL);
}
return Failure(
- string(TCP_CHECK_COMMAND) + " timed out after " +
- stringify(timeout) + "; aborting");
+ string(TCP_CHECK_COMMAND) + " timed out after " + stringify(timeout));
})
.then(defer(self(), &Self::_tcpHealthCheck, lambda::_1));
}
@@ -1041,7 +1052,8 @@ void HealthCheckerProcess::scheduleNext(const Duration& duration)
{
CHECK(!paused);
- VLOG(1) << "Scheduling health check in " << duration;
+ VLOG(1) << "Scheduling health check for task '" << taskId << "' in "
+ << duration;
delay(duration, self(), &Self::performSingleCheck);
}
@@ -1050,7 +1062,7 @@ void HealthCheckerProcess::scheduleNext(const Duration& duration)
void HealthCheckerProcess::pause()
{
if (!paused) {
- VLOG(1) << "Health checking paused";
+ VLOG(1) << "Health checking for task '" << taskId << "' paused";
paused = true;
}
@@ -1060,7 +1072,7 @@ void HealthCheckerProcess::pause()
void HealthCheckerProcess::resume()
{
if (paused) {
- VLOG(1) << "Health checking resumed";
+ VLOG(1) << "Health checking for task '" << taskId << "' resumed";
paused = false;
@@ -1081,7 +1093,7 @@ Option<Error> healthCheck(const HealthCheck& check)
switch (check.type()) {
case HealthCheck::COMMAND: {
if (!check.has_command()) {
- return Error("Expecting 'command' to be set for command health check");
+ return Error("Expecting 'command' to be set for COMMAND health check");
}
const CommandInfo& command = check.command();
http://git-wip-us.apache.org/repos/asf/mesos/blob/5c9ce378/src/tests/check_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/check_tests.cpp b/src/tests/check_tests.cpp
index 16f1c07..78ac498 100644
--- a/src/tests/check_tests.cpp
+++ b/src/tests/check_tests.cpp
@@ -1028,7 +1028,7 @@ TEST_F(CheckTest, CheckInfoValidation)
Option<Error> validate = validation::checkInfo(checkInfo);
EXPECT_SOME(validate);
EXPECT_EQ(
- "Expecting 'command' to be set for command check",
+ "Expecting 'command' to be set for COMMAND check",
validate->message);
checkInfo.set_type(CheckInfo::HTTP);
@@ -1090,7 +1090,7 @@ TEST_F(CheckTest, CheckInfoValidation)
validate = validation::checkInfo(checkInfo);
EXPECT_SOME(validate);
EXPECT_EQ(
- "The path 'healthz' of HTTP check must start with '/'",
+ "The path 'healthz' of HTTP check must start with '/'",
validate->message);
}
@@ -1160,7 +1160,7 @@ TEST_F(CheckTest, CheckStatusInfoValidation)
Option<Error> validate = validation::checkStatusInfo(checkStatusInfo);
EXPECT_SOME(validate);
EXPECT_EQ(
- "Expecting 'command' to be set for command check's status",
+ "Expecting 'command' to be set for COMMAND check's status",
validate->message);
checkStatusInfo.set_type(CheckInfo::HTTP);