You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2013/09/24 21:35:49 UTC
git commit: Empty files are no longer treated as recovery errors.
Updated Branches:
refs/heads/master 45e2568f5 -> 58955f9c9
Empty files are no longer treated as recovery errors.
Review: https://reviews.apache.org/r/14057
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/58955f9c
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/58955f9c
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/58955f9c
Branch: refs/heads/master
Commit: 58955f9c90bb663607d47d2362ed29c7cac1b4c8
Parents: 45e2568
Author: Vinod Kone <vi...@twitter.com>
Authored: Mon Sep 9 17:29:35 2013 -0700
Committer: Vinod Kone <vi...@twitter.com>
Committed: Tue Sep 24 12:27:35 2013 -0700
----------------------------------------------------------------------
src/slave/state.cpp | 73 +++++++++++++++++++++++++++++++++++++++---------
1 file changed, 60 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/58955f9c/src/slave/state.cpp
----------------------------------------------------------------------
diff --git a/src/slave/state.cpp b/src/slave/state.cpp
index c03db32..5208e4e 100644
--- a/src/slave/state.cpp
+++ b/src/slave/state.cpp
@@ -79,9 +79,9 @@ Try<SlaveState> SlaveState::recover(
const Result<SlaveInfo>& slaveInfo = ::protobuf::read<SlaveInfo>(path);
- if (!slaveInfo.isSome()) {
+ if (slaveInfo.isError()) {
const string& message = "Failed to read slave info from '" + path + "': " +
- (slaveInfo.isError() ? slaveInfo.error() : " none");
+ slaveInfo.error();
if (strict) {
return Error(message);
} else {
@@ -91,6 +91,13 @@ Try<SlaveState> SlaveState::recover(
}
}
+ if (slaveInfo.isNone()) {
+ // This could happen if the slave died after opening the file for
+ // writing but before it checkpointed anything.
+ LOG(WARNING) << "Found empty slave info file '" << path << "'";
+ return state;
+ }
+
state.info = slaveInfo.get();
// Find the frameworks.
@@ -146,9 +153,9 @@ Try<FrameworkState> FrameworkState::recover(
const Result<FrameworkInfo>& frameworkInfo =
::protobuf::read<FrameworkInfo>(path);
- if (!frameworkInfo.isSome()) {
+ if (frameworkInfo.isError()) {
message = "Failed to read framework info from '" + path + "': " +
- (frameworkInfo.isError() ? frameworkInfo.error() : " none");
+ frameworkInfo.error();
if (strict) {
return Error(message);
@@ -159,6 +166,13 @@ Try<FrameworkState> FrameworkState::recover(
}
}
+ if (frameworkInfo.isNone()) {
+ // This could happen if the slave died after opening the file for
+ // writing but before it checkpointed anything.
+ LOG(WARNING) << "Found empty framework info file '" << path << "'";
+ return state;
+ }
+
state.info = frameworkInfo.get();
// Read the framework pid.
@@ -185,6 +199,13 @@ Try<FrameworkState> FrameworkState::recover(
}
}
+ if (pid.get().empty()) {
+ // This could happen if the slave died after opening the file for
+ // writing but before it checkpointed anything.
+ LOG(WARNING) << "Found empty framework pid file '" << path << "'";
+ return state;
+ }
+
state.pid = process::UPID(pid.get());
// Find the executors.
@@ -242,10 +263,9 @@ Try<ExecutorState> ExecutorState::recover(
const Result<ExecutorInfo>& executorInfo =
::protobuf::read<ExecutorInfo>(path);
- if (!executorInfo.isSome()) {
- message =
- "Failed to read executor info from '" + path +
- "': " + (executorInfo.isError() ? executorInfo.error() : " none");
+ if (executorInfo.isError()) {
+ message = "Failed to read executor info from '" + path + "': " +
+ executorInfo.error();
if (strict) {
return Error(message);
@@ -256,6 +276,13 @@ Try<ExecutorState> ExecutorState::recover(
}
}
+ if (executorInfo.isNone()) {
+ // This could happen if the slave died after opening the file for
+ // writing but before it checkpointed anything.
+ LOG(WARNING) << "Found empty executor info file '" << path << "'";
+ return state;
+ }
+
state.info = executorInfo.get();
// Find the runs.
@@ -371,7 +398,7 @@ Try<RunState> RunState::recover(
Try<string> pid = os::read(path);
if (pid.isError()) {
- message = "Failed to read executor's forked pid from '" + path +
+ message = "Failed to read executor forked pid from '" + path +
"': " + pid.error();
if (strict) {
@@ -383,6 +410,13 @@ Try<RunState> RunState::recover(
}
}
+ if (pid.get().empty()) {
+ // This could happen if the slave died after opening the file for
+ // writing but before it checkpointed anything.
+ LOG(WARNING) << "Found empty executor forked pid file '" << path << "'";
+ return state;
+ }
+
Try<pid_t> forkedPid = numify<pid_t>(pid.get());
if (forkedPid.isError()) {
return Error("Failed to parse forked pid " + pid.get() +
@@ -406,7 +440,7 @@ Try<RunState> RunState::recover(
pid = os::read(path);
if (pid.isError()) {
- message = "Failed to read executor's libprocess pid from '" + path +
+ message = "Failed to read executor libprocess pid from '" + path +
"': " + pid.error();
if (strict) {
@@ -418,6 +452,13 @@ Try<RunState> RunState::recover(
}
}
+ if (pid.get().empty()) {
+ // This could happen if the slave died after opening the file for
+ // writing but before it checkpointed anything.
+ LOG(WARNING) << "Found empty executor libprocess pid file '" << path << "'";
+ return state;
+ }
+
state.libprocessPid = process::UPID(pid.get());
// See if the sentinel file exists.
@@ -455,9 +496,8 @@ Try<TaskState> TaskState::recover(
const Result<Task>& task = ::protobuf::read<Task>(path);
- if (!task.isSome()) {
- message = "Failed to read task info from '" + path +
- "': " + (task.isError() ? task.error() : " none");
+ if (task.isError()) {
+ message = "Failed to read task info from '" + path + "': " + task.error();
if (strict) {
return Error(message);
@@ -468,6 +508,13 @@ Try<TaskState> TaskState::recover(
}
}
+ if (task.isNone()) {
+ // This could happen if the slave died after opening the file for
+ // writing but before it checkpointed anything.
+ LOG(WARNING) << "Found empty task info file '" << path << "'";
+ return state;
+ }
+
state.info = task.get();
// Read the status updates.