You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2013/08/07 04:58:05 UTC
[4/5] git commit: Clarified the guidance for users when slave recovery fails.
Clarified the guidance for users when slave recovery fails.
Review: https://reviews.apache.org/r/13261
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/d88f1e3f
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/d88f1e3f
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/d88f1e3f
Branch: refs/heads/master
Commit: d88f1e3f3e33b8c6b66facb7e12799679299c6ae
Parents: a20ce4d
Author: Vinod Kone <vi...@twitter.com>
Authored: Sat Aug 3 14:29:43 2013 -0700
Committer: Vinod Kone <vi...@twitter.com>
Committed: Tue Aug 6 19:57:27 2013 -0700
----------------------------------------------------------------------
src/slave/slave.cpp | 20 +++++++++++++++-----
1 file changed, 15 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/d88f1e3f/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 7378e01..3b49118 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -391,8 +391,13 @@ void Slave::initialize()
void Slave::_initialize(const Future<Nothing>& future)
{
if (!future.isReady()) {
- LOG(FATAL) << "Recovery failure: "
- << (future.isFailed() ? future.failure() : "future discarded");
+ EXIT(1)
+ << "Failed to perform recovery: "
+ << (future.isFailed() ? future.failure() : "future discarded") << "\n"
+ << "To remedy this do as follows:\n"
+ << "Step 1: rm -f " << paths::getLatestSlavePath(metaDir) << "\n"
+ << " This ensures slave doesn't recover old live executors.\n"
+ << "Step 2: Restart the slave.";
}
LOG(INFO) << "Finished recovery";
@@ -2552,7 +2557,12 @@ Future<Nothing> Slave::recover(bool reconnect, bool strict)
// First, recover the slave state.
Result<SlaveState> state = state::recover(metaDir, strict);
if (state.isError()) {
- EXIT(1) << "Failed to recover slave state: " << state.error();
+ EXIT(1)
+ << "Failed to recover slave state: " << state.error() << "\n"
+ << "To remedy this try the following:\n"
+ << (flags.strict
+ ? "Restart the slave with '--no-strict' flag (partial recovery)"
+ : "rm '" + paths::getLatestSlavePath(metaDir) + "' (no recovery)");
}
if (state.isNone() || state.get().info.isNone()) {
@@ -2574,9 +2584,9 @@ Future<Nothing> Slave::recover(bool reconnect, bool strict)
<< "Old slave info:\n" << state.get().info.get() << "\n"
<< "New slave info:\n" << info << "\n"
<< "To properly upgrade the slave do as follows:\n"
- << "Step 1: Start the slave (old slave info) with --recover=cleanup.\n"
+ << "Step 1: Start the slave with --recover=cleanup.\n"
<< "Step 2: Wait till the slave kills all executors and shuts down.\n"
- << "Step 3: Start the upgraded slave (new slave info).\n";
+ << "Step 3: Start the upgraded slave with --recover=reconnect.\n";
}
info = state.get().info.get(); // Recover the slave info.