You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2013/08/07 04:58:05 UTC

[4/5] git commit: Clarified the guidance for users when slave recovery fails.

Clarified the guidance for users when slave recovery fails.

Review: https://reviews.apache.org/r/13261


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/d88f1e3f
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/d88f1e3f
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/d88f1e3f

Branch: refs/heads/master
Commit: d88f1e3f3e33b8c6b66facb7e12799679299c6ae
Parents: a20ce4d
Author: Vinod Kone <vi...@twitter.com>
Authored: Sat Aug 3 14:29:43 2013 -0700
Committer: Vinod Kone <vi...@twitter.com>
Committed: Tue Aug 6 19:57:27 2013 -0700

----------------------------------------------------------------------
 src/slave/slave.cpp | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/d88f1e3f/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 7378e01..3b49118 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -391,8 +391,13 @@ void Slave::initialize()
 void Slave::_initialize(const Future<Nothing>& future)
 {
   if (!future.isReady()) {
-    LOG(FATAL) << "Recovery failure: "
-               << (future.isFailed() ? future.failure() : "future discarded");
+    EXIT(1)
+      << "Failed to perform recovery: "
+      << (future.isFailed() ? future.failure() : "future discarded") << "\n"
+      << "To remedy this do as follows:\n"
+      << "Step 1: rm -f " << paths::getLatestSlavePath(metaDir) << "\n"
+      << "        This ensures slave doesn't recover old live executors.\n"
+      << "Step 2: Restart the slave.";
   }
 
   LOG(INFO) << "Finished recovery";
@@ -2552,7 +2557,12 @@ Future<Nothing> Slave::recover(bool reconnect, bool strict)
   // First, recover the slave state.
   Result<SlaveState> state = state::recover(metaDir, strict);
   if (state.isError()) {
-    EXIT(1) << "Failed to recover slave state: " << state.error();
+    EXIT(1)
+      << "Failed to recover slave state: " << state.error() << "\n"
+      << "To remedy this try the following:\n"
+      << (flags.strict
+          ? "Restart the slave with '--no-strict' flag (partial recovery)"
+          : "rm '" + paths::getLatestSlavePath(metaDir) + "' (no recovery)");
   }
 
   if (state.isNone() || state.get().info.isNone()) {
@@ -2574,9 +2584,9 @@ Future<Nothing> Slave::recover(bool reconnect, bool strict)
       << "Old slave info:\n" << state.get().info.get() << "\n"
       << "New slave info:\n" << info << "\n"
       << "To properly upgrade the slave do as follows:\n"
-      << "Step 1: Start the slave (old slave info) with --recover=cleanup.\n"
+      << "Step 1: Start the slave with --recover=cleanup.\n"
       << "Step 2: Wait till the slave kills all executors and shuts down.\n"
-      << "Step 3: Start the upgraded slave (new slave info).\n";
+      << "Step 3: Start the upgraded slave with --recover=reconnect.\n";
   }
 
   info = state.get().info.get(); // Recover the slave info.