You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2013/09/24 21:35:49 UTC

git commit: Empty files are no longer treated as recovery errors.

Updated Branches:
  refs/heads/master 45e2568f5 -> 58955f9c9


Empty files are no longer treated as recovery errors.

Review: https://reviews.apache.org/r/14057


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/58955f9c
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/58955f9c
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/58955f9c

Branch: refs/heads/master
Commit: 58955f9c90bb663607d47d2362ed29c7cac1b4c8
Parents: 45e2568
Author: Vinod Kone <vi...@twitter.com>
Authored: Mon Sep 9 17:29:35 2013 -0700
Committer: Vinod Kone <vi...@twitter.com>
Committed: Tue Sep 24 12:27:35 2013 -0700

----------------------------------------------------------------------
 src/slave/state.cpp | 73 +++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 60 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/58955f9c/src/slave/state.cpp
----------------------------------------------------------------------
diff --git a/src/slave/state.cpp b/src/slave/state.cpp
index c03db32..5208e4e 100644
--- a/src/slave/state.cpp
+++ b/src/slave/state.cpp
@@ -79,9 +79,9 @@ Try<SlaveState> SlaveState::recover(
 
   const Result<SlaveInfo>& slaveInfo = ::protobuf::read<SlaveInfo>(path);
 
-  if (!slaveInfo.isSome()) {
+  if (slaveInfo.isError()) {
     const string& message = "Failed to read slave info from '" + path + "': " +
-                            (slaveInfo.isError() ? slaveInfo.error() : " none");
+                            slaveInfo.error();
     if (strict) {
       return Error(message);
     } else {
@@ -91,6 +91,13 @@ Try<SlaveState> SlaveState::recover(
     }
   }
 
+  if (slaveInfo.isNone()) {
+    // This could happen if the slave died after opening the file for
+    // writing but before it checkpointed anything.
+    LOG(WARNING) << "Found empty slave info file '" << path << "'";
+    return state;
+  }
+
   state.info = slaveInfo.get();
 
   // Find the frameworks.
@@ -146,9 +153,9 @@ Try<FrameworkState> FrameworkState::recover(
   const Result<FrameworkInfo>& frameworkInfo =
     ::protobuf::read<FrameworkInfo>(path);
 
-  if (!frameworkInfo.isSome()) {
+  if (frameworkInfo.isError()) {
     message = "Failed to read framework info from '" + path + "': " +
-              (frameworkInfo.isError() ? frameworkInfo.error() : " none");
+              frameworkInfo.error();
 
     if (strict) {
       return Error(message);
@@ -159,6 +166,13 @@ Try<FrameworkState> FrameworkState::recover(
     }
   }
 
+  if (frameworkInfo.isNone()) {
+    // This could happen if the slave died after opening the file for
+    // writing but before it checkpointed anything.
+    LOG(WARNING) << "Found empty framework info file '" << path << "'";
+    return state;
+  }
+
   state.info = frameworkInfo.get();
 
   // Read the framework pid.
@@ -185,6 +199,13 @@ Try<FrameworkState> FrameworkState::recover(
     }
   }
 
+  if (pid.get().empty()) {
+    // This could happen if the slave died after opening the file for
+    // writing but before it checkpointed anything.
+    LOG(WARNING) << "Found empty framework pid file '" << path << "'";
+    return state;
+  }
+
   state.pid = process::UPID(pid.get());
 
   // Find the executors.
@@ -242,10 +263,9 @@ Try<ExecutorState> ExecutorState::recover(
   const Result<ExecutorInfo>& executorInfo =
     ::protobuf::read<ExecutorInfo>(path);
 
-  if (!executorInfo.isSome()) {
-    message =
-      "Failed to read executor info from '" + path +
-      "': " + (executorInfo.isError() ? executorInfo.error() : " none");
+  if (executorInfo.isError()) {
+    message = "Failed to read executor info from '" + path + "': " +
+              executorInfo.error();
 
     if (strict) {
       return Error(message);
@@ -256,6 +276,13 @@ Try<ExecutorState> ExecutorState::recover(
     }
   }
 
+  if (executorInfo.isNone()) {
+    // This could happen if the slave died after opening the file for
+    // writing but before it checkpointed anything.
+    LOG(WARNING) << "Found empty executor info file '" << path << "'";
+    return state;
+  }
+
   state.info = executorInfo.get();
 
   // Find the runs.
@@ -371,7 +398,7 @@ Try<RunState> RunState::recover(
   Try<string> pid = os::read(path);
 
   if (pid.isError()) {
-    message = "Failed to read executor's forked pid from '" + path +
+    message = "Failed to read executor forked pid from '" + path +
               "': " + pid.error();
 
     if (strict) {
@@ -383,6 +410,13 @@ Try<RunState> RunState::recover(
     }
   }
 
+  if (pid.get().empty()) {
+    // This could happen if the slave died after opening the file for
+    // writing but before it checkpointed anything.
+    LOG(WARNING) << "Found empty executor forked pid file '" << path << "'";
+    return state;
+  }
+
   Try<pid_t> forkedPid = numify<pid_t>(pid.get());
   if (forkedPid.isError()) {
     return Error("Failed to parse forked pid " + pid.get() +
@@ -406,7 +440,7 @@ Try<RunState> RunState::recover(
   pid = os::read(path);
 
   if (pid.isError()) {
-    message = "Failed to read executor's libprocess pid from '" + path +
+    message = "Failed to read executor libprocess pid from '" + path +
               "': " + pid.error();
 
     if (strict) {
@@ -418,6 +452,13 @@ Try<RunState> RunState::recover(
     }
   }
 
+  if (pid.get().empty()) {
+    // This could happen if the slave died after opening the file for
+    // writing but before it checkpointed anything.
+    LOG(WARNING) << "Found empty executor libprocess pid file '" << path << "'";
+    return state;
+  }
+
   state.libprocessPid = process::UPID(pid.get());
 
   // See if the sentinel file exists.
@@ -455,9 +496,8 @@ Try<TaskState> TaskState::recover(
 
   const Result<Task>& task = ::protobuf::read<Task>(path);
 
-  if (!task.isSome()) {
-    message = "Failed to read task info from '" + path +
-              "': " + (task.isError() ? task.error() : " none");
+  if (task.isError()) {
+    message = "Failed to read task info from '" + path + "': " + task.error();
 
     if (strict) {
       return Error(message);
@@ -468,6 +508,13 @@ Try<TaskState> TaskState::recover(
     }
   }
 
+  if (task.isNone()) {
+    // This could happen if the slave died after opening the file for
+    // writing but before it checkpointed anything.
+    LOG(WARNING) << "Found empty task info file '" << path << "'";
+    return state;
+  }
+
   state.info = task.get();
 
   // Read the status updates.