You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2013/08/16 21:04:39 UTC

[2/3] git commit: Fixed slave state recovery to allow for partial status update writes.

Fixed slave state recovery to allow for partial status update writes.

Review: https://reviews.apache.org/r/13624


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/a8cefa47
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/a8cefa47
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/a8cefa47

Branch: refs/heads/master
Commit: a8cefa47b32c3e6d64f47206141d78b0d559d6e4
Parents: 76946c9
Author: Vinod Kone <vi...@twitter.com>
Authored: Thu Aug 15 20:01:15 2013 -0700
Committer: Vinod Kone <vi...@twitter.com>
Committed: Fri Aug 16 11:58:22 2013 -0700

----------------------------------------------------------------------
 src/slave/state.cpp | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/a8cefa47/src/slave/state.cpp
----------------------------------------------------------------------
diff --git a/src/slave/state.cpp b/src/slave/state.cpp
index cd74e41..aab8d0b 100644
--- a/src/slave/state.cpp
+++ b/src/slave/state.cpp
@@ -492,7 +492,8 @@ Try<TaskState> TaskState::recover(
   // Now, read the updates.
   Result<StatusUpdateRecord> record = None();
   while (true) {
-    record = ::protobuf::read<StatusUpdateRecord>(fd.get());
+    // Ignore errors due to partial protobuf read.
+    record = ::protobuf::read<StatusUpdateRecord>(fd.get(), true);
 
     if (!record.isSome()) {
       break;
@@ -505,6 +506,15 @@ Try<TaskState> TaskState::recover(
     }
   }
 
+  // Always truncate the file to contain only valid updates.
+  // NOTE: This is safe even though we ignore partial protobuf
+  // read errors above, because the 'fd' is properly set to the
+  // end of the last valid update by 'protobuf::read()'.
+  if (ftruncate(fd.get(), lseek(fd.get(), 0, SEEK_CUR)) != 0) {
+    return ErrnoError(
+        "Failed to truncate status updates file '" + path + "'");
+  }
+
   // After reading a non-corrupted updates file, 'record' should be 'none'.
   if (record.isError()) {
     message = "Failed to read status updates file  '" + path +
@@ -514,13 +524,6 @@ Try<TaskState> TaskState::recover(
       return Error(message);
     } else {
       LOG(WARNING) << message;
-
-      // Truncate the file to contain only valid updates.
-      if (ftruncate(fd.get(), lseek(fd.get(), 0, SEEK_CUR)) != 0) {
-        return ErrnoError(
-            "Failed to truncate status updates file '" + path + "'");
-      }
-
       return state;
     }
   }