You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2013/08/16 21:04:39 UTC
[2/3] git commit: Fixed slave state recovery to allow for partial
status update writes.
Fixed slave state recovery to allow for partial status update writes.
Review: https://reviews.apache.org/r/13624
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/a8cefa47
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/a8cefa47
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/a8cefa47
Branch: refs/heads/master
Commit: a8cefa47b32c3e6d64f47206141d78b0d559d6e4
Parents: 76946c9
Author: Vinod Kone <vi...@twitter.com>
Authored: Thu Aug 15 20:01:15 2013 -0700
Committer: Vinod Kone <vi...@twitter.com>
Committed: Fri Aug 16 11:58:22 2013 -0700
----------------------------------------------------------------------
src/slave/state.cpp | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/a8cefa47/src/slave/state.cpp
----------------------------------------------------------------------
diff --git a/src/slave/state.cpp b/src/slave/state.cpp
index cd74e41..aab8d0b 100644
--- a/src/slave/state.cpp
+++ b/src/slave/state.cpp
@@ -492,7 +492,8 @@ Try<TaskState> TaskState::recover(
// Now, read the updates.
Result<StatusUpdateRecord> record = None();
while (true) {
- record = ::protobuf::read<StatusUpdateRecord>(fd.get());
+ // Ignore errors due to partial protobuf read.
+ record = ::protobuf::read<StatusUpdateRecord>(fd.get(), true);
if (!record.isSome()) {
break;
@@ -505,6 +506,15 @@ Try<TaskState> TaskState::recover(
}
}
+ // Always truncate the file to contain only valid updates.
+ // NOTE: This is safe even though we ignore partial protobuf
+ // read errors above, because the 'fd' is properly set to the
+ // end of the last valid update by 'protobuf::read()'.
+ if (ftruncate(fd.get(), lseek(fd.get(), 0, SEEK_CUR)) != 0) {
+ return ErrnoError(
+ "Failed to truncate status updates file '" + path + "'");
+ }
+
// After reading a non-corrupted updates file, 'record' should be 'none'.
if (record.isError()) {
message = "Failed to read status updates file '" + path +
@@ -514,13 +524,6 @@ Try<TaskState> TaskState::recover(
return Error(message);
} else {
LOG(WARNING) << message;
-
- // Truncate the file to contain only valid updates.
- if (ftruncate(fd.get(), lseek(fd.get(), 0, SEEK_CUR)) != 0) {
- return ErrnoError(
- "Failed to truncate status updates file '" + path + "'");
- }
-
return state;
}
}