You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by mp...@apache.org on 2018/01/30 06:16:27 UTC

[1/3] mesos git commit: Reverted `protobuf::read(path)` to return `Result`.

Repository: mesos
Updated Branches:
  refs/heads/1.5.x fde21090e -> 6c30b2aff


Reverted `protobuf::read(path)` to return `Result<T>`.

Due to the performance hits of `fsync`, the current implementation
of `state::checkpoint` does not invoke `fsync` prior to `rename`.

As a result, according to this blog post:

  https://thunk.org/tytso/blog/2009/03/12/delayed-allocation-and-
  the-zero-length-file-problem/

If the agent machine crashes after `rename` is called but before
the buffer has been flushed, the file will end up being empty.

This reverts commit 4f9cda17e1a747bc3c4ab3667569304e09600b29.

Review: https://reviews.apache.org/r/65376


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/ad962b75
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/ad962b75
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/ad962b75

Branch: refs/heads/1.5.x
Commit: ad962b756691d4c911a78ab71127bbe4ad460004
Parents: fde2109
Author: Michael Park <mp...@apache.org>
Authored: Fri Jan 26 12:33:05 2018 -0800
Committer: Michael Park <mp...@apache.org>
Committed: Mon Jan 29 19:06:10 2018 -0800

----------------------------------------------------------------------
 3rdparty/stout/include/stout/protobuf.hpp | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/ad962b75/3rdparty/stout/include/stout/protobuf.hpp
----------------------------------------------------------------------
diff --git a/3rdparty/stout/include/stout/protobuf.hpp b/3rdparty/stout/include/stout/protobuf.hpp
index 01d7581..f00a048 100644
--- a/3rdparty/stout/include/stout/protobuf.hpp
+++ b/3rdparty/stout/include/stout/protobuf.hpp
@@ -340,7 +340,7 @@ Result<T> read(int_fd fd, bool ignorePartial = false, bool undoFailed = false)
 // A wrapper function that wraps the above read() with open and
 // closing the file.
 template <typename T>
-Try<T> read(const std::string& path)
+Result<T> read(const std::string& path)
 {
   Try<int_fd> fd = os::open(
       path,
@@ -358,13 +358,7 @@ Try<T> read(const std::string& path)
   // read(). Also an unsuccessful close() doesn't affect the read.
   os::close(fd.get());
 
-  if (result.isSome()) {
-    return result.get();
-  }
-
-  // `read(fd)` returning `None` here means that the file is empty.
-  // Since this is a partial read of `T`, we report it as an error.
-  return Error(result.isError() ? result.error() : "Found an empty file");
+  return result;
 }
 
 


[2/3] mesos git commit: Updated `state::read` to return `Result`.

Posted by mp...@apache.org.
Updated `state::read` to return `Result`.

Due to the issue of empty files that can arise from using `rename`
without `fsync` (see commit 3763476), we return `None` to indicate
an empty file.

Review: https://reviews.apache.org/r/65377


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/583dfb81
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/583dfb81
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/583dfb81

Branch: refs/heads/1.5.x
Commit: 583dfb8196de474af90d6ebc66e81485991e1181
Parents: ad962b7
Author: Michael Park <mp...@apache.org>
Authored: Fri Jan 26 14:29:44 2018 -0800
Committer: Michael Park <mp...@apache.org>
Committed: Mon Jan 29 19:06:13 2018 -0800

----------------------------------------------------------------------
 .../mesos/isolators/docker/volume/isolator.cpp  |  2 +-
 src/slave/containerizer/mesos/paths.cpp         |  6 ++---
 src/slave/state.cpp                             | 18 +++++++++------
 src/slave/state.hpp                             | 24 +++++++++++++-------
 4 files changed, 31 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/583dfb81/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp b/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp
index 44cf9b2..a6f05a8 100644
--- a/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp
+++ b/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp
@@ -264,7 +264,7 @@ Try<Nothing> DockerVolumeIsolatorProcess::_recover(
     return Nothing();
   }
 
-  Try<string> read = state::read<string>(volumesPath);
+  Result<string> read = state::read<string>(volumesPath);
   if (read.isError()) {
     return Error(
         "Failed to read docker volumes checkpoint file '" +

http://git-wip-us.apache.org/repos/asf/mesos/blob/583dfb81/src/slave/containerizer/mesos/paths.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/paths.cpp b/src/slave/containerizer/mesos/paths.cpp
index 612c967..cb4e29c 100644
--- a/src/slave/containerizer/mesos/paths.cpp
+++ b/src/slave/containerizer/mesos/paths.cpp
@@ -94,7 +94,7 @@ Result<pid_t> getContainerPid(
     return None();
   }
 
-  Try<string> read = state::read<string>(path);
+  Result<string> read = state::read<string>(path);
   if (read.isError()) {
     return Error("Failed to recover pid of container: " + read.error());
   }
@@ -180,7 +180,7 @@ Result<pid_t> getContainerIOSwitchboardPid(
     return None();
   }
 
-  Try<string> read = state::read<string>(path);
+  Result<string> read = state::read<string>(path);
   if (read.isError()) {
     return Error("Failed to recover pid of io switchboard: " + read.error());
   }
@@ -221,7 +221,7 @@ Result<unix::Address> getContainerIOSwitchboardAddress(
     return None();
   }
 
-  Try<string> read = state::read<string>(path);
+  Result<string> read = state::read<string>(path);
   if (read.isError()) {
     return Error("Failed reading '" + path + "': " + read.error());
   }

http://git-wip-us.apache.org/repos/asf/mesos/blob/583dfb81/src/slave/state.cpp
----------------------------------------------------------------------
diff --git a/src/slave/state.cpp b/src/slave/state.cpp
index 41c12f6..328de90 100644
--- a/src/slave/state.cpp
+++ b/src/slave/state.cpp
@@ -86,7 +86,7 @@ Try<State> recover(const string& rootDir, bool strict)
 
   const string& bootIdPath = paths::getBootIdPath(rootDir);
   if (os::exists(bootIdPath)) {
-    Try<string> read = state::read<string>(bootIdPath);
+    Result<string> read = state::read<string>(bootIdPath);
     if (read.isError()) {
       LOG(WARNING) << "Failed to read '"
                    << bootIdPath << "': " << read.error();
@@ -240,7 +240,7 @@ Try<FrameworkState> FrameworkState::recover(
     return state;
   }
 
-  Try<string> pid = state::read<string>(path);
+  Result<string> pid = state::read<string>(path);
 
   if (pid.isError()) {
     message =
@@ -455,7 +455,7 @@ Try<RunState> RunState::recover(
     return state;
   }
 
-  Try<string> pid = state::read<string>(path);
+  Result<string> pid = state::read<string>(path);
 
   if (pid.isError()) {
     message = "Failed to read executor forked pid from '" + path +
@@ -681,7 +681,7 @@ Try<ResourcesState> ResourcesState::recover(
     return state;
   }
 
-  Try<Resources> info = state::read<Resources>(infoPath);
+  Result<Resources> info = state::read<Resources>(infoPath);
   if (info.isError()) {
     string message =
       "Failed to read resources file '" + infoPath + "': " + info.error();
@@ -695,7 +695,9 @@ Try<ResourcesState> ResourcesState::recover(
     }
   }
 
-  state.resources = info.get();
+  if (info.isSome()) {
+    state.resources = info.get();
+  }
 
   // Process the target resources.
   const string& targetPath = paths::getResourcesTargetPath(rootDir);
@@ -703,7 +705,7 @@ Try<ResourcesState> ResourcesState::recover(
     return state;
   }
 
-  Try<Resources> target = state::read<Resources>(targetPath);
+  Result<Resources> target = state::read<Resources>(targetPath);
   if (target.isError()) {
     string message =
       "Failed to read resources file '" + targetPath + "': " + target.error();
@@ -717,7 +719,9 @@ Try<ResourcesState> ResourcesState::recover(
     }
   }
 
-  state.target = target.get();
+  if (target.isSome()) {
+    state.target = target.get();
+  }
 
   return state;
 }

http://git-wip-us.apache.org/repos/asf/mesos/blob/583dfb81/src/slave/state.hpp
----------------------------------------------------------------------
diff --git a/src/slave/state.hpp b/src/slave/state.hpp
index dcab443..003211e 100644
--- a/src/slave/state.hpp
+++ b/src/slave/state.hpp
@@ -78,34 +78,42 @@ Try<State> recover(const std::string& rootDir, bool strict);
 // `T` may be either a single protobuf message or a sequence of messages
 // if `T` is a specialization of `google::protobuf::RepeatedPtrField`.
 template <typename T>
-Try<T> read(const std::string& path)
+Result<T> read(const std::string& path)
 {
-  Try<T> result = ::protobuf::read<T>(path);
-  if (result.isError()) {
-    return Error(result.error());
+  Result<T> result = ::protobuf::read<T>(path);
+  if (result.isSome()) {
+    upgradeResources(&result.get());
   }
 
-  upgradeResources(&result.get());
   return result;
 }
 
+
+// While we return a `Result<string>` here in order to keep the return
+// type of `state::read` consistent, the `None` case does not arise here.
+// That is, an empty file will result in an empty string, rather than
+// the `Result` ending up in a `None` state.
 template <>
-inline Try<std::string> read<std::string>(const std::string& path)
+inline Result<std::string> read<std::string>(const std::string& path)
 {
   return os::read(path);
 }
 
 
 template <>
-inline Try<Resources> read<Resources>(const std::string& path)
+inline Result<Resources> read<Resources>(const std::string& path)
 {
-  Try<google::protobuf::RepeatedPtrField<Resource>> resources =
+  Result<google::protobuf::RepeatedPtrField<Resource>> resources =
     read<google::protobuf::RepeatedPtrField<Resource>>(path);
 
   if (resources.isError()) {
     return Error(resources.error());
   }
 
+  if (resources.isNone()) {
+    return None();
+  }
+
   return std::move(resources.get());
 }
 


[3/3] mesos git commit: Reverted the uses of `state::read` since it returns `Result`.

Posted by mp...@apache.org.
Reverted the uses of `state::read` since it returns `Result`.

This reverts commit fda054b50ff7cdd2d7a60d31cfe24ce42bfbfaa5.

Review: https://reviews.apache.org/r/65378


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/6c30b2af
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/6c30b2af
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/6c30b2af

Branch: refs/heads/1.5.x
Commit: 6c30b2aff2cf0ebbe03ac63b667f05a08f72ca3c
Parents: 583dfb8
Author: Michael Park <mp...@apache.org>
Authored: Fri Jan 26 12:45:43 2018 -0800
Committer: Michael Park <mp...@apache.org>
Committed: Mon Jan 29 19:06:18 2018 -0800

----------------------------------------------------------------------
 src/resource_provider/storage/provider.cpp      | 137 ++++++++++---------
 src/slave/containerizer/mesos/paths.cpp         |   7 +-
 .../provisioner/docker/metadata_manager.cpp     |   8 +-
 .../mesos/provisioner/provisioner.cpp           |  14 +-
 src/slave/state.cpp                             |  40 +++++-
 src/tests/protobuf_io_tests.cpp                 |   2 +-
 src/tests/slave_recovery_tests.cpp              |   4 +-
 7 files changed, 129 insertions(+), 83 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/6c30b2af/src/resource_provider/storage/provider.cpp
----------------------------------------------------------------------
diff --git a/src/resource_provider/storage/provider.cpp b/src/resource_provider/storage/provider.cpp
index 1f30d0a..163ce7f 100644
--- a/src/resource_provider/storage/provider.cpp
+++ b/src/resource_provider/storage/provider.cpp
@@ -705,7 +705,7 @@ Future<Nothing> StorageLocalResourceProviderProcess::recoverServices()
           containerId);
 
       if (os::exists(configPath)) {
-        Try<CSIPluginContainerInfo> config =
+        Result<CSIPluginContainerInfo> config =
           slave::state::read<CSIPluginContainerInfo>(configPath);
 
         if (config.isError()) {
@@ -714,7 +714,8 @@ Future<Nothing> StorageLocalResourceProviderProcess::recoverServices()
               configPath + "': " + config.error());
         }
 
-        if (getCSIPluginContainerInfo(info, containerId) == config.get()) {
+        if (config.isSome() &&
+            getCSIPluginContainerInfo(info, containerId) == config.get()) {
           continue;
         }
       }
@@ -799,7 +800,7 @@ Future<Nothing> StorageLocalResourceProviderProcess::recoverVolumes()
       continue;
     }
 
-    Try<csi::state::VolumeState> volumeState =
+    Result<csi::state::VolumeState> volumeState =
       slave::state::read<csi::state::VolumeState>(statePath);
 
     if (volumeState.isError()) {
@@ -808,67 +809,69 @@ Future<Nothing> StorageLocalResourceProviderProcess::recoverVolumes()
           volumeState.error());
     }
 
-    volumes.put(volumeId, std::move(volumeState.get()));
+    if (volumeState.isSome()) {
+      volumes.put(volumeId, std::move(volumeState.get()));
 
-    Future<Nothing> recovered = Nothing();
+      Future<Nothing> recovered = Nothing();
 
-    switch (volumes.at(volumeId).state.state()) {
-      case csi::state::VolumeState::CREATED:
-      case csi::state::VolumeState::NODE_READY: {
-        break;
-      }
-      case csi::state::VolumeState::PUBLISHED: {
-        if (volumes.at(volumeId).state.boot_id() != bootId) {
-          // The node has been restarted since the volume is mounted,
-          // so it is no longer in the `PUBLISHED` state.
-          volumes.at(volumeId).state.set_state(
-              csi::state::VolumeState::NODE_READY);
-          volumes.at(volumeId).state.clear_boot_id();
-          checkpointVolumeState(volumeId);
+      switch (volumes.at(volumeId).state.state()) {
+        case csi::state::VolumeState::CREATED:
+        case csi::state::VolumeState::NODE_READY: {
+          break;
+        }
+        case csi::state::VolumeState::PUBLISHED: {
+          if (volumes.at(volumeId).state.boot_id() != bootId) {
+            // The node has been restarted since the volume is mounted,
+            // so it is no longer in the `PUBLISHED` state.
+            volumes.at(volumeId).state.set_state(
+                csi::state::VolumeState::NODE_READY);
+            volumes.at(volumeId).state.clear_boot_id();
+            checkpointVolumeState(volumeId);
+          }
+          break;
+        }
+        case csi::state::VolumeState::CONTROLLER_PUBLISH: {
+          recovered =
+            volumes.at(volumeId).sequence->add(std::function<Future<Nothing>()>(
+                defer(self(), &Self::controllerPublish, volumeId)));
+          break;
+        }
+        case csi::state::VolumeState::CONTROLLER_UNPUBLISH: {
+          recovered =
+            volumes.at(volumeId).sequence->add(std::function<Future<Nothing>()>(
+                defer(self(), &Self::controllerUnpublish, volumeId)));
+          break;
+        }
+        case csi::state::VolumeState::NODE_PUBLISH: {
+          recovered =
+            volumes.at(volumeId).sequence->add(std::function<Future<Nothing>()>(
+                defer(self(), &Self::nodePublish, volumeId)));
+          break;
+        }
+        case csi::state::VolumeState::NODE_UNPUBLISH: {
+          recovered =
+            volumes.at(volumeId).sequence->add(std::function<Future<Nothing>()>(
+                defer(self(), &Self::nodeUnpublish, volumeId)));
+          break;
+        }
+        case csi::state::VolumeState::UNKNOWN: {
+          recovered = Failure(
+              "Volume '" + volumeId + "' is in " +
+              stringify(volumes.at(volumeId).state.state()) + " state");
         }
-        break;
-      }
-      case csi::state::VolumeState::CONTROLLER_PUBLISH: {
-        recovered =
-          volumes.at(volumeId).sequence->add(std::function<Future<Nothing>()>(
-              defer(self(), &Self::controllerPublish, volumeId)));
-        break;
-      }
-      case csi::state::VolumeState::CONTROLLER_UNPUBLISH: {
-        recovered =
-          volumes.at(volumeId).sequence->add(std::function<Future<Nothing>()>(
-              defer(self(), &Self::controllerUnpublish, volumeId)));
-        break;
-      }
-      case csi::state::VolumeState::NODE_PUBLISH: {
-        recovered =
-          volumes.at(volumeId).sequence->add(std::function<Future<Nothing>()>(
-              defer(self(), &Self::nodePublish, volumeId)));
-        break;
-      }
-      case csi::state::VolumeState::NODE_UNPUBLISH: {
-        recovered =
-          volumes.at(volumeId).sequence->add(std::function<Future<Nothing>()>(
-              defer(self(), &Self::nodeUnpublish, volumeId)));
-        break;
-      }
-      case csi::state::VolumeState::UNKNOWN: {
-        recovered = Failure(
-            "Volume '" + volumeId + "' is in " +
-            stringify(volumes.at(volumeId).state.state()) + " state");
-      }
 
-      // NOTE: We avoid using a default clause for the following
-      // values in proto3's open enum to enable the compiler to detect
-      // missing enum cases for us. See:
-      // https://github.com/google/protobuf/issues/3917
-      case google::protobuf::kint32min:
-      case google::protobuf::kint32max: {
-        UNREACHABLE();
+        // NOTE: We avoid using a default clause for the following
+        // values in proto3's open enum to enable the compiler to detect
+        // missing enum cases for us. See:
+        // https://github.com/google/protobuf/issues/3917
+        case google::protobuf::kint32min:
+        case google::protobuf::kint32max: {
+          UNREACHABLE();
+        }
       }
-    }
 
-    futures.push_back(recovered);
+      futures.push_back(recovered);
+    }
   }
 
   return collect(futures).then([] { return Nothing(); });
@@ -903,7 +906,7 @@ StorageLocalResourceProviderProcess::recoverResourceProviderState()
       return Nothing();
     }
 
-    Try<ResourceProviderState> resourceProviderState =
+    Result<ResourceProviderState> resourceProviderState =
       slave::state::read<ResourceProviderState>(statePath);
 
     if (resourceProviderState.isError()) {
@@ -912,16 +915,18 @@ StorageLocalResourceProviderProcess::recoverResourceProviderState()
           "': " + resourceProviderState.error());
     }
 
-    foreach (const Operation& operation,
-             resourceProviderState->operations()) {
-      Try<id::UUID> uuid = id::UUID::fromBytes(operation.uuid().value());
+    if (resourceProviderState.isSome()) {
+      foreach (const Operation& operation,
+               resourceProviderState->operations()) {
+        Try<id::UUID> uuid = id::UUID::fromBytes(operation.uuid().value());
 
-      CHECK_SOME(uuid);
+        CHECK_SOME(uuid);
 
-      operations[uuid.get()] = operation;
-    }
+        operations[uuid.get()] = operation;
+      }
 
-    totalResources = resourceProviderState->resources();
+      totalResources = resourceProviderState->resources();
+    }
   }
 
   return Nothing();

http://git-wip-us.apache.org/repos/asf/mesos/blob/6c30b2af/src/slave/containerizer/mesos/paths.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/paths.cpp b/src/slave/containerizer/mesos/paths.cpp
index cb4e29c..bb5570f 100644
--- a/src/slave/containerizer/mesos/paths.cpp
+++ b/src/slave/containerizer/mesos/paths.cpp
@@ -277,7 +277,7 @@ Result<ContainerTermination> getContainerTermination(
     return None();
   }
 
-  Try<ContainerTermination> termination =
+  Result<ContainerTermination> termination =
     state::read<ContainerTermination>(path);
 
   if (termination.isError()) {
@@ -325,7 +325,8 @@ Result<ContainerConfig> getContainerConfig(
     return None();
   }
 
-  Try<ContainerConfig> containerConfig = state::read<ContainerConfig>(path);
+  Result<ContainerConfig> containerConfig = state::read<ContainerConfig>(path);
+
   if (containerConfig.isError()) {
     return Error("Failed to read launch config of container: " +
                  containerConfig.error());
@@ -420,7 +421,7 @@ Result<ContainerLaunchInfo> getContainerLaunchInfo(
     return None();
   }
 
-  Try<ContainerLaunchInfo> containerLaunchInfo =
+  Result<ContainerLaunchInfo> containerLaunchInfo =
     state::read<ContainerLaunchInfo>(path);
 
   if (containerLaunchInfo.isError()) {

http://git-wip-us.apache.org/repos/asf/mesos/blob/6c30b2af/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp b/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp
index 168f786..e1939a4 100644
--- a/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp
+++ b/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp
@@ -256,12 +256,18 @@ Future<Nothing> MetadataManagerProcess::recover()
     return Nothing();
   }
 
-  Try<Images> images = state::read<Images>(storedImagesPath);
+  Result<Images> images = state::read<Images>(storedImagesPath);
   if (images.isError()) {
     return Failure("Failed to read images from '" + storedImagesPath + "' " +
                    images.error());
   }
 
+  if (images.isNone()) {
+    // This could happen if the slave died after opening the file for
+    // writing but before persisted on disk.
+    return Failure("Unexpected empty images file '" + storedImagesPath + "'");
+  }
+
   foreach (const Image& image, images.get().images()) {
     const string imageReference = stringify(image.reference());
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/6c30b2af/src/slave/containerizer/mesos/provisioner/provisioner.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/provisioner.cpp b/src/slave/containerizer/mesos/provisioner/provisioner.cpp
index f7dbae6..6280144 100644
--- a/src/slave/containerizer/mesos/provisioner/provisioner.cpp
+++ b/src/slave/containerizer/mesos/provisioner/provisioner.cpp
@@ -429,18 +429,20 @@ Future<Nothing> ProvisionerProcess::recover(
       VLOG(1) << "Layers path '" << path << "' is missing for container' "
               << containerId << "'";
     } else {
-      Try<ContainerLayers> layers = state::read<ContainerLayers>(path);
+      Result<ContainerLayers> layers = state::read<ContainerLayers>(path);
       if (layers.isError()) {
         return Failure(
             "Failed to recover layers for container '" +
             stringify(containerId) + "': " + layers.error());
       }
 
-      info->layers = vector<string>();
-      std::copy(
-          layers->paths().begin(),
-          layers->paths().end(),
-          std::back_inserter(info->layers.get()));
+      if (layers.isSome()) {
+        info->layers = vector<string>();
+        std::copy(
+            layers->paths().begin(),
+            layers->paths().end(),
+            std::back_inserter(info->layers.get()));
+      }
     }
 
     infos.put(containerId, info);

http://git-wip-us.apache.org/repos/asf/mesos/blob/6c30b2af/src/slave/state.cpp
----------------------------------------------------------------------
diff --git a/src/slave/state.cpp b/src/slave/state.cpp
index 328de90..e7cf849 100644
--- a/src/slave/state.cpp
+++ b/src/slave/state.cpp
@@ -151,7 +151,8 @@ Try<SlaveState> SlaveState::recover(
     return state;
   }
 
-  Try<SlaveInfo> slaveInfo = state::read<SlaveInfo>(path);
+  Result<SlaveInfo> slaveInfo = state::read<SlaveInfo>(path);
+
   if (slaveInfo.isError()) {
     const string& message = "Failed to read agent info from '" + path + "': " +
                             slaveInfo.error();
@@ -164,6 +165,13 @@ Try<SlaveState> SlaveState::recover(
     }
   }
 
+  if (slaveInfo.isNone()) {
+    // This could happen if the slave died after opening the file for
+    // writing but before it checkpointed anything.
+    LOG(WARNING) << "Found empty agent info file '" << path << "'";
+    return state;
+  }
+
   state.info = slaveInfo.get();
 
   // Find the frameworks.
@@ -215,7 +223,8 @@ Try<FrameworkState> FrameworkState::recover(
     return state;
   }
 
-  const Try<FrameworkInfo> frameworkInfo = state::read<FrameworkInfo>(path);
+  const Result<FrameworkInfo> frameworkInfo = state::read<FrameworkInfo>(path);
+
   if (frameworkInfo.isError()) {
     message = "Failed to read framework info from '" + path + "': " +
               frameworkInfo.error();
@@ -229,6 +238,13 @@ Try<FrameworkState> FrameworkState::recover(
     }
   }
 
+  if (frameworkInfo.isNone()) {
+    // This could happen if the slave died after opening the file for
+    // writing but before it checkpointed anything.
+    LOG(WARNING) << "Found empty framework info file '" << path << "'";
+    return state;
+  }
+
   state.info = frameworkInfo.get();
 
   // Read the framework pid.
@@ -373,7 +389,8 @@ Try<ExecutorState> ExecutorState::recover(
     return state;
   }
 
-  Try<ExecutorInfo> executorInfo = state::read<ExecutorInfo>(path);
+  Result<ExecutorInfo> executorInfo = state::read<ExecutorInfo>(path);
+
   if (executorInfo.isError()) {
     message = "Failed to read executor info from '" + path + "': " +
               executorInfo.error();
@@ -387,6 +404,13 @@ Try<ExecutorState> ExecutorState::recover(
     }
   }
 
+  if (executorInfo.isNone()) {
+    // This could happen if the slave died after opening the file for
+    // writing but before it checkpointed anything.
+    LOG(WARNING) << "Found empty executor info file '" << path << "'";
+    return state;
+  }
+
   state.info = executorInfo.get();
 
   return state;
@@ -562,7 +586,8 @@ Try<TaskState> TaskState::recover(
     return state;
   }
 
-  Try<Task> task = state::read<Task>(path);
+  Result<Task> task = state::read<Task>(path);
+
   if (task.isError()) {
     message = "Failed to read task info from '" + path + "': " + task.error();
 
@@ -575,6 +600,13 @@ Try<TaskState> TaskState::recover(
     }
   }
 
+  if (task.isNone()) {
+    // This could happen if the slave died after opening the file for
+    // writing but before it checkpointed anything.
+    LOG(WARNING) << "Found empty task info file '" << path << "'";
+    return state;
+  }
+
   state.info = task.get();
 
   // Read the status updates.

http://git-wip-us.apache.org/repos/asf/mesos/blob/6c30b2af/src/tests/protobuf_io_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/protobuf_io_tests.cpp b/src/tests/protobuf_io_tests.cpp
index ddbda03..ffb949f 100644
--- a/src/tests/protobuf_io_tests.cpp
+++ b/src/tests/protobuf_io_tests.cpp
@@ -150,7 +150,7 @@ TEST_F(ProtobufIOTest, RepeatedPtrField)
   Try<Nothing> write = ::protobuf::write(file, expected);
   ASSERT_SOME(write);
 
-  Try<RepeatedPtrField<FrameworkID>> actual =
+  Result<RepeatedPtrField<FrameworkID>> actual =
     ::protobuf::read<RepeatedPtrField<FrameworkID>>(file);
 
   ASSERT_SOME(actual);

http://git-wip-us.apache.org/repos/asf/mesos/blob/6c30b2af/src/tests/slave_recovery_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_recovery_tests.cpp b/src/tests/slave_recovery_tests.cpp
index 641fc6e..871a801 100644
--- a/src/tests/slave_recovery_tests.cpp
+++ b/src/tests/slave_recovery_tests.cpp
@@ -128,7 +128,7 @@ TEST_F(SlaveStateTest, CheckpointProtobufMessage)
   const string file = "slave.id";
   slave::state::checkpoint(file, expected);
 
-  const Try<SlaveID> actual = slave::state::read<SlaveID>(file);
+  const Result<SlaveID> actual = slave::state::read<SlaveID>(file);
   ASSERT_SOME(actual);
 
   EXPECT_SOME_EQ(expected, actual);
@@ -144,7 +144,7 @@ TEST_F(SlaveStateTest, CheckpointRepeatedProtobufMessages)
   const string file = "resources-file";
   slave::state::checkpoint(file, expected);
 
-  const Try<Resources> actual = slave::state::read<Resources>(file);
+  const Result<Resources> actual = slave::state::read<Resources>(file);
 
   ASSERT_SOME(actual);
   EXPECT_SOME_EQ(expected, actual);