You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by mp...@apache.org on 2018/01/30 06:16:27 UTC
[1/3] mesos git commit: Reverted `protobuf::read(path)` to return
`Result`.
Repository: mesos
Updated Branches:
refs/heads/1.5.x fde21090e -> 6c30b2aff
Reverted `protobuf::read(path)` to return `Result<T>`.
Due to the performance hits of `fsync`, the current implementation
of `state::checkpoint` does not invoke `fsync` prior to `rename`.
As a result, according to this blog post:
https://thunk.org/tytso/blog/2009/03/12/delayed-allocation-and-
the-zero-length-file-problem/
If the agent machine crashes after `rename` is called but before
the buffer has been flushed, the file will end up being empty.
This reverts commit 4f9cda17e1a747bc3c4ab3667569304e09600b29.
Review: https://reviews.apache.org/r/65376
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/ad962b75
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/ad962b75
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/ad962b75
Branch: refs/heads/1.5.x
Commit: ad962b756691d4c911a78ab71127bbe4ad460004
Parents: fde2109
Author: Michael Park <mp...@apache.org>
Authored: Fri Jan 26 12:33:05 2018 -0800
Committer: Michael Park <mp...@apache.org>
Committed: Mon Jan 29 19:06:10 2018 -0800
----------------------------------------------------------------------
3rdparty/stout/include/stout/protobuf.hpp | 10 ++--------
1 file changed, 2 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/ad962b75/3rdparty/stout/include/stout/protobuf.hpp
----------------------------------------------------------------------
diff --git a/3rdparty/stout/include/stout/protobuf.hpp b/3rdparty/stout/include/stout/protobuf.hpp
index 01d7581..f00a048 100644
--- a/3rdparty/stout/include/stout/protobuf.hpp
+++ b/3rdparty/stout/include/stout/protobuf.hpp
@@ -340,7 +340,7 @@ Result<T> read(int_fd fd, bool ignorePartial = false, bool undoFailed = false)
// A wrapper function that wraps the above read() with open and
// closing the file.
template <typename T>
-Try<T> read(const std::string& path)
+Result<T> read(const std::string& path)
{
Try<int_fd> fd = os::open(
path,
@@ -358,13 +358,7 @@ Try<T> read(const std::string& path)
// read(). Also an unsuccessful close() doesn't affect the read.
os::close(fd.get());
- if (result.isSome()) {
- return result.get();
- }
-
- // `read(fd)` returning `None` here means that the file is empty.
- // Since this is a partial read of `T`, we report it as an error.
- return Error(result.isError() ? result.error() : "Found an empty file");
+ return result;
}
[2/3] mesos git commit: Updated `state::read` to return `Result`.
Posted by mp...@apache.org.
Updated `state::read` to return `Result`.
Due to the issue of empty files that can arise from using `rename`
without `fsync` (see commit 3763476), we return `None` to indicate
an empty file.
Review: https://reviews.apache.org/r/65377
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/583dfb81
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/583dfb81
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/583dfb81
Branch: refs/heads/1.5.x
Commit: 583dfb8196de474af90d6ebc66e81485991e1181
Parents: ad962b7
Author: Michael Park <mp...@apache.org>
Authored: Fri Jan 26 14:29:44 2018 -0800
Committer: Michael Park <mp...@apache.org>
Committed: Mon Jan 29 19:06:13 2018 -0800
----------------------------------------------------------------------
.../mesos/isolators/docker/volume/isolator.cpp | 2 +-
src/slave/containerizer/mesos/paths.cpp | 6 ++---
src/slave/state.cpp | 18 +++++++++------
src/slave/state.hpp | 24 +++++++++++++-------
4 files changed, 31 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/583dfb81/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp b/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp
index 44cf9b2..a6f05a8 100644
--- a/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp
+++ b/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp
@@ -264,7 +264,7 @@ Try<Nothing> DockerVolumeIsolatorProcess::_recover(
return Nothing();
}
- Try<string> read = state::read<string>(volumesPath);
+ Result<string> read = state::read<string>(volumesPath);
if (read.isError()) {
return Error(
"Failed to read docker volumes checkpoint file '" +
http://git-wip-us.apache.org/repos/asf/mesos/blob/583dfb81/src/slave/containerizer/mesos/paths.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/paths.cpp b/src/slave/containerizer/mesos/paths.cpp
index 612c967..cb4e29c 100644
--- a/src/slave/containerizer/mesos/paths.cpp
+++ b/src/slave/containerizer/mesos/paths.cpp
@@ -94,7 +94,7 @@ Result<pid_t> getContainerPid(
return None();
}
- Try<string> read = state::read<string>(path);
+ Result<string> read = state::read<string>(path);
if (read.isError()) {
return Error("Failed to recover pid of container: " + read.error());
}
@@ -180,7 +180,7 @@ Result<pid_t> getContainerIOSwitchboardPid(
return None();
}
- Try<string> read = state::read<string>(path);
+ Result<string> read = state::read<string>(path);
if (read.isError()) {
return Error("Failed to recover pid of io switchboard: " + read.error());
}
@@ -221,7 +221,7 @@ Result<unix::Address> getContainerIOSwitchboardAddress(
return None();
}
- Try<string> read = state::read<string>(path);
+ Result<string> read = state::read<string>(path);
if (read.isError()) {
return Error("Failed reading '" + path + "': " + read.error());
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/583dfb81/src/slave/state.cpp
----------------------------------------------------------------------
diff --git a/src/slave/state.cpp b/src/slave/state.cpp
index 41c12f6..328de90 100644
--- a/src/slave/state.cpp
+++ b/src/slave/state.cpp
@@ -86,7 +86,7 @@ Try<State> recover(const string& rootDir, bool strict)
const string& bootIdPath = paths::getBootIdPath(rootDir);
if (os::exists(bootIdPath)) {
- Try<string> read = state::read<string>(bootIdPath);
+ Result<string> read = state::read<string>(bootIdPath);
if (read.isError()) {
LOG(WARNING) << "Failed to read '"
<< bootIdPath << "': " << read.error();
@@ -240,7 +240,7 @@ Try<FrameworkState> FrameworkState::recover(
return state;
}
- Try<string> pid = state::read<string>(path);
+ Result<string> pid = state::read<string>(path);
if (pid.isError()) {
message =
@@ -455,7 +455,7 @@ Try<RunState> RunState::recover(
return state;
}
- Try<string> pid = state::read<string>(path);
+ Result<string> pid = state::read<string>(path);
if (pid.isError()) {
message = "Failed to read executor forked pid from '" + path +
@@ -681,7 +681,7 @@ Try<ResourcesState> ResourcesState::recover(
return state;
}
- Try<Resources> info = state::read<Resources>(infoPath);
+ Result<Resources> info = state::read<Resources>(infoPath);
if (info.isError()) {
string message =
"Failed to read resources file '" + infoPath + "': " + info.error();
@@ -695,7 +695,9 @@ Try<ResourcesState> ResourcesState::recover(
}
}
- state.resources = info.get();
+ if (info.isSome()) {
+ state.resources = info.get();
+ }
// Process the target resources.
const string& targetPath = paths::getResourcesTargetPath(rootDir);
@@ -703,7 +705,7 @@ Try<ResourcesState> ResourcesState::recover(
return state;
}
- Try<Resources> target = state::read<Resources>(targetPath);
+ Result<Resources> target = state::read<Resources>(targetPath);
if (target.isError()) {
string message =
"Failed to read resources file '" + targetPath + "': " + target.error();
@@ -717,7 +719,9 @@ Try<ResourcesState> ResourcesState::recover(
}
}
- state.target = target.get();
+ if (target.isSome()) {
+ state.target = target.get();
+ }
return state;
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/583dfb81/src/slave/state.hpp
----------------------------------------------------------------------
diff --git a/src/slave/state.hpp b/src/slave/state.hpp
index dcab443..003211e 100644
--- a/src/slave/state.hpp
+++ b/src/slave/state.hpp
@@ -78,34 +78,42 @@ Try<State> recover(const std::string& rootDir, bool strict);
// `T` may be either a single protobuf message or a sequence of messages
// if `T` is a specialization of `google::protobuf::RepeatedPtrField`.
template <typename T>
-Try<T> read(const std::string& path)
+Result<T> read(const std::string& path)
{
- Try<T> result = ::protobuf::read<T>(path);
- if (result.isError()) {
- return Error(result.error());
+ Result<T> result = ::protobuf::read<T>(path);
+ if (result.isSome()) {
+ upgradeResources(&result.get());
}
- upgradeResources(&result.get());
return result;
}
+
+// While we return a `Result<string>` here in order to keep the return
+// type of `state::read` consistent, the `None` case does not arise here.
+// That is, an empty file will result in an empty string, rather than
+// the `Result` ending up in a `None` state.
template <>
-inline Try<std::string> read<std::string>(const std::string& path)
+inline Result<std::string> read<std::string>(const std::string& path)
{
return os::read(path);
}
template <>
-inline Try<Resources> read<Resources>(const std::string& path)
+inline Result<Resources> read<Resources>(const std::string& path)
{
- Try<google::protobuf::RepeatedPtrField<Resource>> resources =
+ Result<google::protobuf::RepeatedPtrField<Resource>> resources =
read<google::protobuf::RepeatedPtrField<Resource>>(path);
if (resources.isError()) {
return Error(resources.error());
}
+ if (resources.isNone()) {
+ return None();
+ }
+
return std::move(resources.get());
}
[3/3] mesos git commit: Reverted the uses of `state::read` since it
returns `Result`.
Posted by mp...@apache.org.
Reverted the uses of `state::read` since it returns `Result`.
This reverts commit fda054b50ff7cdd2d7a60d31cfe24ce42bfbfaa5.
Review: https://reviews.apache.org/r/65378
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/6c30b2af
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/6c30b2af
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/6c30b2af
Branch: refs/heads/1.5.x
Commit: 6c30b2aff2cf0ebbe03ac63b667f05a08f72ca3c
Parents: 583dfb8
Author: Michael Park <mp...@apache.org>
Authored: Fri Jan 26 12:45:43 2018 -0800
Committer: Michael Park <mp...@apache.org>
Committed: Mon Jan 29 19:06:18 2018 -0800
----------------------------------------------------------------------
src/resource_provider/storage/provider.cpp | 137 ++++++++++---------
src/slave/containerizer/mesos/paths.cpp | 7 +-
.../provisioner/docker/metadata_manager.cpp | 8 +-
.../mesos/provisioner/provisioner.cpp | 14 +-
src/slave/state.cpp | 40 +++++-
src/tests/protobuf_io_tests.cpp | 2 +-
src/tests/slave_recovery_tests.cpp | 4 +-
7 files changed, 129 insertions(+), 83 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/6c30b2af/src/resource_provider/storage/provider.cpp
----------------------------------------------------------------------
diff --git a/src/resource_provider/storage/provider.cpp b/src/resource_provider/storage/provider.cpp
index 1f30d0a..163ce7f 100644
--- a/src/resource_provider/storage/provider.cpp
+++ b/src/resource_provider/storage/provider.cpp
@@ -705,7 +705,7 @@ Future<Nothing> StorageLocalResourceProviderProcess::recoverServices()
containerId);
if (os::exists(configPath)) {
- Try<CSIPluginContainerInfo> config =
+ Result<CSIPluginContainerInfo> config =
slave::state::read<CSIPluginContainerInfo>(configPath);
if (config.isError()) {
@@ -714,7 +714,8 @@ Future<Nothing> StorageLocalResourceProviderProcess::recoverServices()
configPath + "': " + config.error());
}
- if (getCSIPluginContainerInfo(info, containerId) == config.get()) {
+ if (config.isSome() &&
+ getCSIPluginContainerInfo(info, containerId) == config.get()) {
continue;
}
}
@@ -799,7 +800,7 @@ Future<Nothing> StorageLocalResourceProviderProcess::recoverVolumes()
continue;
}
- Try<csi::state::VolumeState> volumeState =
+ Result<csi::state::VolumeState> volumeState =
slave::state::read<csi::state::VolumeState>(statePath);
if (volumeState.isError()) {
@@ -808,67 +809,69 @@ Future<Nothing> StorageLocalResourceProviderProcess::recoverVolumes()
volumeState.error());
}
- volumes.put(volumeId, std::move(volumeState.get()));
+ if (volumeState.isSome()) {
+ volumes.put(volumeId, std::move(volumeState.get()));
- Future<Nothing> recovered = Nothing();
+ Future<Nothing> recovered = Nothing();
- switch (volumes.at(volumeId).state.state()) {
- case csi::state::VolumeState::CREATED:
- case csi::state::VolumeState::NODE_READY: {
- break;
- }
- case csi::state::VolumeState::PUBLISHED: {
- if (volumes.at(volumeId).state.boot_id() != bootId) {
- // The node has been restarted since the volume is mounted,
- // so it is no longer in the `PUBLISHED` state.
- volumes.at(volumeId).state.set_state(
- csi::state::VolumeState::NODE_READY);
- volumes.at(volumeId).state.clear_boot_id();
- checkpointVolumeState(volumeId);
+ switch (volumes.at(volumeId).state.state()) {
+ case csi::state::VolumeState::CREATED:
+ case csi::state::VolumeState::NODE_READY: {
+ break;
+ }
+ case csi::state::VolumeState::PUBLISHED: {
+ if (volumes.at(volumeId).state.boot_id() != bootId) {
+ // The node has been restarted since the volume is mounted,
+ // so it is no longer in the `PUBLISHED` state.
+ volumes.at(volumeId).state.set_state(
+ csi::state::VolumeState::NODE_READY);
+ volumes.at(volumeId).state.clear_boot_id();
+ checkpointVolumeState(volumeId);
+ }
+ break;
+ }
+ case csi::state::VolumeState::CONTROLLER_PUBLISH: {
+ recovered =
+ volumes.at(volumeId).sequence->add(std::function<Future<Nothing>()>(
+ defer(self(), &Self::controllerPublish, volumeId)));
+ break;
+ }
+ case csi::state::VolumeState::CONTROLLER_UNPUBLISH: {
+ recovered =
+ volumes.at(volumeId).sequence->add(std::function<Future<Nothing>()>(
+ defer(self(), &Self::controllerUnpublish, volumeId)));
+ break;
+ }
+ case csi::state::VolumeState::NODE_PUBLISH: {
+ recovered =
+ volumes.at(volumeId).sequence->add(std::function<Future<Nothing>()>(
+ defer(self(), &Self::nodePublish, volumeId)));
+ break;
+ }
+ case csi::state::VolumeState::NODE_UNPUBLISH: {
+ recovered =
+ volumes.at(volumeId).sequence->add(std::function<Future<Nothing>()>(
+ defer(self(), &Self::nodeUnpublish, volumeId)));
+ break;
+ }
+ case csi::state::VolumeState::UNKNOWN: {
+ recovered = Failure(
+ "Volume '" + volumeId + "' is in " +
+ stringify(volumes.at(volumeId).state.state()) + " state");
}
- break;
- }
- case csi::state::VolumeState::CONTROLLER_PUBLISH: {
- recovered =
- volumes.at(volumeId).sequence->add(std::function<Future<Nothing>()>(
- defer(self(), &Self::controllerPublish, volumeId)));
- break;
- }
- case csi::state::VolumeState::CONTROLLER_UNPUBLISH: {
- recovered =
- volumes.at(volumeId).sequence->add(std::function<Future<Nothing>()>(
- defer(self(), &Self::controllerUnpublish, volumeId)));
- break;
- }
- case csi::state::VolumeState::NODE_PUBLISH: {
- recovered =
- volumes.at(volumeId).sequence->add(std::function<Future<Nothing>()>(
- defer(self(), &Self::nodePublish, volumeId)));
- break;
- }
- case csi::state::VolumeState::NODE_UNPUBLISH: {
- recovered =
- volumes.at(volumeId).sequence->add(std::function<Future<Nothing>()>(
- defer(self(), &Self::nodeUnpublish, volumeId)));
- break;
- }
- case csi::state::VolumeState::UNKNOWN: {
- recovered = Failure(
- "Volume '" + volumeId + "' is in " +
- stringify(volumes.at(volumeId).state.state()) + " state");
- }
- // NOTE: We avoid using a default clause for the following
- // values in proto3's open enum to enable the compiler to detect
- // missing enum cases for us. See:
- // https://github.com/google/protobuf/issues/3917
- case google::protobuf::kint32min:
- case google::protobuf::kint32max: {
- UNREACHABLE();
+ // NOTE: We avoid using a default clause for the following
+ // values in proto3's open enum to enable the compiler to detect
+ // missing enum cases for us. See:
+ // https://github.com/google/protobuf/issues/3917
+ case google::protobuf::kint32min:
+ case google::protobuf::kint32max: {
+ UNREACHABLE();
+ }
}
- }
- futures.push_back(recovered);
+ futures.push_back(recovered);
+ }
}
return collect(futures).then([] { return Nothing(); });
@@ -903,7 +906,7 @@ StorageLocalResourceProviderProcess::recoverResourceProviderState()
return Nothing();
}
- Try<ResourceProviderState> resourceProviderState =
+ Result<ResourceProviderState> resourceProviderState =
slave::state::read<ResourceProviderState>(statePath);
if (resourceProviderState.isError()) {
@@ -912,16 +915,18 @@ StorageLocalResourceProviderProcess::recoverResourceProviderState()
"': " + resourceProviderState.error());
}
- foreach (const Operation& operation,
- resourceProviderState->operations()) {
- Try<id::UUID> uuid = id::UUID::fromBytes(operation.uuid().value());
+ if (resourceProviderState.isSome()) {
+ foreach (const Operation& operation,
+ resourceProviderState->operations()) {
+ Try<id::UUID> uuid = id::UUID::fromBytes(operation.uuid().value());
- CHECK_SOME(uuid);
+ CHECK_SOME(uuid);
- operations[uuid.get()] = operation;
- }
+ operations[uuid.get()] = operation;
+ }
- totalResources = resourceProviderState->resources();
+ totalResources = resourceProviderState->resources();
+ }
}
return Nothing();
http://git-wip-us.apache.org/repos/asf/mesos/blob/6c30b2af/src/slave/containerizer/mesos/paths.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/paths.cpp b/src/slave/containerizer/mesos/paths.cpp
index cb4e29c..bb5570f 100644
--- a/src/slave/containerizer/mesos/paths.cpp
+++ b/src/slave/containerizer/mesos/paths.cpp
@@ -277,7 +277,7 @@ Result<ContainerTermination> getContainerTermination(
return None();
}
- Try<ContainerTermination> termination =
+ Result<ContainerTermination> termination =
state::read<ContainerTermination>(path);
if (termination.isError()) {
@@ -325,7 +325,8 @@ Result<ContainerConfig> getContainerConfig(
return None();
}
- Try<ContainerConfig> containerConfig = state::read<ContainerConfig>(path);
+ Result<ContainerConfig> containerConfig = state::read<ContainerConfig>(path);
+
if (containerConfig.isError()) {
return Error("Failed to read launch config of container: " +
containerConfig.error());
@@ -420,7 +421,7 @@ Result<ContainerLaunchInfo> getContainerLaunchInfo(
return None();
}
- Try<ContainerLaunchInfo> containerLaunchInfo =
+ Result<ContainerLaunchInfo> containerLaunchInfo =
state::read<ContainerLaunchInfo>(path);
if (containerLaunchInfo.isError()) {
http://git-wip-us.apache.org/repos/asf/mesos/blob/6c30b2af/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp b/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp
index 168f786..e1939a4 100644
--- a/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp
+++ b/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp
@@ -256,12 +256,18 @@ Future<Nothing> MetadataManagerProcess::recover()
return Nothing();
}
- Try<Images> images = state::read<Images>(storedImagesPath);
+ Result<Images> images = state::read<Images>(storedImagesPath);
if (images.isError()) {
return Failure("Failed to read images from '" + storedImagesPath + "' " +
images.error());
}
+ if (images.isNone()) {
+ // This could happen if the slave died after opening the file for
+ // writing but before persisted on disk.
+ return Failure("Unexpected empty images file '" + storedImagesPath + "'");
+ }
+
foreach (const Image& image, images.get().images()) {
const string imageReference = stringify(image.reference());
http://git-wip-us.apache.org/repos/asf/mesos/blob/6c30b2af/src/slave/containerizer/mesos/provisioner/provisioner.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/provisioner.cpp b/src/slave/containerizer/mesos/provisioner/provisioner.cpp
index f7dbae6..6280144 100644
--- a/src/slave/containerizer/mesos/provisioner/provisioner.cpp
+++ b/src/slave/containerizer/mesos/provisioner/provisioner.cpp
@@ -429,18 +429,20 @@ Future<Nothing> ProvisionerProcess::recover(
VLOG(1) << "Layers path '" << path << "' is missing for container' "
<< containerId << "'";
} else {
- Try<ContainerLayers> layers = state::read<ContainerLayers>(path);
+ Result<ContainerLayers> layers = state::read<ContainerLayers>(path);
if (layers.isError()) {
return Failure(
"Failed to recover layers for container '" +
stringify(containerId) + "': " + layers.error());
}
- info->layers = vector<string>();
- std::copy(
- layers->paths().begin(),
- layers->paths().end(),
- std::back_inserter(info->layers.get()));
+ if (layers.isSome()) {
+ info->layers = vector<string>();
+ std::copy(
+ layers->paths().begin(),
+ layers->paths().end(),
+ std::back_inserter(info->layers.get()));
+ }
}
infos.put(containerId, info);
http://git-wip-us.apache.org/repos/asf/mesos/blob/6c30b2af/src/slave/state.cpp
----------------------------------------------------------------------
diff --git a/src/slave/state.cpp b/src/slave/state.cpp
index 328de90..e7cf849 100644
--- a/src/slave/state.cpp
+++ b/src/slave/state.cpp
@@ -151,7 +151,8 @@ Try<SlaveState> SlaveState::recover(
return state;
}
- Try<SlaveInfo> slaveInfo = state::read<SlaveInfo>(path);
+ Result<SlaveInfo> slaveInfo = state::read<SlaveInfo>(path);
+
if (slaveInfo.isError()) {
const string& message = "Failed to read agent info from '" + path + "': " +
slaveInfo.error();
@@ -164,6 +165,13 @@ Try<SlaveState> SlaveState::recover(
}
}
+ if (slaveInfo.isNone()) {
+ // This could happen if the slave died after opening the file for
+ // writing but before it checkpointed anything.
+ LOG(WARNING) << "Found empty agent info file '" << path << "'";
+ return state;
+ }
+
state.info = slaveInfo.get();
// Find the frameworks.
@@ -215,7 +223,8 @@ Try<FrameworkState> FrameworkState::recover(
return state;
}
- const Try<FrameworkInfo> frameworkInfo = state::read<FrameworkInfo>(path);
+ const Result<FrameworkInfo> frameworkInfo = state::read<FrameworkInfo>(path);
+
if (frameworkInfo.isError()) {
message = "Failed to read framework info from '" + path + "': " +
frameworkInfo.error();
@@ -229,6 +238,13 @@ Try<FrameworkState> FrameworkState::recover(
}
}
+ if (frameworkInfo.isNone()) {
+ // This could happen if the slave died after opening the file for
+ // writing but before it checkpointed anything.
+ LOG(WARNING) << "Found empty framework info file '" << path << "'";
+ return state;
+ }
+
state.info = frameworkInfo.get();
// Read the framework pid.
@@ -373,7 +389,8 @@ Try<ExecutorState> ExecutorState::recover(
return state;
}
- Try<ExecutorInfo> executorInfo = state::read<ExecutorInfo>(path);
+ Result<ExecutorInfo> executorInfo = state::read<ExecutorInfo>(path);
+
if (executorInfo.isError()) {
message = "Failed to read executor info from '" + path + "': " +
executorInfo.error();
@@ -387,6 +404,13 @@ Try<ExecutorState> ExecutorState::recover(
}
}
+ if (executorInfo.isNone()) {
+ // This could happen if the slave died after opening the file for
+ // writing but before it checkpointed anything.
+ LOG(WARNING) << "Found empty executor info file '" << path << "'";
+ return state;
+ }
+
state.info = executorInfo.get();
return state;
@@ -562,7 +586,8 @@ Try<TaskState> TaskState::recover(
return state;
}
- Try<Task> task = state::read<Task>(path);
+ Result<Task> task = state::read<Task>(path);
+
if (task.isError()) {
message = "Failed to read task info from '" + path + "': " + task.error();
@@ -575,6 +600,13 @@ Try<TaskState> TaskState::recover(
}
}
+ if (task.isNone()) {
+ // This could happen if the slave died after opening the file for
+ // writing but before it checkpointed anything.
+ LOG(WARNING) << "Found empty task info file '" << path << "'";
+ return state;
+ }
+
state.info = task.get();
// Read the status updates.
http://git-wip-us.apache.org/repos/asf/mesos/blob/6c30b2af/src/tests/protobuf_io_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/protobuf_io_tests.cpp b/src/tests/protobuf_io_tests.cpp
index ddbda03..ffb949f 100644
--- a/src/tests/protobuf_io_tests.cpp
+++ b/src/tests/protobuf_io_tests.cpp
@@ -150,7 +150,7 @@ TEST_F(ProtobufIOTest, RepeatedPtrField)
Try<Nothing> write = ::protobuf::write(file, expected);
ASSERT_SOME(write);
- Try<RepeatedPtrField<FrameworkID>> actual =
+ Result<RepeatedPtrField<FrameworkID>> actual =
::protobuf::read<RepeatedPtrField<FrameworkID>>(file);
ASSERT_SOME(actual);
http://git-wip-us.apache.org/repos/asf/mesos/blob/6c30b2af/src/tests/slave_recovery_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_recovery_tests.cpp b/src/tests/slave_recovery_tests.cpp
index 641fc6e..871a801 100644
--- a/src/tests/slave_recovery_tests.cpp
+++ b/src/tests/slave_recovery_tests.cpp
@@ -128,7 +128,7 @@ TEST_F(SlaveStateTest, CheckpointProtobufMessage)
const string file = "slave.id";
slave::state::checkpoint(file, expected);
- const Try<SlaveID> actual = slave::state::read<SlaveID>(file);
+ const Result<SlaveID> actual = slave::state::read<SlaveID>(file);
ASSERT_SOME(actual);
EXPECT_SOME_EQ(expected, actual);
@@ -144,7 +144,7 @@ TEST_F(SlaveStateTest, CheckpointRepeatedProtobufMessages)
const string file = "resources-file";
slave::state::checkpoint(file, expected);
- const Try<Resources> actual = slave::state::read<Resources>(file);
+ const Result<Resources> actual = slave::state::read<Resources>(file);
ASSERT_SOME(actual);
EXPECT_SOME_EQ(expected, actual);