You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2016/12/21 20:06:52 UTC
[1/5] mesos git commit: Simplified `State::recover` interface in the
agent.
Repository: mesos
Updated Branches:
refs/heads/master 477275744 -> 744e7aa9b
Simplified `State::recover` interface in the agent.
`State::recover` previously returned `Result<State>`, which implied
three possibilities: Error, None (if the `work_dir` doesn't exist), or a
`State` value. However, the agent code treated the latter two cases the
same way: if the `work_dir` doesn't exist, it is simpler to represent
this case by returning a `State` value whose fields are `None()`.
Review: https://reviews.apache.org/r/53233/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/e5fe8119
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/e5fe8119
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/e5fe8119
Branch: refs/heads/master
Commit: e5fe81195b3708f8cabe683c0aa53ae2433506eb
Parents: 4772757
Author: Neil Conway <ne...@gmail.com>
Authored: Wed Dec 21 15:06:07 2016 -0500
Committer: Vinod Kone <vi...@gmail.com>
Committed: Wed Dec 21 15:06:07 2016 -0500
----------------------------------------------------------------------
src/slave/slave.cpp | 10 +++-------
src/slave/slave.hpp | 2 +-
src/slave/state.cpp | 9 ++++-----
src/slave/state.hpp | 11 +++++------
4 files changed, 13 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/e5fe8119/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index a7a3a39..c72959b 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -5181,18 +5181,14 @@ void Slave::_checkDiskUsage(const Future<double>& usage)
}
-Future<Nothing> Slave::recover(const Result<state::State>& state)
+Future<Nothing> Slave::recover(const Try<state::State>& state)
{
if (state.isError()) {
return Failure(state.error());
}
- Option<ResourcesState> resourcesState;
- Option<SlaveState> slaveState;
- if (state.isSome()) {
- resourcesState = state.get().resources;
- slaveState = state.get().slave;
- }
+ Option<ResourcesState> resourcesState = state->resources;
+ Option<SlaveState> slaveState = state->slave;
// Recover checkpointed resources.
// NOTE: 'resourcesState' is None if the slave rootDir does not
http://git-wip-us.apache.org/repos/asf/mesos/blob/e5fe8119/src/slave/slave.hpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.hpp b/src/slave/slave.hpp
index 03860b5..5b4a271 100644
--- a/src/slave/slave.hpp
+++ b/src/slave/slave.hpp
@@ -390,7 +390,7 @@ public:
void checkDiskUsage();
// Recovers the slave, status update manager and isolator.
- process::Future<Nothing> recover(const Result<state::State>& state);
+ process::Future<Nothing> recover(const Try<state::State>& state);
// This is called after 'recover()'. If 'flags.reconnect' is
// 'reconnect', the slave attempts to reconnect to any old live
http://git-wip-us.apache.org/repos/asf/mesos/blob/e5fe8119/src/slave/state.cpp
----------------------------------------------------------------------
diff --git a/src/slave/state.cpp b/src/slave/state.cpp
index a94bb8d..8656694 100644
--- a/src/slave/state.cpp
+++ b/src/slave/state.cpp
@@ -52,20 +52,19 @@ using std::string;
using std::max;
-Result<State> recover(const string& rootDir, bool strict)
+Try<State> recover(const string& rootDir, bool strict)
{
LOG(INFO) << "Recovering state from '" << rootDir << "'";
+ State state;
+
// We consider the absence of 'rootDir' to mean that this is either
// the first time this slave was started or this slave was started after
// an upgrade (--recover=cleanup).
if (!os::exists(rootDir)) {
- return None();
+ return state;
}
- // Now, start to recover state from 'rootDir'.
- State state;
-
// Recover resources regardless whether the host has rebooted.
Try<ResourcesState> resources = ResourcesState::recover(rootDir, strict);
if (resources.isError()) {
http://git-wip-us.apache.org/repos/asf/mesos/blob/e5fe8119/src/slave/state.hpp
----------------------------------------------------------------------
diff --git a/src/slave/state.hpp b/src/slave/state.hpp
index a1c8496..8c57fd6 100644
--- a/src/slave/state.hpp
+++ b/src/slave/state.hpp
@@ -64,14 +64,13 @@ struct TaskState;
// If the 'strict' flag is set, any errors encountered while
// recovering a state are considered fatal and hence the recovery is
// short-circuited and returns an error. There might be orphaned
-// executors that need to be manually cleaned up. If 'strict' flag is
-// not set, any errors encountered are considered non-fatal and the
+// executors that need to be manually cleaned up. If the 'strict' flag
+// is not set, any errors encountered are considered non-fatal and the
// recovery continues by recovering as much of the state as possible,
// while increasing the 'errors' count. Note that 'errors' on a struct
// includes the 'errors' encountered recursively. In other words,
-// 'State.errors' is the sum total of all recovery errors. If the
-// machine has rebooted since the last slave run, None is returned.
-Result<State> recover(const std::string& rootDir, bool strict);
+// 'State.errors' is the sum total of all recovery errors.
+Try<State> recover(const std::string& rootDir, bool strict);
namespace internal {
@@ -312,7 +311,7 @@ struct State
// TODO(jieyu): Consider using a vector of Option<Error> here so
// that we can print all the errors. This also applies to all the
- // State structs below.
+ // State structs above.
unsigned int errors;
};
[4/5] mesos git commit: Cleaned up
Master::markUnreachableAfterFailover.
Posted by vi...@apache.org.
Cleaned up Master::markUnreachableAfterFailover.
Review: https://reviews.apache.org/r/53236/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/f076bb8c
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/f076bb8c
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/f076bb8c
Branch: refs/heads/master
Commit: f076bb8c795cb270c68d79478b252302c8a4de31
Parents: fcf38bf
Author: Neil Conway <ne...@gmail.com>
Authored: Wed Dec 21 15:06:25 2016 -0500
Committer: Vinod Kone <vi...@gmail.com>
Committed: Wed Dec 21 15:06:25 2016 -0500
----------------------------------------------------------------------
src/master/master.cpp | 22 +++++++++++-----------
src/master/master.hpp | 2 +-
2 files changed, 12 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/f076bb8c/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 46ca84a..bbd16d4 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -1861,7 +1861,7 @@ void Master::recoveredSlavesTimeout(const Registry& registry)
const string failure = "Agent removal rate limit acquisition failed";
acquire
- .then(defer(self(), &Self::markUnreachableAfterFailover, slave))
+ .then(defer(self(), &Self::markUnreachableAfterFailover, slave.info()))
.onFailed(lambda::bind(fail, failure, lambda::_1))
.onDiscarded(lambda::bind(fail, failure, "discarded"));
@@ -1870,13 +1870,13 @@ void Master::recoveredSlavesTimeout(const Registry& registry)
}
-Nothing Master::markUnreachableAfterFailover(const Registry::Slave& slave)
+Nothing Master::markUnreachableAfterFailover(const SlaveInfo& slave)
{
// The slave might have reregistered while we were waiting to
// acquire the rate limit.
- if (!slaves.recovered.contains(slave.info().id())) {
+ if (!slaves.recovered.contains(slave.id())) {
LOG(INFO) << "Canceling transition of agent "
- << slave.info().id() << " (" << slave.info().hostname() << ")"
+ << slave.id() << " (" << slave.hostname() << ")"
<< " to unreachable because it re-registered";
++metrics->slave_unreachable_canceled;
@@ -1884,17 +1884,17 @@ Nothing Master::markUnreachableAfterFailover(const Registry::Slave& slave)
}
// The slave might be in the process of reregistering.
- if (slaves.reregistering.contains(slave.info().id())) {
+ if (slaves.reregistering.contains(slave.id())) {
LOG(INFO) << "Canceling transition of agent "
- << slave.info().id() << " (" << slave.info().hostname() << ")"
+ << slave.id() << " (" << slave.hostname() << ")"
<< " to unreachable because it is re-registering";
++metrics->slave_unreachable_canceled;
return Nothing();
}
- LOG(WARNING) << "Agent " << slave.info().id()
- << " (" << slave.info().hostname() << ") did not re-register"
+ LOG(WARNING) << "Agent " << slave.id()
+ << " (" << slave.hostname() << ") did not re-register"
<< " within " << flags.agent_reregister_timeout
<< " after master failover; marking it unreachable";
@@ -1902,13 +1902,13 @@ Nothing Master::markUnreachableAfterFailover(const Registry::Slave& slave)
TimeInfo unreachableTime = protobuf::getCurrentTime();
- slaves.markingUnreachable.insert(slave.info().id());
+ slaves.markingUnreachable.insert(slave.id());
registrar->apply(Owned<Operation>(
- new MarkSlaveUnreachable(slave.info(), unreachableTime)))
+ new MarkSlaveUnreachable(slave, unreachableTime)))
.onAny(defer(self(),
&Self::_markUnreachableAfterFailover,
- slave.info(),
+ slave,
unreachableTime,
lambda::_1));
http://git-wip-us.apache.org/repos/asf/mesos/blob/f076bb8c/src/master/master.hpp
----------------------------------------------------------------------
diff --git a/src/master/master.hpp b/src/master/master.hpp
index 89b3c39..a8bb43b 100644
--- a/src/master/master.hpp
+++ b/src/master/master.hpp
@@ -710,7 +710,7 @@ protected:
// Mark a slave as unreachable in the registry. Called when the slave
// does not re-register in time after a master failover.
- Nothing markUnreachableAfterFailover(const Registry::Slave& slave);
+ Nothing markUnreachableAfterFailover(const SlaveInfo& slave);
void _markUnreachableAfterFailover(
const SlaveInfo& slaveInfo,
[5/5] mesos git commit: Improved slave recovery tests.
Posted by vi...@apache.org.
Improved slave recovery tests.
Check metrics, simplify/remove redundant code.
Review: https://reviews.apache.org/r/53237/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/744e7aa9
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/744e7aa9
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/744e7aa9
Branch: refs/heads/master
Commit: 744e7aa9b66cad0f4c0541380f03d0823d7f5c0f
Parents: f076bb8
Author: Neil Conway <ne...@gmail.com>
Authored: Wed Dec 21 15:06:34 2016 -0500
Committer: Vinod Kone <vi...@gmail.com>
Committed: Wed Dec 21 15:06:34 2016 -0500
----------------------------------------------------------------------
.../docker_containerizer_tests.cpp | 2 -
src/tests/slave_recovery_tests.cpp | 98 +++++++++++---------
2 files changed, 54 insertions(+), 46 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/744e7aa9/src/tests/containerizer/docker_containerizer_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer/docker_containerizer_tests.cpp b/src/tests/containerizer/docker_containerizer_tests.cpp
index 6035171..4e3b67b 100644
--- a/src/tests/containerizer/docker_containerizer_tests.cpp
+++ b/src/tests/containerizer/docker_containerizer_tests.cpp
@@ -2691,7 +2691,6 @@ TEST_F(DockerContainerizerTest, ROOT_DOCKER_SlaveRecoveryTaskContainer)
// Ensure the executor re-registers.
AWAIT_READY(reregisterExecutorMessage);
- UPID executorPid = reregisterExecutorMessage.get().from;
ReregisterExecutorMessage reregister;
reregister.ParseFromString(reregisterExecutorMessage.get().body);
@@ -2880,7 +2879,6 @@ TEST_F(DockerContainerizerTest,
// Ensure the executor re-registers.
AWAIT_READY(reregisterExecutorMessage);
- UPID executorPid = reregisterExecutorMessage.get().from;
ReregisterExecutorMessage reregister;
reregister.ParseFromString(reregisterExecutorMessage.get().body);
http://git-wip-us.apache.org/repos/asf/mesos/blob/744e7aa9/src/tests/slave_recovery_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_recovery_tests.cpp b/src/tests/slave_recovery_tests.cpp
index 5b86c06..2e14f8a 100644
--- a/src/tests/slave_recovery_tests.cpp
+++ b/src/tests/slave_recovery_tests.cpp
@@ -242,6 +242,7 @@ TYPED_TEST(SlaveRecoveryTest, RecoverSlaveState)
// Capture the executor pids.
AWAIT_READY(registerExecutorMessage);
+
RegisterExecutorMessage registerExecutor;
registerExecutor.ParseFromString(registerExecutorMessage.get().body);
ExecutorID executorId = registerExecutor.executor_id();
@@ -400,9 +401,7 @@ TYPED_TEST(SlaveRecoveryTest, RecoverStatusUpdateManager)
driver.launchTasks(offers.get()[0].id(), {task});
- // Capture the executor pid.
AWAIT_READY(registerExecutor);
- UPID executorPid = registerExecutor.get().from;
// Wait for the status update drop.
AWAIT_READY(update);
@@ -774,8 +773,8 @@ TYPED_TEST(SlaveRecoveryTest, ReconnectExecutor)
slave.get()->terminate();
- Future<Message> reregisterExecutorMessage =
- FUTURE_MESSAGE(Eq(ReregisterExecutorMessage().GetTypeName()), _, _);
+ Future<ReregisterExecutorMessage> reregister =
+ FUTURE_PROTOBUF(ReregisterExecutorMessage(), _, _);
Future<TaskStatus> status;
EXPECT_CALL(sched, statusUpdate(_, _))
@@ -791,15 +790,11 @@ TYPED_TEST(SlaveRecoveryTest, ReconnectExecutor)
ASSERT_SOME(slave);
// Ensure the executor re-registers.
- AWAIT_READY(reregisterExecutorMessage);
- UPID executorPid = reregisterExecutorMessage.get().from;
-
- ReregisterExecutorMessage reregister;
- reregister.ParseFromString(reregisterExecutorMessage.get().body);
+ AWAIT_READY(reregister);
// Executor should inform about the unacknowledged update.
- EXPECT_EQ(1, reregister.updates_size());
- const StatusUpdate& update = reregister.updates(0);
+ ASSERT_EQ(1, reregister->updates_size());
+ const StatusUpdate& update = reregister->updates(0);
EXPECT_EQ(task.task_id(), update.status().task_id());
EXPECT_EQ(TASK_RUNNING, update.status().state());
@@ -982,7 +977,6 @@ TYPED_TEST(SlaveRecoveryTest, RecoverUnregisteredExecutor)
// Stop the slave before the executor is registered.
AWAIT_READY(registerExecutor);
- UPID executorPid = registerExecutor.get().from;
slave.get()->terminate();
@@ -1230,12 +1224,12 @@ TYPED_TEST(SlaveRecoveryTest, RecoverTerminatedHTTPExecutor)
slave::state::FrameworkState frameworkState =
state.get().slave.get().frameworks.get(frameworkId.get()).get();
- EXPECT_EQ(1u, frameworkState.executors.size());
+ ASSERT_EQ(1u, frameworkState.executors.size());
slave::state::ExecutorState executorState =
frameworkState.executors.begin()->second;
- EXPECT_EQ(1u, executorState.runs.size());
+ ASSERT_EQ(1u, executorState.runs.size());
slave::state::RunState runState = executorState.runs.begin()->second;
@@ -2304,34 +2298,35 @@ TYPED_TEST(SlaveRecoveryTest, Reboot)
TaskInfo task = createTask(offers1.get()[0], "sleep 1000");
// Capture the slave and framework ids.
- SlaveID slaveId = offers1.get()[0].slave_id();
+ SlaveID slaveId1 = offers1.get()[0].slave_id();
FrameworkID frameworkId = offers1.get()[0].framework_id();
Future<Message> registerExecutorMessage =
FUTURE_MESSAGE(Eq(RegisterExecutorMessage().GetTypeName()), _, _);
- Future<Nothing> status;
+ Future<Nothing> runningStatus;
EXPECT_CALL(sched, statusUpdate(_, _))
- .WillOnce(FutureSatisfy(&status))
+ .WillOnce(FutureSatisfy(&runningStatus))
.WillRepeatedly(Return()); // Ignore subsequent updates.
driver.launchTasks(offers1.get()[0].id(), {task});
// Capture the executor ID and PID.
AWAIT_READY(registerExecutorMessage);
+
RegisterExecutorMessage registerExecutor;
registerExecutor.ParseFromString(registerExecutorMessage.get().body);
ExecutorID executorId = registerExecutor.executor_id();
UPID executorPid = registerExecutorMessage.get().from;
// Wait for TASK_RUNNING update.
- AWAIT_READY(status);
+ AWAIT_READY(runningStatus);
// Capture the container ID.
Future<hashset<ContainerID>> containers = containerizer->containers();
AWAIT_READY(containers);
- EXPECT_EQ(1u, containers.get().size());
+ ASSERT_EQ(1u, containers.get().size());
ContainerID containerId = *containers.get().begin();
@@ -2341,7 +2336,7 @@ TYPED_TEST(SlaveRecoveryTest, Reboot)
// reboot.
string pidPath = paths::getForkedPidPath(
paths::getMetaRootDir(flags.work_dir),
- slaveId,
+ slaveId1,
frameworkId,
executorId,
containerId);
@@ -2364,8 +2359,8 @@ TYPED_TEST(SlaveRecoveryTest, Reboot)
paths::getBootIdPath(paths::getMetaRootDir(flags.work_dir)),
"rebooted! ;)"));
- Future<RegisterSlaveMessage> registerSlave =
- FUTURE_PROTOBUF(RegisterSlaveMessage(), _, _);
+ Future<SlaveRegisteredMessage> slaveRegistered =
+ FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);
// Restart the slave (use same flags) with a new containerizer.
_containerizer = TypeParam::create(flags, true, &fetcher);
@@ -2380,13 +2375,25 @@ TYPED_TEST(SlaveRecoveryTest, Reboot)
slave = this->StartSlave(detector.get(), containerizer.get(), flags);
ASSERT_SOME(slave);
- AWAIT_READY(registerSlave);
+ AWAIT_READY(slaveRegistered);
+
+ SlaveID slaveId2 = slaveRegistered.get().slave_id();
+
+ EXPECT_NE(slaveId1, slaveId2);
// Make sure all slave resources are reoffered.
AWAIT_READY(offers2);
EXPECT_EQ(Resources(offers1.get()[0].resources()),
Resources(offers2.get()[0].resources()));
+ // The old agent ID is not removed (MESOS-5396).
+ JSON::Object stats = Metrics();
+ EXPECT_EQ(0, stats.values["master/tasks_lost"]);
+ EXPECT_EQ(0, stats.values["master/slave_unreachable_scheduled"]);
+ EXPECT_EQ(0, stats.values["master/slave_unreachable_completed"]);
+ EXPECT_EQ(0, stats.values["master/slave_removals"]);
+ EXPECT_EQ(0, stats.values["master/slave_removals/reason_registered"]);
+
driver.stop();
driver.join();
}
@@ -2442,8 +2449,8 @@ TYPED_TEST(SlaveRecoveryTest, GCExecutor)
SlaveID slaveId = offers1.get()[0].slave_id();
FrameworkID frameworkId = offers1.get()[0].framework_id();
- Future<Message> registerExecutorMessage =
- FUTURE_MESSAGE(Eq(RegisterExecutorMessage().GetTypeName()), _, _);
+ Future<RegisterExecutorMessage> registerExecutor =
+ FUTURE_PROTOBUF(RegisterExecutorMessage(), _, _);
Future<Nothing> status;
EXPECT_CALL(sched, statusUpdate(_, _))
@@ -2452,12 +2459,9 @@ TYPED_TEST(SlaveRecoveryTest, GCExecutor)
driver.launchTasks(offers1.get()[0].id(), {task});
- // Capture the executor id and pid.
- AWAIT_READY(registerExecutorMessage);
- RegisterExecutorMessage registerExecutor;
- registerExecutor.ParseFromString(registerExecutorMessage.get().body);
- ExecutorID executorId = registerExecutor.executor_id();
- UPID executorPid = registerExecutorMessage.get().from;
+ // Capture the executor id.
+ AWAIT_READY(registerExecutor);
+ ExecutorID executorId = registerExecutor->executor_id();
// Wait for TASK_RUNNING update.
AWAIT_READY(status);
@@ -2819,11 +2823,7 @@ TYPED_TEST(SlaveRecoveryTest, RegisterDisconnectedSlave)
driver.launchTasks(offers.get()[0].id(), {task});
- // Capture the executor pid.
AWAIT_READY(registerExecutorMessage);
- RegisterExecutorMessage registerExecutor;
- registerExecutor.ParseFromString(registerExecutorMessage.get().body);
- UPID executorPid = registerExecutorMessage.get().from;
// Wait for TASK_RUNNING update.
AWAIT_READY(status);
@@ -2831,7 +2831,9 @@ TYPED_TEST(SlaveRecoveryTest, RegisterDisconnectedSlave)
EXPECT_CALL(sched, slaveLost(_, _))
.Times(AtMost(1));
- slave.get()->terminate();
+ UPID slavePid = slave.get()->pid;
+
+ slave->reset();
Future<TaskStatus> status2;
EXPECT_CALL(sched, statusUpdate(_, _))
@@ -2840,7 +2842,7 @@ TYPED_TEST(SlaveRecoveryTest, RegisterDisconnectedSlave)
// Spoof the registration attempt of a slave that failed recovery.
// We do this because simply restarting the slave will result in a slave
// with a different pid than the previous one.
- post(slave.get()->pid, master.get()->pid, registerSlaveMessage.get());
+ post(slavePid, master.get()->pid, registerSlaveMessage.get());
// Scheduler should get a TASK_LOST message.
AWAIT_READY(status2);
@@ -2849,6 +2851,14 @@ TYPED_TEST(SlaveRecoveryTest, RegisterDisconnectedSlave)
EXPECT_EQ(TaskStatus::SOURCE_MASTER, status2.get().source());
EXPECT_EQ(TaskStatus::REASON_SLAVE_REMOVED, status2.get().reason());
+ JSON::Object stats = Metrics();
+ EXPECT_EQ(1, stats.values["master/tasks_lost"]);
+ EXPECT_EQ(0, stats.values["master/tasks_unreachable"]);
+ EXPECT_EQ(0, stats.values["master/slave_unreachable_scheduled"]);
+ EXPECT_EQ(0, stats.values["master/slave_unreachable_completed"]);
+ EXPECT_EQ(1, stats.values["master/slave_removals"]);
+ EXPECT_EQ(1, stats.values["master/slave_removals/reason_registered"]);
+
driver.stop();
driver.join();
@@ -3202,12 +3212,12 @@ TYPED_TEST(SlaveRecoveryTest, ReconcileTasksMissingFromSlave)
slave::state::FrameworkState frameworkState =
state.get().slave.get().frameworks.get(frameworkId.get()).get();
- EXPECT_EQ(1u, frameworkState.executors.size());
+ ASSERT_EQ(1u, frameworkState.executors.size());
slave::state::ExecutorState executorState =
frameworkState.executors.begin()->second;
- EXPECT_EQ(1u, executorState.runs.size());
+ ASSERT_EQ(1u, executorState.runs.size());
slave::state::RunState runState = executorState.runs.begin()->second;
@@ -3796,7 +3806,7 @@ TYPED_TEST(SlaveRecoveryTest, MultipleSlaves)
ASSERT_SOME(slave1);
AWAIT_READY(offers1);
- EXPECT_EQ(1u, offers1.get().size());
+ ASSERT_FALSE(offers1->empty());
// Launch a long running task in the first slave.
TaskInfo task1 = createTask(offers1.get()[0], "sleep 1000");
@@ -3837,7 +3847,7 @@ TYPED_TEST(SlaveRecoveryTest, MultipleSlaves)
ASSERT_SOME(slave2);
AWAIT_READY(offers2);
- EXPECT_EQ(1u, offers2.get().size());
+ ASSERT_FALSE(offers2->empty());
// Launch a long running task in each slave.
TaskInfo task2 = createTask(offers2.get()[0], "sleep 1000");
@@ -4102,7 +4112,7 @@ TEST_F(MesosContainerizerSlaveRecoveryTest, ResourceStatistics)
Future<hashset<ContainerID>> containers = containerizer->containers();
AWAIT_READY(containers);
- EXPECT_EQ(1u, containers.get().size());
+ ASSERT_EQ(1u, containers.get().size());
ContainerID containerId = *(containers.get().begin());
@@ -4194,7 +4204,7 @@ TEST_F(MesosContainerizerSlaveRecoveryTest, CGROUPS_ROOT_PidNamespaceForward)
Future<hashset<ContainerID>> containers = containerizer->containers();
AWAIT_READY(containers);
- EXPECT_EQ(1u, containers.get().size());
+ ASSERT_EQ(1u, containers.get().size());
ContainerID containerId = *(containers.get().begin());
@@ -4300,7 +4310,7 @@ TEST_F(MesosContainerizerSlaveRecoveryTest, CGROUPS_ROOT_PidNamespaceBackward)
Future<hashset<ContainerID>> containers = containerizer->containers();
AWAIT_READY(containers);
- EXPECT_EQ(1u, containers.get().size());
+ ASSERT_EQ(1u, containers.get().size());
ContainerID containerId = *(containers.get().begin());
[2/5] mesos git commit: Logged warning message if reading previous
boot ID fails.
Posted by vi...@apache.org.
Logged warning message if reading previous boot ID fails.
Not much we can do here, but as a general rule, we should never silently
ignore I/O errors.
Review: https://reviews.apache.org/r/53234/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/c9807d60
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/c9807d60
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/c9807d60
Branch: refs/heads/master
Commit: c9807d60ad58a44513c812a6b6c08fbd352b441f
Parents: e5fe811
Author: Neil Conway <ne...@gmail.com>
Authored: Wed Dec 21 15:06:13 2016 -0500
Committer: Vinod Kone <vi...@gmail.com>
Committed: Wed Dec 21 15:06:13 2016 -0500
----------------------------------------------------------------------
src/slave/state.cpp | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/c9807d60/src/slave/state.cpp
----------------------------------------------------------------------
diff --git a/src/slave/state.cpp b/src/slave/state.cpp
index 8656694..7926117 100644
--- a/src/slave/state.cpp
+++ b/src/slave/state.cpp
@@ -75,11 +75,14 @@ Try<State> recover(const string& rootDir, bool strict)
// resources checkpoint file.
state.resources = resources.get();
- // Did the machine reboot? No need to recover slave state if the
- // machine has rebooted.
- if (os::exists(paths::getBootIdPath(rootDir))) {
- Try<string> read = os::read(paths::getBootIdPath(rootDir));
- if (read.isSome()) {
+ // If the machine has rebooted, skip recovering slave state.
+ const string& bootIdPath = paths::getBootIdPath(rootDir);
+ if (os::exists(bootIdPath)) {
+ Try<string> read = os::read(bootIdPath);
+ if (read.isError()) {
+ LOG(WARNING) << "Failed to read '"
+ << bootIdPath << "': " << read.error();
+ } else {
Try<string> id = os::bootId();
CHECK_SOME(id);
[3/5] mesos git commit: Cleaned up header includes and `using`.
Posted by vi...@apache.org.
Cleaned up header includes and `using`.
Review: https://reviews.apache.org/r/53235/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/fcf38bf4
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/fcf38bf4
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/fcf38bf4
Branch: refs/heads/master
Commit: fcf38bf4fde10711abc40f1d5133a6d9f0540ed2
Parents: c9807d6
Author: Neil Conway <ne...@gmail.com>
Authored: Wed Dec 21 15:06:19 2016 -0500
Committer: Vinod Kone <vi...@gmail.com>
Committed: Wed Dec 21 15:06:19 2016 -0500
----------------------------------------------------------------------
src/slave/state.cpp | 5 ++++-
src/slave/state.hpp | 3 +--
2 files changed, 5 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/fcf38bf4/src/slave/state.cpp
----------------------------------------------------------------------
diff --git a/src/slave/state.cpp b/src/slave/state.cpp
index 7926117..6894875 100644
--- a/src/slave/state.cpp
+++ b/src/slave/state.cpp
@@ -28,14 +28,17 @@
#include <stout/numify.hpp>
#include <stout/path.hpp>
#include <stout/protobuf.hpp>
+#include <stout/strings.hpp>
#include <stout/try.hpp>
#include <stout/os/bootid.hpp>
#include <stout/os/close.hpp>
#include <stout/os/exists.hpp>
#include <stout/os/ftruncate.hpp>
+#include <stout/os/ls.hpp>
#include <stout/os/read.hpp>
#include <stout/os/realpath.hpp>
+#include <stout/os/stat.hpp>
#include "messages/messages.hpp"
@@ -48,8 +51,8 @@ namespace slave {
namespace state {
using std::list;
-using std::string;
using std::max;
+using std::string;
Try<State> recover(const string& rootDir, bool strict)
http://git-wip-us.apache.org/repos/asf/mesos/blob/fcf38bf4/src/slave/state.hpp
----------------------------------------------------------------------
diff --git a/src/slave/state.hpp b/src/slave/state.hpp
index 8c57fd6..a497ce1 100644
--- a/src/slave/state.hpp
+++ b/src/slave/state.hpp
@@ -28,12 +28,11 @@
#include <process/pid.hpp>
-#include <stout/foreach.hpp>
#include <stout/hashmap.hpp>
#include <stout/hashset.hpp>
#include <stout/path.hpp>
#include <stout/protobuf.hpp>
-#include <stout/strings.hpp>
+#include <stout/try.hpp>
#include <stout/utils.hpp>
#include <stout/uuid.hpp>