You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2013/12/03 20:42:56 UTC
[1/2] git commit: Fixed MESOS-851: The drivers now ignore queued
messages when aborted.
Updated Branches:
refs/heads/master bfb16a2e1 -> 4173ec935
Fixed MESOS-851: The drivers now ignore queued messages when aborted.
Review: https://reviews.apache.org/r/15872
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/853c606c
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/853c606c
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/853c606c
Branch: refs/heads/master
Commit: 853c606c0a8de6346aeefac9b48874d4d4ac38d5
Parents: bfb16a2
Author: Benjamin Mahler <bm...@twitter.com>
Authored: Tue Nov 26 19:39:28 2013 -0800
Committer: Benjamin Mahler <bm...@twitter.com>
Committed: Tue Dec 3 11:26:18 2013 -0800
----------------------------------------------------------------------
src/exec/exec.cpp | 14 ++++++++++++--
src/sched/sched.cpp | 25 +++++++++++++++++--------
2 files changed, 29 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/853c606c/src/exec/exec.cpp
----------------------------------------------------------------------
diff --git a/src/exec/exec.cpp b/src/exec/exec.cpp
index a58203b..26a927a 100644
--- a/src/exec/exec.cpp
+++ b/src/exec/exec.cpp
@@ -407,7 +407,7 @@ protected:
void abort()
{
VLOG(1) << "De-activating the executor libprocess";
- aborted = true;
+ CHECK(aborted);
Lock lock(mutex);
pthread_cond_signal(cond);
@@ -548,7 +548,7 @@ private:
bool connected; // Registered with the slave.
UUID connection; // UUID to identify the connection instance.
bool local;
- bool aborted;
+ volatile bool aborted;
pthread_mutex_t* mutex;
pthread_cond_t* cond;
const string directory;
@@ -742,6 +742,16 @@ Status MesosExecutorDriver::abort()
CHECK(process != NULL);
+ // We set the volatile aborted to true here to prevent any further
+ // messages from being processed in the ExecutorProcess. However,
+ // if abort() is called from another thread as the ExecutorProcess,
+ // there may be at most one additional message processed.
+ // TODO(bmahler): Use an atomic boolean.
+ process->aborted = true;
+
+ // Dispatching here ensures that we still process the outstanding
+ // requests *from* the executor, since those do proceed when
+ // aborted is true.
dispatch(process, &ExecutorProcess::abort);
return status = DRIVER_ABORTED;
http://git-wip-us.apache.org/repos/asf/mesos/blob/853c606c/src/sched/sched.cpp
----------------------------------------------------------------------
diff --git a/src/sched/sched.cpp b/src/sched/sched.cpp
index b958435..c465356 100644
--- a/src/sched/sched.cpp
+++ b/src/sched/sched.cpp
@@ -743,18 +743,17 @@ protected:
{
VLOG(1) << "Aborting framework '" << framework.id() << "'";
- aborted = true;
+ CHECK(aborted);
if (!connected) {
VLOG(1) << "Not sending a deactivate message as master is disconnected";
- return;
+ } else {
+ DeactivateFrameworkMessage message;
+ message.mutable_framework_id()->MergeFrom(framework.id());
+ CHECK_SOME(master);
+ send(master.get(), message);
}
- DeactivateFrameworkMessage message;
- message.mutable_framework_id()->MergeFrom(framework.id());
- CHECK_SOME(master);
- send(master.get(), message);
-
Lock lock(mutex);
pthread_cond_signal(cond);
}
@@ -982,7 +981,7 @@ private:
bool failover;
Result<UPID> master;
- volatile bool connected; // Flag to indicate if framework is registered.
+ bool connected; // Flag to indicate if framework is registered.
volatile bool aborted; // Flag to indicate if the driver is aborted.
MasterDetector* detector;
@@ -1269,6 +1268,16 @@ Status MesosSchedulerDriver::abort()
CHECK(process != NULL);
+ // We set the volatile aborted to true here to prevent any further
+ // messages from being processed in the SchedulerProcess. However,
+ // if abort() is called from another thread as the SchedulerProcess,
+ // there may be at most one additional message processed.
+ // TODO(bmahler): Use an atomic boolean.
+ process->aborted = true;
+
+ // Dispatching here ensures that we still process the outstanding
+ // requests *from* the scheduler, since those do proceed when
+ // aborted is true.
dispatch(process, &SchedulerProcess::abort);
return status = DRIVER_ABORTED;
[2/2] git commit: Fixed a flaky test:
FaultToleranceTest.ReregisterFrameworkExitedExecutor.
Posted by bm...@apache.org.
Fixed a flaky test: FaultToleranceTest.ReregisterFrameworkExitedExecutor.
Review: https://reviews.apache.org/r/15955
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/4173ec93
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/4173ec93
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/4173ec93
Branch: refs/heads/master
Commit: 4173ec9358c7882eafdbf0dd8ef99173b3f05257
Parents: 853c606
Author: Benjamin Mahler <bm...@twitter.com>
Authored: Mon Dec 2 17:33:55 2013 -0800
Committer: Benjamin Mahler <bm...@twitter.com>
Committed: Tue Dec 3 11:36:59 2013 -0800
----------------------------------------------------------------------
src/tests/fault_tolerance_tests.cpp | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/4173ec93/src/tests/fault_tolerance_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/fault_tolerance_tests.cpp b/src/tests/fault_tolerance_tests.cpp
index 27095cc..7157292 100644
--- a/src/tests/fault_tolerance_tests.cpp
+++ b/src/tests/fault_tolerance_tests.cpp
@@ -1175,17 +1175,18 @@ TEST_F(FaultToleranceTest, ReregisterFrameworkExitedExecutor)
// Wait for the slave to re-register.
AWAIT_READY(slaveReregisteredMessage);
- // Allow the executor exited message and drop the status update.
+ // Allow the executor exited message and drop the status update,
+ // it's possible for a duplicate update to occur if the status
+ // update manager is notified of the new master after the task was
+ // killed.
Future<ExitedExecutorMessage> executorExitedMessage =
FUTURE_PROTOBUF(ExitedExecutorMessage(), _, _);
- Future<StatusUpdateMessage> statusUpdateMessage =
- DROP_PROTOBUF(StatusUpdateMessage(), _, _);
+ DROP_PROTOBUFS(StatusUpdateMessage(), _, _);
// Now kill the executor.
dispatch(isolator, &Isolator::killExecutor, frameworkId, DEFAULT_EXECUTOR_ID);
AWAIT_READY(executorExitedMessage);
- AWAIT_READY(statusUpdateMessage);
// Now notify the framework of the new master.
Future<FrameworkRegisteredMessage> frameworkRegisteredMessage2 =