You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by be...@apache.org on 2011/06/05 11:24:21 UTC
svn commit: r1132320 - in /incubator/mesos/trunk/src: master/master.cpp
slave/slave.cpp
Author: benh
Date: Sun Jun 5 09:24:21 2011
New Revision: 1132320
URL: http://svn.apache.org/viewvc?rev=1132320&view=rev
Log:
"Fixed" the current semantics of session expiration for the slaves and
add a big comment in the master explaining this.
Modified:
incubator/mesos/trunk/src/master/master.cpp
incubator/mesos/trunk/src/slave/slave.cpp
Modified: incubator/mesos/trunk/src/master/master.cpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/master/master.cpp?rev=1132320&r1=1132319&r2=1132320&view=diff
==============================================================================
--- incubator/mesos/trunk/src/master/master.cpp (original)
+++ incubator/mesos/trunk/src/master/master.cpp Sun Jun 5 09:24:21 2011
@@ -883,11 +883,24 @@ void Master::reregisterSlave(const Slave
LOG(ERROR) << "Slave re-registered without an id!";
send(from(), process::TERMINATE);
} else {
- if (lookupSlave(slaveId) != NULL) {
- // TODO(benh): Once we support handling session expiration, we
- // will want to handle having a slave re-register with us when
- // we already have them recorded.
- LOG(ERROR) << "Slave re-registered with in use id!";
+ Slave* slave = lookupSlave(slaveId);
+ if (slave != NULL) {
+ // TODO(benh): It's still unclear whether or not
+ // MasterDetector::detectMaster will cause spurious
+ // Slave::newMasterDetected to get invoked even though the
+ // ephemeral znode hasn't changed. If that does happen, the
+ // re-register that the slave is trying to do is just
+ // bogus. Letting it re-register might not be all that bad now,
+ // but maybe in the future it's bad because during that
+ // "disconnected" time it might not have received certain
+ // messages from us (like launching a task), and so until we
+ // have some form of task reconciliation between all the
+ // different components, the safe thing to do is have the slave
+ // restart (kind of defeats the purpose of session expiration
+ // support in ZooKeeper if the spurious calls happen each time).
+ LOG(ERROR) << "Slave at " << from()
+ << " attempted to re-register with an already in use id ("
+ << slaveId << ")";
send(from(), process::TERMINATE);
} else {
Slave* slave = new Slave(slaveInfo, slaveId, from(), elapsed());
@@ -1109,6 +1122,7 @@ void Master::deactivatedSlaveHostnamePor
LOG(WARNING) << "Removing slave " << slave->slaveId << " at "
<< hostname << ":" << port
<< " because it has been deactivated";
+ send(slave->pid, process::TERMINATE);
removeSlave(slave);
break;
}
@@ -1649,6 +1663,7 @@ void Master::readdSlave(Slave* slave, co
void Master::removeSlave(Slave* slave)
{
slave->active = false;
+
// TODO: Notify allocator that a slave removal is beginning?
// Remove pointers to slave's tasks in frameworks, and send status updates
Modified: incubator/mesos/trunk/src/slave/slave.cpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/slave/slave.cpp?rev=1132320&r1=1132319&r2=1132320&view=diff
==============================================================================
--- incubator/mesos/trunk/src/slave/slave.cpp (original)
+++ incubator/mesos/trunk/src/slave/slave.cpp Sun Jun 5 09:24:21 2011
@@ -258,7 +258,7 @@ void Slave::initialize()
void Slave::newMasterDetected(const string& pid)
{
- LOG(INFO) << "New master at " << pid;
+ LOG(INFO) << "New master detected at " << pid;
master = pid;
link(master);
@@ -276,9 +276,9 @@ void Slave::newMasterDetected(const stri
foreachpair (_, Framework* framework, frameworks) {
foreachpair (_, Executor* executor, framework->executors) {
- foreachpair (_, Task* task, executor->tasks) {
- out.add_tasks()->MergeFrom(*task);
- }
+ foreachpair (_, Task* task, executor->tasks) {
+ out.add_tasks()->MergeFrom(*task);
+ }
}
}