You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by be...@apache.org on 2011/06/05 11:24:21 UTC

svn commit: r1132320 - in /incubator/mesos/trunk/src: master/master.cpp slave/slave.cpp

Author: benh
Date: Sun Jun  5 09:24:21 2011
New Revision: 1132320

URL: http://svn.apache.org/viewvc?rev=1132320&view=rev
Log:
"Fixed" the current semantics of session expiration for the slaves and
add a big comment in the master explaining this.

Modified:
    incubator/mesos/trunk/src/master/master.cpp
    incubator/mesos/trunk/src/slave/slave.cpp

Modified: incubator/mesos/trunk/src/master/master.cpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/master/master.cpp?rev=1132320&r1=1132319&r2=1132320&view=diff
==============================================================================
--- incubator/mesos/trunk/src/master/master.cpp (original)
+++ incubator/mesos/trunk/src/master/master.cpp Sun Jun  5 09:24:21 2011
@@ -883,11 +883,24 @@ void Master::reregisterSlave(const Slave
     LOG(ERROR) << "Slave re-registered without an id!";
     send(from(), process::TERMINATE);
   } else {
-    if (lookupSlave(slaveId) != NULL) {
-      // TODO(benh): Once we support handling session expiration, we
-      // will want to handle having a slave re-register with us when
-      // we already have them recorded.
-      LOG(ERROR) << "Slave re-registered with in use id!";
+    Slave* slave = lookupSlave(slaveId);
+    if (slave != NULL) {
+      // TODO(benh): It's still unclear whether or not
+      // MasterDetector::detectMaster will cause spurious
+      // Slave::newMasterDetected to get invoked even though the
+      // ephemeral znode hasn't changed. If that does happen, the
+      // re-register that the slave is trying to do is just
+      // bogus. Letting it re-register might not be all that bad now,
+      // but maybe in the future it's bad because during that
+      // "disconnected" time it might not have received certain
+      // messages from us (like launching a task), and so until we
+      // have some form of task reconciliation between all the
+      // different components, the safe thing to do is have the slave
+      // restart (kind of defeats the purpose of session expiration
+      // support in ZooKeeper if the spurious calls happen each time).
+      LOG(ERROR) << "Slave at " << from()
+		 << " attempted to re-register with an already in use id ("
+		 << slaveId << ")";
       send(from(), process::TERMINATE);
     } else {
       Slave* slave = new Slave(slaveInfo, slaveId, from(), elapsed());
@@ -1109,6 +1122,7 @@ void Master::deactivatedSlaveHostnamePor
         LOG(WARNING) << "Removing slave " << slave->slaveId << " at "
 		     << hostname << ":" << port
                      << " because it has been deactivated";
+	send(slave->pid, process::TERMINATE);
         removeSlave(slave);
         break;
       }
@@ -1649,6 +1663,7 @@ void Master::readdSlave(Slave* slave, co
 void Master::removeSlave(Slave* slave)
 { 
   slave->active = false;
+
   // TODO: Notify allocator that a slave removal is beginning?
   
   // Remove pointers to slave's tasks in frameworks, and send status updates

Modified: incubator/mesos/trunk/src/slave/slave.cpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/slave/slave.cpp?rev=1132320&r1=1132319&r2=1132320&view=diff
==============================================================================
--- incubator/mesos/trunk/src/slave/slave.cpp (original)
+++ incubator/mesos/trunk/src/slave/slave.cpp Sun Jun  5 09:24:21 2011
@@ -258,7 +258,7 @@ void Slave::initialize()
 
 void Slave::newMasterDetected(const string& pid)
 {
-  LOG(INFO) << "New master at " << pid;
+  LOG(INFO) << "New master detected at " << pid;
 
   master = pid;
   link(master);
@@ -276,9 +276,9 @@ void Slave::newMasterDetected(const stri
 
     foreachpair (_, Framework* framework, frameworks) {
       foreachpair (_, Executor* executor, framework->executors) {
-        foreachpair (_, Task* task, executor->tasks) {
-          out.add_tasks()->MergeFrom(*task);
-        }
+	foreachpair (_, Task* task, executor->tasks) {
+	  out.add_tasks()->MergeFrom(*task);
+	}
       }
     }