You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2013/06/27 02:56:14 UTC

git commit: Fixed slave to validate slave id during (re-)registration.

Updated Branches:
  refs/heads/master cf5b42c9a -> 4874afe90


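Fixed slave to validate the slave ID it receives from the master during
(re-)registration: if the ID does not match the slave's own ID, the slave now
exits, including when it is already registered (RUNNING).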

Review: https://reviews.apache.org/r/12122


Project: http://git-wip-us.apache.org/repos/asf/incubator-mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-mesos/commit/4874afe9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-mesos/tree/4874afe9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-mesos/diff/4874afe9

Branch: refs/heads/master
Commit: 4874afe90d4341895fe6008637a397e8d52cc092
Parents: cf5b42c
Author: Vinod Kone <vi...@twitter.com>
Authored: Wed Jun 26 14:44:07 2013 -0700
Committer: Vinod Kone <vi...@twitter.com>
Committed: Wed Jun 26 17:52:56 2013 -0700

----------------------------------------------------------------------
 src/master/master.cpp |  2 +-
 src/slave/slave.cpp   | 24 +++++++++++++++++-------
 2 files changed, 18 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-mesos/blob/4874afe9/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index a0b862a..4da8773 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -457,7 +457,7 @@ void Master::exited(const UPID& pid)
   //         its tasks transitioned to LOST and resources recovered.
   foreachvalue (Slave* slave, slaves) {
     if (slave->pid == pid) {
-      LOG(INFO) << "Slave " << slave->id << "(" << slave->info.hostname()
+      LOG(INFO) << "Slave " << slave->id << " (" << slave->info.hostname()
                 << ") disconnected";
 
       // Remove the slave, if it is not checkpointing.

http://git-wip-us.apache.org/repos/asf/incubator-mesos/blob/4874afe9/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 9985f14..40c39bc 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -604,7 +604,12 @@ void Slave::registered(const SlaveID& slaveId)
       break;
     }
     case RUNNING:
-      // Already registered. Ignore registration.
+      // Already registered!
+      if (!(info.id() == slaveId)) {
+       EXIT(1) << "Registered but got wrong id: " << slaveId
+               << "(expected: " << info.id() << "). Committing suicide";
+      }
+      LOG(WARNING) << "Already registered with master " << master;
       break;
     case TERMINATING:
       LOG(WARNING) << "Ignoring registration because slave is terminating";
@@ -625,12 +630,17 @@ void Slave::reregistered(const SlaveID& slaveId)
 
       state = RUNNING;
       if (!(info.id() == slaveId)) {
-        LOG(FATAL) << "Slave re-registered but got wrong id: " << slaveId
-                   << "(expected: " << info.id() << ")";
+        EXIT(1) << "Re-registered but got wrong id: " << slaveId
+                << "(expected: " << info.id() << "). Committing suicide";
       }
       break;
     case RUNNING:
-      // Already registered. Ignore registration.
+      // Already re-registered!
+      if (!(info.id() == slaveId)) {
+        EXIT(1) << "Re-registered but got wrong id: " << slaveId
+                << "(expected: " << info.id() << "). Committing suicide";
+      }
+      LOG(WARNING) << "Already re-registered with master " << master;
       break;
     case TERMINATING:
       LOG(WARNING) << "Ignoring re-registration because slave is terminating";
@@ -732,8 +742,8 @@ void Slave::runTask(
   // the master when the slave re-registers.
 
   if (!(task.slave_id() == info.id())) {
-    LOG(WARNING) << "Ignoring task " << task.task_id()
-                 << " because it was intended for the old slave " << info.id();
+    LOG(WARNING) << "Slave " << info.id() << " ignoring task " << task.task_id()
+                 << " because it was intended for old slave " << task.slave_id();
     return;
   }
 
@@ -1678,7 +1688,7 @@ void Slave::reregisterExecutorTimeout()
 // 2) When slave generates task updates (e.g LOST/KILLED/FAILED).
 void Slave::statusUpdate(const StatusUpdate& update)
 {
-  LOG(INFO) << "Handling status update " << update;
+  LOG(INFO) << "Handling status update " << update << " from " << from;
 
   CHECK(state == RECOVERING || state == DISCONNECTED ||
         state == RUNNING || state == TERMINATING)