You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by be...@apache.org on 2011/06/05 07:28:24 UTC

svn commit: r1131776 - /incubator/mesos/trunk/src/master_detector.cpp

Author: benh
Date: Sun Jun  5 05:28:24 2011
New Revision: 1131776

URL: http://svn.apache.org/viewvc?rev=1131776&view=rev
Log:
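Fix a small bug when reconnecting to ZooKeeper: only check for our ephemeral sequence znode if we are contending for master; otherwise just re-detect the master.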

Modified:
    incubator/mesos/trunk/src/master_detector.cpp

Modified: incubator/mesos/trunk/src/master_detector.cpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/master_detector.cpp?rev=1131776&r1=1131775&r2=1131776&view=diff
==============================================================================
--- incubator/mesos/trunk/src/master_detector.cpp (original)
+++ incubator/mesos/trunk/src/master_detector.cpp Sun Jun  5 05:28:24 2011
@@ -76,7 +76,7 @@ void MasterDetector::process(ZooKeeper *
 	      zk->error(ret), servers.c_str());
 
       if (contend) {
-	// We use the contend with the pid given in constructor.
+	// We contend with the pid given in the constructor.
 	ret = zk->create(znode + "/", pid, ZOO_CREATOR_ALL_ACL,
 			 ZOO_SEQUENCE | ZOO_EPHEMERAL, &result);
 
@@ -96,22 +96,30 @@ void MasterDetector::process(ZooKeeper *
       // Now determine who the master is (it may be us).
       detectMaster();
     } else {
-      // Reconnected. Make sure our ephemeral sequence znode is still there.
-      ret = zk->get(znode + "/" + mySeq, false, &result, NULL);
+      // Reconnected.
+      if (contend) {
+	// Contending for master, confirm our ephemeral sequence znode exists.
+	ret = zk->get(znode + "/" + mySeq, false, &result, NULL);
 
-      // We might no longer be the master! Commit suicide for now
-      // (hoping another master is on standbye), but in the future
-      // it would be nice if we could go back on standbye.
-      if (ret == ZNONODE)
-	fatal("failed to reconnect to ZooKeeper quickly enough "
-	      "(our ephemeral sequence znode is gone), commiting suicide!");
-
-      if (ret != ZOK)
-	fatal("ZooKeeper not responding correctly (%s). "
-	      "Make sure ZooKeeper is running on: %s",
-	      zk->error(ret), servers.c_str());
+	// We might no longer be the master! Commit suicide for now
+	// (hoping another master is on standby), but in the future
+	// it would be nice if we could go back on standby.
+	if (ret == ZNONODE)
+	  fatal("failed to reconnect to ZooKeeper quickly enough "
+		"(our ephemeral sequence znode is gone), commiting suicide!");
+
+	if (ret != ZOK)
+	  fatal("ZooKeeper not responding correctly (%s). "
+		"Make sure ZooKeeper is running on: %s",
+		zk->error(ret), servers.c_str());
+
+	// We are still the master!
+	LOG(INFO) << "Reconnected to Zookeeper, still acting as master.";
+      } else {
+	// Reconnected, but maybe the master changed?
+	detectMaster();
+      }
 
-      // We are still the master!
       reconnect = false;
     }
   } else if ((state == ZOO_CONNECTED_STATE) && (type == ZOO_CHILD_EVENT)) {