You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zookeeper.apache.org by ph...@apache.org on 2008/10/08 20:00:34 UTC
svn commit: r702943 - in /hadoop/zookeeper/trunk: CHANGES.txt
src/java/main/org/apache/zookeeper/server/quorum/FastLeaderElection.java
src/java/main/org/apache/zookeeper/server/quorum/Vote.java
Author: phunt
Date: Wed Oct 8 11:00:33 2008
New Revision: 702943
URL: http://svn.apache.org/viewvc?rev=702943&view=rev
Log:
ZOOKEEPER-159. Cover two corner cases of leader election
Modified:
hadoop/zookeeper/trunk/CHANGES.txt
hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/FastLeaderElection.java
hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Vote.java
Modified: hadoop/zookeeper/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/CHANGES.txt?rev=702943&r1=702942&r2=702943&view=diff
==============================================================================
--- hadoop/zookeeper/trunk/CHANGES.txt (original)
+++ hadoop/zookeeper/trunk/CHANGES.txt Wed Oct 8 11:00:33 2008
@@ -20,6 +20,8 @@
BUGFIXES:
+ ZOOKEEPER-159. Cover two corner cases of leader election
+
ZOOKEEPER-156. update programmer guide with acl details from old wiki page
(phunt)
Modified: hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/FastLeaderElection.java
URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/FastLeaderElection.java?rev=702943&r1=702942&r2=702943&view=diff
==============================================================================
--- hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/FastLeaderElection.java (original)
+++ hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/FastLeaderElection.java Wed Oct 8 11:00:33 2008
@@ -445,20 +445,21 @@
* @param zxid zxid of the vote received last
*/
private boolean termPredicate(
- HashMap<Long, Vote> votes, long l,
- long zxid) {
+ HashMap<Long, Vote> votes,
+ Vote vote) {
- int count = 0;
Collection<Vote> votesCast = votes.values();
+ int count = 0;
+
/*
* First make the views consistent. Sometimes peers will have
* different zxids for a server depending on timing.
*/
for (Vote v : votesCast) {
- if ((v.id == l) && (v.zxid == zxid))
+ if (v.equals(vote))
count++;
}
-
+
if (count > (self.quorumPeers.size() / 2))
return true;
else
@@ -466,6 +467,29 @@
}
+ /**
+ * When a leader appears to have been elected and a quorum supports
+ * it, we still have to check that the leader itself has voted and
+ * acknowledged that it is leading. Without this check, peers could
+ * keep re-electing, over and over, a peer that has crashed and is
+ * no longer leading.
+ *
+ * @param votes set of votes
+ * @param leader leader id
+ * @param epoch epoch id
+ */
+ private boolean checkLeader(
+ HashMap<Long, Vote> votes,
+ long leader,
+ long epoch){
+
+ boolean predicate = true;
+ if(votes.get(leader) == null) predicate = false;
+ else if(votes.get(leader).state != ServerState.LEADING) predicate = false;
+
+ return predicate;
+ }
+
synchronized void updateProposal(long leader, long zxid){
proposedLeader = leader;
proposedZxid = zxid;
@@ -522,7 +546,7 @@
if (n.epoch > logicalclock) {
logicalclock = n.epoch;
recvset.clear();
- updateProposal(n.leader, n.zxid);
+ updateProposal(self.getId(), self.getLastLoggedZxid());
sendNotifications();
} else if (n.epoch < logicalclock) {
break;
@@ -531,7 +555,7 @@
sendNotifications();
}
- recvset.put(n.sid, new Vote(n.leader, n.zxid));
+ recvset.put(n.sid, new Vote(n.leader, n.zxid, n.epoch));
//If we have received votes from all nodes, then terminate
if (self.quorumPeers.size() == recvset.size()) {
@@ -540,7 +564,7 @@
leaveInstance();
return new Vote(proposedLeader, proposedZxid);
- } else if (termPredicate(recvset, proposedLeader, proposedZxid)) {
+ } else if (termPredicate(recvset, new Vote(proposedLeader, proposedZxid, logicalclock))) {
//Otherwise, wait for a fixed amount of time
LOG.debug("Passed predicate");
@@ -565,15 +589,16 @@
case LEADING:
case FOLLOWING:
LOG.info("Notification: " + n.leader + ", " + n.zxid + ", " + n.epoch + ", " + self.getId() + ", " + self.getPeerState() + ", " + n.state + ", " + n.sid);
-
- if(n.epoch >= logicalclock)
- outofelection.put(n.sid, new Vote(n.leader, n.zxid));
+
+ outofelection.put(n.sid, new Vote(n.leader, n.zxid, n.epoch, n.state));
- if (termPredicate(outofelection, n.leader, n.zxid)) {
-
- self.setPeerState((n.leader == self.getId()) ?
+ if (termPredicate(outofelection, new Vote(n.leader, n.zxid, n.epoch, n.state))
+ && checkLeader(outofelection, n.leader, n.epoch)) {
+ synchronized(this){
+ logicalclock = n.epoch;
+ self.setPeerState((n.leader == self.getId()) ?
ServerState.LEADING: ServerState.FOLLOWING);
-
+ }
leaveInstance();
return new Vote(n.leader, n.zxid);
}
Modified: hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Vote.java
URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Vote.java?rev=702943&r1=702942&r2=702943&view=diff
==============================================================================
--- hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Vote.java (original)
+++ hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Vote.java Wed Oct 8 11:00:33 2008
@@ -18,6 +18,8 @@
package org.apache.zookeeper.server.quorum;
+import org.apache.zookeeper.server.quorum.QuorumPeer.ServerState;
+
public class Vote {
public Vote(long id, long zxid) {
@@ -25,17 +27,34 @@
this.zxid = zxid;
}
+ public Vote(long id, long zxid, long epoch) {
+ this.id = id;
+ this.zxid = zxid;
+ this.epoch = epoch;
+ }
+
+ public Vote(long id, long zxid, long epoch, ServerState state) {
+ this.id = id;
+ this.zxid = zxid;
+ this.epoch = epoch;
+ this.state = state;
+ }
+
public long id;
public long zxid;
+ public long epoch = -1;
+
+ public ServerState state = ServerState.LOOKING;
+
@Override
public boolean equals(Object o) {
if (!(o instanceof Vote)) {
return false;
}
Vote other = (Vote) o;
- return id == other.id && zxid == other.zxid;
+ return (id == other.id && zxid == other.zxid && epoch == other.epoch);
}