You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zookeeper.apache.org by ph...@apache.org on 2008/10/08 20:00:34 UTC

svn commit: r702943 - in /hadoop/zookeeper/trunk: CHANGES.txt src/java/main/org/apache/zookeeper/server/quorum/FastLeaderElection.java src/java/main/org/apache/zookeeper/server/quorum/Vote.java

Author: phunt
Date: Wed Oct  8 11:00:33 2008
New Revision: 702943

URL: http://svn.apache.org/viewvc?rev=702943&view=rev
Log:
ZOOKEEPER-159. Cover two corner cases of leader election

Modified:
    hadoop/zookeeper/trunk/CHANGES.txt
    hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/FastLeaderElection.java
    hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Vote.java

Modified: hadoop/zookeeper/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/CHANGES.txt?rev=702943&r1=702942&r2=702943&view=diff
==============================================================================
--- hadoop/zookeeper/trunk/CHANGES.txt (original)
+++ hadoop/zookeeper/trunk/CHANGES.txt Wed Oct  8 11:00:33 2008
@@ -20,6 +20,8 @@
 
   BUGFIXES: 
 
+  ZOOKEEPER-159. Cover two corner cases of leader election
+
   ZOOKEEPER-156. update programmer guide with acl details from old wiki page
   (phunt)
 

Modified: hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/FastLeaderElection.java
URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/FastLeaderElection.java?rev=702943&r1=702942&r2=702943&view=diff
==============================================================================
--- hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/FastLeaderElection.java (original)
+++ hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/FastLeaderElection.java Wed Oct  8 11:00:33 2008
@@ -445,20 +445,21 @@
      *  @param zxid     zxid of the the vote received last
      */
     private boolean termPredicate(
-            HashMap<Long, Vote> votes, long l,
-            long zxid) {
+            HashMap<Long, Vote> votes, 
+            Vote vote) {
 
-        int count = 0;
         Collection<Vote> votesCast = votes.values();
+        int count = 0;
+        
         /*
          * First make the views consistent. Sometimes peers will have
          * different zxids for a server depending on timing.
          */
         for (Vote v : votesCast) {
-            if ((v.id == l) && (v.zxid == zxid))
+            if (v.equals(vote))
                 count++;
         }
-        
+                      
         if (count > (self.quorumPeers.size() / 2))
             return true;
         else
@@ -466,6 +467,29 @@
 
     }
 
+    /**
+     * When a leader has been elected and a quorum supports it, we
+     * still have to check that the leader itself has voted and
+     * acknowledged that it is leading. Without this check, peers
+     * could keep electing, over and over, a peer that has crashed
+     * and is no longer leading.
+     * 
+     * @param votes set of votes
+     * @param   leader  leader id
+     * @param   epoch   epoch id
+     */
+    private boolean checkLeader(
+            HashMap<Long, Vote> votes,
+            long leader,
+            long epoch){
+        
+        boolean predicate = true;
+        if(votes.get(leader) == null) predicate = false;
+        else if(votes.get(leader).state != ServerState.LEADING) predicate = false;
+        
+        return predicate;
+    }
+    
     synchronized void updateProposal(long leader, long zxid){
         proposedLeader = leader;
         proposedZxid = zxid;
@@ -522,7 +546,7 @@
                 if (n.epoch > logicalclock) {
                     logicalclock = n.epoch;
                     recvset.clear();
-                    updateProposal(n.leader, n.zxid);
+                    updateProposal(self.getId(), self.getLastLoggedZxid());
                     sendNotifications();
                 } else if (n.epoch < logicalclock) {
                     break;
@@ -531,7 +555,7 @@
                     sendNotifications();
                 }
                 
-                recvset.put(n.sid, new Vote(n.leader, n.zxid));
+                recvset.put(n.sid, new Vote(n.leader, n.zxid, n.epoch));
 
                 //If have received from all nodes, then terminate
                 if (self.quorumPeers.size() == recvset.size()) {
@@ -540,7 +564,7 @@
                     leaveInstance();
                     return new Vote(proposedLeader, proposedZxid);
 
-                } else if (termPredicate(recvset, proposedLeader, proposedZxid)) {
+                } else if (termPredicate(recvset, new Vote(proposedLeader, proposedZxid, logicalclock))) {
                     //Otherwise, wait for a fixed amount of time
                     LOG.debug("Passed predicate");
 
@@ -565,15 +589,16 @@
             case LEADING:
             case FOLLOWING:
                 LOG.info("Notification: " + n.leader + ", " + n.zxid + ", " + n.epoch + ", " + self.getId() + ", " + self.getPeerState() + ", " + n.state + ", " + n.sid);
-              
-                if(n.epoch >= logicalclock) 
-                    outofelection.put(n.sid, new Vote(n.leader, n.zxid));
+       
+                outofelection.put(n.sid, new Vote(n.leader, n.zxid, n.epoch, n.state));
 
-                if (termPredicate(outofelection, n.leader, n.zxid)) {
-                    
-                    self.setPeerState((n.leader == self.getId()) ? 
+                if (termPredicate(outofelection, new Vote(n.leader, n.zxid, n.epoch, n.state))
+                        && checkLeader(outofelection, n.leader, n.epoch)) {
+                    synchronized(this){
+                        logicalclock = n.epoch;
+                        self.setPeerState((n.leader == self.getId()) ? 
                             ServerState.LEADING: ServerState.FOLLOWING);
-
+                    }
                     leaveInstance();
                     return new Vote(n.leader, n.zxid);
                 }

Modified: hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Vote.java
URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Vote.java?rev=702943&r1=702942&r2=702943&view=diff
==============================================================================
--- hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Vote.java (original)
+++ hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Vote.java Wed Oct  8 11:00:33 2008
@@ -18,6 +18,8 @@
 
 package org.apache.zookeeper.server.quorum;
 
+import org.apache.zookeeper.server.quorum.QuorumPeer.ServerState;
+
 
 public class Vote {
     public Vote(long id, long zxid) {
@@ -25,17 +27,34 @@
         this.zxid = zxid;
     }
 
+    public Vote(long id, long zxid, long epoch) {
+        this.id = id;
+        this.zxid = zxid;
+        this.epoch = epoch;
+    }
+    
+    public Vote(long id, long zxid, long epoch, ServerState state) {
+        this.id = id;
+        this.zxid = zxid;
+        this.epoch = epoch;
+        this.state = state;
+    }
+    
     public long id;
     
     public long zxid;
     
+    public long epoch = -1;
+    
+    public ServerState state = ServerState.LOOKING;
+    
     @Override
     public boolean equals(Object o) {
         if (!(o instanceof Vote)) {
             return false;
         }
         Vote other = (Vote) o;
-        return id == other.id && zxid == other.zxid;
+        return (id == other.id && zxid == other.zxid && epoch == other.epoch);
 
     }