You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zookeeper.apache.org by fp...@apache.org on 2010/02/20 15:26:00 UTC

svn commit: r912119 - in /hadoop/zookeeper/trunk: ./ src/java/main/org/apache/zookeeper/server/quorum/ src/java/test/org/apache/zookeeper/test/

Author: fpj
Date: Sat Feb 20 14:26:00 2010
New Revision: 912119

URL: http://svn.apache.org/viewvc?rev=912119&view=rev
Log:
ZOOKEEPER-569. Failure of elected leader can lead to never-ending leader
election (henry via flavio)


Added:
    hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LENonTerminateTest.java
Modified:
    hadoop/zookeeper/trunk/CHANGES.txt
    hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/LeaderElection.java
    hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeer.java
    hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LETest.java

Modified: hadoop/zookeeper/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/CHANGES.txt?rev=912119&r1=912118&r2=912119&view=diff
==============================================================================
--- hadoop/zookeeper/trunk/CHANGES.txt (original)
+++ hadoop/zookeeper/trunk/CHANGES.txt Sat Feb 20 14:26:00 2010
@@ -228,6 +228,9 @@
   ZOOKEEPER-668. Close method in LedgerInputStream doesn't do anything (flavio 
   via mahadev)
 
+  ZOOKEEPER-569. Failure of elected leader can lead to never-ending leader 
+  election (henry via flavio)
+
 IMPROVEMENTS:
   ZOOKEEPER-473. cleanup junit tests to eliminate false positives due to
   "socket reuse" and failure to close client (phunt via mahadev)

Modified: hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/LeaderElection.java
URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/LeaderElection.java?rev=912119&r1=912118&r2=912119&view=diff
==============================================================================
--- hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/LeaderElection.java (original)
+++ hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/LeaderElection.java Sat Feb 20 14:26:00 2010
@@ -198,40 +198,50 @@
                         // down
                     }
                 }
+
                 ElectionResult result = countVotes(votes, heardFrom);
-                if (result.winner.id >= 0) {
-                    self.setCurrentVote(result.vote);
-                    // To do: this doesn't use a quorum verifier
-                    if (result.winningCount > (self.getVotingView().size() / 2)) {
-                        self.setCurrentVote(result.winner);
-                        s.close();
-                        Vote current = self.getCurrentVote();
-                        LOG.info("Found leader: my type is: " + self.getPeerType());
-                        /**
-                         * We want to make sure we implement the state machine
-                         * correctly. If we are a PARTICIPANT, once a leader
-                         * is elected we can move either to LEADING or 
-                         * FOLLOWING. However if we are an OBSERVER, it is an
-                         * error to be elected as a Leader.
-                         */
-                        if (self.getPeerType() == LearnerType.OBSERVER) {
-                            if (current.id == self.getId()) {
-                                // This should never happen!
-                                LOG.error("OBSERVER elected as leader!");
-                                Thread.sleep(100);
-                            }
-                            else {
-                                self.setPeerState(ServerState.OBSERVING);
-                                Thread.sleep(100);
+                // ZOOKEEPER-569:
+                // If no votes are received for live peers, reset to voting 
+                // for ourselves as otherwise we may hang on to a vote 
+                // for a dead peer                 
+                if (votes.size() == 0) {                    
+                    self.setCurrentVote(new Vote(self.getId(),
+                            self.getLastLoggedZxid()));
+                } else {
+                    if (result.winner.id >= 0) {
+                        self.setCurrentVote(result.vote);
+                        // To do: this doesn't use a quorum verifier
+                        if (result.winningCount > (self.getVotingView().size() / 2)) {
+                            self.setCurrentVote(result.winner);
+                            s.close();
+                            Vote current = self.getCurrentVote();
+                            LOG.info("Found leader: my type is: " + self.getPeerType());
+                            /*
+                             * We want to make sure we implement the state machine
+                             * correctly. If we are a PARTICIPANT, once a leader
+                             * is elected we can move either to LEADING or 
+                             * FOLLOWING. However if we are an OBSERVER, it is an
+                             * error to be elected as a Leader.
+                             */
+                            if (self.getPeerType() == LearnerType.OBSERVER) {
+                                if (current.id == self.getId()) {
+                                    // This should never happen!
+                                    LOG.error("OBSERVER elected as leader!");
+                                    Thread.sleep(100);
+                                }
+                                else {
+                                    self.setPeerState(ServerState.OBSERVING);
+                                    Thread.sleep(100);
+                                    return current;
+                                }
+                            } else {
+                                self.setPeerState((current.id == self.getId())
+                                        ? ServerState.LEADING: ServerState.FOLLOWING);
+                                if (self.getPeerState() == ServerState.FOLLOWING) {
+                                    Thread.sleep(100);
+                                }                            
                                 return current;
                             }
-                        } else {
-                            self.setPeerState((current.id == self.getId())
-                                    ? ServerState.LEADING: ServerState.FOLLOWING);
-                            if (self.getPeerState() == ServerState.FOLLOWING) {
-                                Thread.sleep(100);
-                            }                            
-                            return current;
                         }
                     }
                 }

Modified: hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeer.java
URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeer.java?rev=912119&r1=912118&r2=912119&view=diff
==============================================================================
--- hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeer.java (original)
+++ hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeer.java Sat Feb 20 14:26:00 2010
@@ -501,7 +501,7 @@
         //TODO: use a factory rather than a switch
         switch (electionAlgorithm) {
         case 0:
-            // will create a new instance for each run of the protocol
+            le = new LeaderElection(this);
             break;
         case 1:
             le = new AuthFastLeaderElection(this);
@@ -527,10 +527,9 @@
 
     protected Election makeLEStrategy(){
         LOG.debug("Initializing leader election protocol...");
-
-        if(electionAlg==null){
-            return new LeaderElection(this);
-        }
+        if (getElectionType() == 0) {
+            electionAlg = new LeaderElection(this);
+        }        
         return electionAlg;
     }
 

Added: hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LENonTerminateTest.java
URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LENonTerminateTest.java?rev=912119&view=auto
==============================================================================
--- hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LENonTerminateTest.java (added)
+++ hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LENonTerminateTest.java Sat Feb 20 14:26:00 2010
@@ -0,0 +1,187 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zookeeper.test;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.DatagramPacket;
+import java.net.DatagramSocket;
+import java.net.InetSocketAddress;
+import java.nio.ByteBuffer;
+import java.util.HashMap;
+
+import junit.framework.TestCase;
+
+import org.apache.log4j.Logger;
+import org.apache.zookeeper.PortAssignment;
+import org.apache.zookeeper.server.quorum.QuorumPeer;
+import org.apache.zookeeper.server.quorum.Vote;
+import org.apache.zookeeper.server.quorum.QuorumPeer.QuorumServer;
+import org.apache.zookeeper.server.quorum.QuorumPeer.ServerState;
+import org.junit.Test;
+
+/**
+ * Tests that a particular run of LeaderElection terminates correctly.
+ */
+public class LENonTerminateTest extends TestCase {
+    protected static final Logger LOG = Logger.getLogger(FLELostMessageTest.class);
+    
+    int count;
+    HashMap<Long,QuorumServer> peers;
+    File tmpdir[];
+    int port[];   
+   
+    @Override
+    public void setUp() throws Exception {
+        count = 3;
+
+        peers = new HashMap<Long,QuorumServer>(count);
+        tmpdir = new File[count];
+        port = new int[count];
+        
+        LOG.info("SetUp " + getName());
+    }
+
+    @Override
+    public void tearDown() throws Exception {
+        LOG.info("FINISHED " + getName());
+    }
+
+
+    class LEThread extends Thread {
+        int i;
+        QuorumPeer peer;
+
+        LEThread(QuorumPeer peer, int i) {
+            this.i = i;
+            this.peer = peer;
+            LOG.info("Constructor: " + getName());
+
+        }
+
+        public void run(){            
+            try{
+                Vote v = null;
+                peer.setPeerState(ServerState.LOOKING);
+                LOG.info("Going to call leader election: " + i);
+                v = peer.getElectionAlg().lookForLeader();
+
+                if (v == null){
+                    fail("Thread " + i + " got a null vote");
+                }
+
+                /*
+                 * A real zookeeper would take care of setting the current vote. Here
+                 * we do it manually.
+                 */
+                peer.setCurrentVote(v);
+
+                LOG.info("Finished election: " + i + ", " + v.id);                    
+            } catch (Exception e) {
+                e.printStackTrace();
+            }
+            LOG.info("Joining");
+        }
+    }
+    
+    /**
+     * This tests ZK-569.
+     * With three peers A, B and C, the following could happen:
+     * 1. Round 1, A,B and C all vote for themselves
+     * 2. Round 2, C dies, A and B vote for C
+     * 3. Because C has died, votes for it are ignored, but A and B never
+     * reset their votes. Hence LE never terminates. ZK-569 fixes this by
+     * resetting votes to themselves if the set of votes for live peers is null.
+     */
+    @Test
+    public void testNonTermination() throws Exception {                
+        LOG.info("TestNonTermination: " + getName()+ ", " + count);
+        for(int i = 0; i < count; i++) {
+            int clientport = PortAssignment.unique();
+            peers.put(Long.valueOf(i),
+                    new QuorumServer(i,
+                            new InetSocketAddress(clientport),
+                            new InetSocketAddress(PortAssignment.unique())));
+            tmpdir[i] = ClientBase.createTmpDir();
+            port[i] = clientport;
+        }
+        
+        /*
+         * peer1 and peer2 are A and B in the above example. 
+         */
+        QuorumPeer peer1 = new QuorumPeer(peers, tmpdir[0], tmpdir[0], port[0], 0, 0, 2, 2, 2);
+        peer1.startLeaderElection();
+        LEThread thread1 = new LEThread(peer1, 0);
+        thread1.start();
+        
+        QuorumPeer peer2 = new QuorumPeer(peers, tmpdir[1], tmpdir[1], port[1], 0, 1, 2, 2, 2);
+        peer2.startLeaderElection();        
+        LEThread thread2 = new LEThread(peer2, 1);
+        thread2.start();
+                            
+        /*
+         * Start mock server.
+         */
+        Thread thread3 = new Thread() { 
+            public void run() {
+                try {
+                    mockServer();
+                } catch (Exception e) {
+                    LOG.error(e);
+                    fail("Exception when running mocked server " + e);
+                }
+            }
+        };        
+        
+        thread3.start();
+        /*
+         * Occasionally seen false negatives with a 5s timeout.
+         */
+        thread1.join(15000);
+        thread2.join(15000);
+        thread3.join(15000);
+        if (thread1.isAlive() || thread2.isAlive() || thread3.isAlive()) {
+            fail("Threads didn't join");
+        }
+    }
+     
+    /**
+     * MockServer plays the role of peer C. Respond to two requests for votes
+     * with vote for self and then fail. 
+     */
+    void mockServer() throws InterruptedException, IOException {          
+        byte b[] = new byte[36];
+        ByteBuffer responseBuffer = ByteBuffer.wrap(b);
+        DatagramPacket packet = new DatagramPacket(b, b.length);
+        QuorumServer server = peers.get(Long.valueOf(2));
+        DatagramSocket udpSocket = new DatagramSocket(server.addr.getPort());
+        Vote current = new Vote(2, 1);
+        for (int i=0;i<2;++i) {
+            udpSocket.receive(packet);
+            responseBuffer.clear();
+            responseBuffer.getInt(); // Skip the xid
+            responseBuffer.putLong(2);
+            
+            responseBuffer.putLong(current.id);
+            responseBuffer.putLong(current.zxid);
+            packet.setData(b);
+            udpSocket.send(packet);
+        }
+    }    
+}

Modified: hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LETest.java
URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LETest.java?rev=912119&r1=912118&r2=912119&view=diff
==============================================================================
--- hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LETest.java (original)
+++ hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LETest.java Sat Feb 20 14:26:00 2010
@@ -26,6 +26,7 @@
 
 import junit.framework.TestCase;
 
+import org.apache.log4j.Logger;
 import org.apache.zookeeper.PortAssignment;
 import org.apache.zookeeper.server.quorum.LeaderElection;
 import org.apache.zookeeper.server.quorum.QuorumPeer;
@@ -33,6 +34,7 @@
 import org.apache.zookeeper.server.quorum.QuorumPeer.QuorumServer;
 
 public class LETest extends TestCase {
+    private static final Logger LOG = Logger.getLogger(LETest.class);
     volatile Vote votes[];
     volatile boolean leaderDies;
     volatile long leader = -1;
@@ -57,7 +59,7 @@
                             if (leaderDies) {
                                 leaderDies = false;
                                 peer.stopLeaderElection();
-                                System.out.println("Leader " + i + " dying");
+                                LOG.info("Leader " + i + " dying");
                                 leader = -2;
                             } else {
                                 leader = i;
@@ -77,7 +79,7 @@
                     Thread.sleep(rand.nextInt(1000));
                     peer.setCurrentVote(new Vote(peer.getId(), 0));
                 }
-                System.out.println("Thread " + i + " votes " + v);
+                LOG.info("Thread " + i + " votes " + v);
             } catch (InterruptedException e) {
                 e.printStackTrace();
             }
@@ -129,5 +131,5 @@
                 }
             }
         }
-    }
+    }    
 }