You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zookeeper.apache.org by fp...@apache.org on 2010/02/20 15:26:00 UTC
svn commit: r912119 - in /hadoop/zookeeper/trunk: ./
src/java/main/org/apache/zookeeper/server/quorum/
src/java/test/org/apache/zookeeper/test/
Author: fpj
Date: Sat Feb 20 14:26:00 2010
New Revision: 912119
URL: http://svn.apache.org/viewvc?rev=912119&view=rev
Log:
ZOOKEEPER-569. Failure of elected leader can lead to never-ending leader
election (henry via flavio)
Added:
hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LENonTerminateTest.java
Modified:
hadoop/zookeeper/trunk/CHANGES.txt
hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/LeaderElection.java
hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeer.java
hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LETest.java
Modified: hadoop/zookeeper/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/CHANGES.txt?rev=912119&r1=912118&r2=912119&view=diff
==============================================================================
--- hadoop/zookeeper/trunk/CHANGES.txt (original)
+++ hadoop/zookeeper/trunk/CHANGES.txt Sat Feb 20 14:26:00 2010
@@ -228,6 +228,9 @@
ZOOKEEPER-668. Close method in LedgerInputStream doesn't do anything (flavio
via mahadev)
+ ZOOKEEPER-569. Failure of elected leader can lead to never-ending leader
+ election (henry via flavio)
+
IMPROVEMENTS:
ZOOKEEPER-473. cleanup junit tests to eliminate false positives due to
"socket reuse" and failure to close client (phunt via mahadev)
Modified: hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/LeaderElection.java
URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/LeaderElection.java?rev=912119&r1=912118&r2=912119&view=diff
==============================================================================
--- hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/LeaderElection.java (original)
+++ hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/LeaderElection.java Sat Feb 20 14:26:00 2010
@@ -198,40 +198,50 @@
// down
}
}
+
ElectionResult result = countVotes(votes, heardFrom);
- if (result.winner.id >= 0) {
- self.setCurrentVote(result.vote);
- // To do: this doesn't use a quorum verifier
- if (result.winningCount > (self.getVotingView().size() / 2)) {
- self.setCurrentVote(result.winner);
- s.close();
- Vote current = self.getCurrentVote();
- LOG.info("Found leader: my type is: " + self.getPeerType());
- /**
- * We want to make sure we implement the state machine
- * correctly. If we are a PARTICIPANT, once a leader
- * is elected we can move either to LEADING or
- * FOLLOWING. However if we are an OBSERVER, it is an
- * error to be elected as a Leader.
- */
- if (self.getPeerType() == LearnerType.OBSERVER) {
- if (current.id == self.getId()) {
- // This should never happen!
- LOG.error("OBSERVER elected as leader!");
- Thread.sleep(100);
- }
- else {
- self.setPeerState(ServerState.OBSERVING);
- Thread.sleep(100);
+ // ZOOKEEPER-569:
+ // If no votes are received for live peers, reset to voting
+ // for ourselves as otherwise we may hang on to a vote
+ // for a dead peer
+ if (votes.size() == 0) {
+ self.setCurrentVote(new Vote(self.getId(),
+ self.getLastLoggedZxid()));
+ } else {
+ if (result.winner.id >= 0) {
+ self.setCurrentVote(result.vote);
+ // To do: this doesn't use a quorum verifier
+ if (result.winningCount > (self.getVotingView().size() / 2)) {
+ self.setCurrentVote(result.winner);
+ s.close();
+ Vote current = self.getCurrentVote();
+ LOG.info("Found leader: my type is: " + self.getPeerType());
+ /*
+ * We want to make sure we implement the state machine
+ * correctly. If we are a PARTICIPANT, once a leader
+ * is elected we can move either to LEADING or
+ * FOLLOWING. However if we are an OBSERVER, it is an
+ * error to be elected as a Leader.
+ */
+ if (self.getPeerType() == LearnerType.OBSERVER) {
+ if (current.id == self.getId()) {
+ // This should never happen!
+ LOG.error("OBSERVER elected as leader!");
+ Thread.sleep(100);
+ }
+ else {
+ self.setPeerState(ServerState.OBSERVING);
+ Thread.sleep(100);
+ return current;
+ }
+ } else {
+ self.setPeerState((current.id == self.getId())
+ ? ServerState.LEADING: ServerState.FOLLOWING);
+ if (self.getPeerState() == ServerState.FOLLOWING) {
+ Thread.sleep(100);
+ }
return current;
}
- } else {
- self.setPeerState((current.id == self.getId())
- ? ServerState.LEADING: ServerState.FOLLOWING);
- if (self.getPeerState() == ServerState.FOLLOWING) {
- Thread.sleep(100);
- }
- return current;
}
}
}
Modified: hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeer.java
URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeer.java?rev=912119&r1=912118&r2=912119&view=diff
==============================================================================
--- hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeer.java (original)
+++ hadoop/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeer.java Sat Feb 20 14:26:00 2010
@@ -501,7 +501,7 @@
//TODO: use a factory rather than a switch
switch (electionAlgorithm) {
case 0:
- // will create a new instance for each run of the protocol
+ le = new LeaderElection(this);
break;
case 1:
le = new AuthFastLeaderElection(this);
@@ -527,10 +527,9 @@
protected Election makeLEStrategy(){
LOG.debug("Initializing leader election protocol...");
-
- if(electionAlg==null){
- return new LeaderElection(this);
- }
+ if (getElectionType() == 0) {
+ electionAlg = new LeaderElection(this);
+ }
return electionAlg;
}
Added: hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LENonTerminateTest.java
URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LENonTerminateTest.java?rev=912119&view=auto
==============================================================================
--- hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LENonTerminateTest.java (added)
+++ hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LENonTerminateTest.java Sat Feb 20 14:26:00 2010
@@ -0,0 +1,187 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zookeeper.test;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.DatagramPacket;
+import java.net.DatagramSocket;
+import java.net.InetSocketAddress;
+import java.nio.ByteBuffer;
+import java.util.HashMap;
+
+import junit.framework.TestCase;
+
+import org.apache.log4j.Logger;
+import org.apache.zookeeper.PortAssignment;
+import org.apache.zookeeper.server.quorum.QuorumPeer;
+import org.apache.zookeeper.server.quorum.Vote;
+import org.apache.zookeeper.server.quorum.QuorumPeer.QuorumServer;
+import org.apache.zookeeper.server.quorum.QuorumPeer.ServerState;
+import org.junit.Test;
+
+/**
+ * Tests that a particular run of LeaderElection terminates correctly.
+ */
+public class LENonTerminateTest extends TestCase {
+ protected static final Logger LOG = Logger.getLogger(FLELostMessageTest.class);
+
+ int count;
+ HashMap<Long,QuorumServer> peers;
+ File tmpdir[];
+ int port[];
+
+ @Override
+ public void setUp() throws Exception {
+ count = 3;
+
+ peers = new HashMap<Long,QuorumServer>(count);
+ tmpdir = new File[count];
+ port = new int[count];
+
+ LOG.info("SetUp " + getName());
+ }
+
+ @Override
+ public void tearDown() throws Exception {
+ LOG.info("FINISHED " + getName());
+ }
+
+
+ class LEThread extends Thread {
+ int i;
+ QuorumPeer peer;
+
+ LEThread(QuorumPeer peer, int i) {
+ this.i = i;
+ this.peer = peer;
+ LOG.info("Constructor: " + getName());
+
+ }
+
+ public void run(){
+ try{
+ Vote v = null;
+ peer.setPeerState(ServerState.LOOKING);
+ LOG.info("Going to call leader election: " + i);
+ v = peer.getElectionAlg().lookForLeader();
+
+ if (v == null){
+ fail("Thread " + i + " got a null vote");
+ }
+
+ /*
+ * A real zookeeper would take care of setting the current vote. Here
+ * we do it manually.
+ */
+ peer.setCurrentVote(v);
+
+ LOG.info("Finished election: " + i + ", " + v.id);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ LOG.info("Joining");
+ }
+ }
+
+ /**
+ * This tests ZK-569.
+ * With three peers A, B and C, the following could happen:
+ * 1. Round 1, A,B and C all vote for themselves
+ * 2. Round 2, C dies, A and B vote for C
+ * 3. Because C has died, votes for it are ignored, but A and B never
+ * reset their votes. Hence LE never terminates. ZK-569 fixes this by
+ * resetting votes to themselves if the set of votes for live peers is null.
+ */
+ @Test
+ public void testNonTermination() throws Exception {
+ LOG.info("TestNonTermination: " + getName()+ ", " + count);
+ for(int i = 0; i < count; i++) {
+ int clientport = PortAssignment.unique();
+ peers.put(Long.valueOf(i),
+ new QuorumServer(i,
+ new InetSocketAddress(clientport),
+ new InetSocketAddress(PortAssignment.unique())));
+ tmpdir[i] = ClientBase.createTmpDir();
+ port[i] = clientport;
+ }
+
+ /*
+ * peer1 and peer2 are A and B in the above example.
+ */
+ QuorumPeer peer1 = new QuorumPeer(peers, tmpdir[0], tmpdir[0], port[0], 0, 0, 2, 2, 2);
+ peer1.startLeaderElection();
+ LEThread thread1 = new LEThread(peer1, 0);
+ thread1.start();
+
+ QuorumPeer peer2 = new QuorumPeer(peers, tmpdir[1], tmpdir[1], port[1], 0, 1, 2, 2, 2);
+ peer2.startLeaderElection();
+ LEThread thread2 = new LEThread(peer2, 1);
+ thread2.start();
+
+ /*
+ * Start mock server.
+ */
+ Thread thread3 = new Thread() {
+ public void run() {
+ try {
+ mockServer();
+ } catch (Exception e) {
+ LOG.error(e);
+ fail("Exception when running mocked server " + e);
+ }
+ }
+ };
+
+ thread3.start();
+ /*
+ * Occasionally seen false negatives with a 5s timeout.
+ */
+ thread1.join(15000);
+ thread2.join(15000);
+ thread3.join(15000);
+ if (thread1.isAlive() || thread2.isAlive() || thread3.isAlive()) {
+ fail("Threads didn't join");
+ }
+ }
+
+ /**
+ * MockServer plays the role of peer C. Respond to two requests for votes
+ * with vote for self and then fail.
+ */
+ void mockServer() throws InterruptedException, IOException {
+ byte b[] = new byte[36];
+ ByteBuffer responseBuffer = ByteBuffer.wrap(b);
+ DatagramPacket packet = new DatagramPacket(b, b.length);
+ QuorumServer server = peers.get(Long.valueOf(2));
+ DatagramSocket udpSocket = new DatagramSocket(server.addr.getPort());
+ Vote current = new Vote(2, 1);
+ for (int i=0;i<2;++i) {
+ udpSocket.receive(packet);
+ responseBuffer.clear();
+ responseBuffer.getInt(); // Skip the xid
+ responseBuffer.putLong(2);
+
+ responseBuffer.putLong(current.id);
+ responseBuffer.putLong(current.zxid);
+ packet.setData(b);
+ udpSocket.send(packet);
+ }
+ }
+}
Modified: hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LETest.java
URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LETest.java?rev=912119&r1=912118&r2=912119&view=diff
==============================================================================
--- hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LETest.java (original)
+++ hadoop/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/LETest.java Sat Feb 20 14:26:00 2010
@@ -26,6 +26,7 @@
import junit.framework.TestCase;
+import org.apache.log4j.Logger;
import org.apache.zookeeper.PortAssignment;
import org.apache.zookeeper.server.quorum.LeaderElection;
import org.apache.zookeeper.server.quorum.QuorumPeer;
@@ -33,6 +34,7 @@
import org.apache.zookeeper.server.quorum.QuorumPeer.QuorumServer;
public class LETest extends TestCase {
+ private static final Logger LOG = Logger.getLogger(LETest.class);
volatile Vote votes[];
volatile boolean leaderDies;
volatile long leader = -1;
@@ -57,7 +59,7 @@
if (leaderDies) {
leaderDies = false;
peer.stopLeaderElection();
- System.out.println("Leader " + i + " dying");
+ LOG.info("Leader " + i + " dying");
leader = -2;
} else {
leader = i;
@@ -77,7 +79,7 @@
Thread.sleep(rand.nextInt(1000));
peer.setCurrentVote(new Vote(peer.getId(), 0));
}
- System.out.println("Thread " + i + " votes " + v);
+ LOG.info("Thread " + i + " votes " + v);
} catch (InterruptedException e) {
e.printStackTrace();
}
@@ -129,5 +131,5 @@
}
}
}
- }
+ }
}