You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zookeeper.apache.org by fp...@apache.org on 2013/10/21 23:35:13 UTC
svn commit: r1534388 - in /zookeeper/branches/branch-3.4: ./
src/java/main/org/apache/zookeeper/server/quorum/
src/java/test/org/apache/zookeeper/test/
Author: fpj
Date: Mon Oct 21 21:35:12 2013
New Revision: 1534388
URL: http://svn.apache.org/r1534388
Log:
ZOOKEEPER-1732. ZooKeeper server unable to join established ensemble (German Blanco via fpj)
Modified:
zookeeper/branches/branch-3.4/CHANGES.txt
zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/FastLeaderElection.java
zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/Leader.java
zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/Learner.java
zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeer.java
zookeeper/branches/branch-3.4/src/java/test/org/apache/zookeeper/test/FLETest.java
Modified: zookeeper/branches/branch-3.4/CHANGES.txt
URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.4/CHANGES.txt?rev=1534388&r1=1534387&r2=1534388&view=diff
==============================================================================
--- zookeeper/branches/branch-3.4/CHANGES.txt (original)
+++ zookeeper/branches/branch-3.4/CHANGES.txt Mon Oct 21 21:35:12 2013
@@ -137,6 +137,8 @@ BUGFIXES:
ZOOKEEPER-1558. Leader should not snapshot uncommitted state (fpj)
+ ZOOKEEPER-1732. ZooKeeper server unable to join established ensemble (German Blanco via fpj)
+
IMPROVEMENTS:
ZOOKEEPER-1564. Allow JUnit test build with IBM Java
Modified: zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/FastLeaderElection.java
URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/FastLeaderElection.java?rev=1534388&r1=1534387&r2=1534388&view=diff
==============================================================================
--- zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/FastLeaderElection.java (original)
+++ zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/FastLeaderElection.java Mon Oct 21 21:35:12 2013
@@ -66,6 +66,14 @@ public class FastLeaderElection implemen
*/
final static int maxNotificationInterval = 60000;
+
+ /**
+ * Placeholder passed in place of the vote fields (electionEpoch
+ * and zxid) that must not be taken into account when checking
+ * whether a quorum majority of an established ensemble is
+ * following the same leader.
+ */
+ final static int IGNOREVALUE = -1;
/**
* Connection manager. Fast leader election uses TCP for
@@ -324,7 +332,7 @@ public class FastLeaderElection implemen
ToSend.mType.notification,
current.getId(),
current.getZxid(),
- logicalclock,
+ current.getElectionEpoch(),
self.getPeerState(),
response.sid,
current.getPeerEpoch());
@@ -867,15 +875,25 @@ public class FastLeaderElection implemen
}
}
- /**
+ /*
* Before joining an established ensemble, verify that
* a majority are following the same leader.
+ * Only peer epoch is used to check that the votes come
+ * from the same ensemble. This is because there is at
+ * least one corner case in which the ensemble can be
+ * created with inconsistent zxid and election epoch
+ * info. However, given that only one ensemble can be
+ * running at a single point in time and that each
+ * epoch is used only once, using only the epoch to
+ * compare the votes is sufficient.
+ *
+ * @see https://issues.apache.org/jira/browse/ZOOKEEPER-1732
*/
- outofelection.put(n.sid, new Vote(n.leader, n.zxid,
- n.electionEpoch, n.peerEpoch, n.state));
+ outofelection.put(n.sid, new Vote(n.leader,
+ IGNOREVALUE, IGNOREVALUE, n.peerEpoch, n.state));
if (termPredicate(outofelection, new Vote(n.leader,
- n.zxid, n.electionEpoch, n.peerEpoch, n.state))
- && checkLeader(outofelection, n.leader, n.electionEpoch)) {
+ IGNOREVALUE, IGNOREVALUE, n.peerEpoch, n.state))
+ && checkLeader(outofelection, n.leader, IGNOREVALUE)) {
synchronized(this){
logicalclock = n.electionEpoch;
self.setPeerState((n.leader == self.getId()) ?
Modified: zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/Leader.java
URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/Leader.java?rev=1534388&r1=1534387&r2=1534388&view=diff
==============================================================================
--- zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/Leader.java (original)
+++ zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/Leader.java Mon Oct 21 21:35:12 2013
@@ -945,6 +945,15 @@ public class Leader {
+ " ]; starting up and setting last processed zxid: 0x{}",
Long.toHexString(zk.getZxid()));
zk.startup();
+ /*
+ * Update the election vote here to ensure that all members of the
+ * ensemble report the same vote to new servers that start up and
+ * send leader election notifications to the ensemble.
+ *
+ * @see https://issues.apache.org/jira/browse/ZOOKEEPER-1732
+ */
+ self.updateElectionVote(getEpoch());
+
zk.getZKDatabase().setlastProcessedZxid(zk.getZxid());
}
Modified: zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/Learner.java
URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/Learner.java?rev=1534388&r1=1534387&r2=1534388&view=diff
==============================================================================
--- zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/Learner.java (original)
+++ zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/Learner.java Mon Oct 21 21:35:12 2013
@@ -435,6 +435,15 @@ public class Learner {
writePacket(ack, true);
sock.setSoTimeout(self.tickTime * self.syncLimit);
zk.startup();
+ /*
+ * Update the election vote here to ensure that all members of the
+ * ensemble report the same vote to new servers that start up and
+ * send leader election notifications to the ensemble.
+ *
+ * @see https://issues.apache.org/jira/browse/ZOOKEEPER-1732
+ */
+ self.updateElectionVote(newEpoch);
+
// We need to log the stuff that came in between the snapshot and the uptodate
if (zk instanceof FollowerZooKeeperServer) {
FollowerZooKeeperServer fzk = (FollowerZooKeeperServer)zk;
Modified: zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeer.java
URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeer.java?rev=1534388&r1=1534387&r2=1534388&view=diff
==============================================================================
--- zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeer.java (original)
+++ zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/QuorumPeer.java Mon Oct 21 21:35:12 2013
@@ -1194,4 +1194,21 @@ public class QuorumPeer extends Thread i
acceptedEpoch = e;
writeLongToFile(ACCEPTED_EPOCH_FILENAME, e);
}
+
+ /**
+ * Replaces the current vote with a copy that carries {@code newEpoch} as
+ * its peer epoch, so that a new server joining the ensemble sees consistent
+ * election info (see ZOOKEEPER-1732). Does nothing when no vote is set.
+ */
+ protected void updateElectionVote(long newEpoch) {
+ Vote currentVote = getCurrentVote();
+ if (currentVote != null) { // no vote yet: nothing to update
+ setCurrentVote(new Vote(currentVote.getId(),
+ currentVote.getZxid(),
+ currentVote.getElectionEpoch(),
+ newEpoch, // the only field that differs from the current vote
+ currentVote.getState()));
+ }
+ }
+
}
Modified: zookeeper/branches/branch-3.4/src/java/test/org/apache/zookeeper/test/FLETest.java
URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.4/src/java/test/org/apache/zookeeper/test/FLETest.java?rev=1534388&r1=1534387&r2=1534388&view=diff
==============================================================================
--- zookeeper/branches/branch-3.4/src/java/test/org/apache/zookeeper/test/FLETest.java (original)
+++ zookeeper/branches/branch-3.4/src/java/test/org/apache/zookeeper/test/FLETest.java Mon Oct 21 21:35:12 2013
@@ -403,4 +403,67 @@ public class FLETest extends ZKTestCase
}
}
}
+
+ /*
+ * For ZOOKEEPER-1732: verify that a new peer can join a running ensemble
+ * in which one member's vote carries a different zxid and election epoch.
+ */
+ @Test
+ public void testJoinInconsistentEnsemble() throws Exception {
+ int sid;
+ QuorumPeer peer;
+ int waitTime = 10 * 1000; // ms allowed for each state check below
+ ArrayList<QuorumPeer> peerList = new ArrayList<QuorumPeer>();
+ for(sid = 0; sid < 3; sid++) { // register 3 servers with dirs/ports; none is started yet
+ peers.put(Long.valueOf(sid),
+ new QuorumServer(sid,
+ new InetSocketAddress(PortAssignment.unique()),
+ new InetSocketAddress(PortAssignment.unique())));
+ tmpdir[sid] = ClientBase.createTmpDir();
+ port[sid] = PortAssignment.unique();
+ }
+ // Start 2 of the 3 peers and verify that they form a cluster
+ for (sid = 0; sid < 2; sid++) {
+ peer = new QuorumPeer(peers, tmpdir[sid], tmpdir[sid],
+ port[sid], 3, sid, 2000, 2, 2);
+ LOG.info("Starting peer " + peer.getId());
+ peer.start();
+ peerList.add(sid, peer);
+ }
+ peer = peerList.get(0); // peer 0 is the member whose vote is altered below
+ VerifyState v1 = new VerifyState(peerList.get(0));
+ v1.start();
+ v1.join(waitTime);
+ Assert.assertFalse("Unable to form cluster in " +
+ waitTime + " ms",
+ !v1.isSuccess()); // double negative: fails unless v1 reports success
+ // Skew peer 0's vote: zxid and election epoch +100 each, other fields kept
+ long leaderSid = peer.getCurrentVote().getId();
+ long zxid = peer.getCurrentVote().getZxid();
+ long electionEpoch = peer.getCurrentVote().getElectionEpoch();
+ ServerState state = peer.getCurrentVote().getState();
+ long peerEpoch = peer.getCurrentVote().getPeerEpoch();
+ Vote newVote = new Vote(leaderSid, zxid+100, electionEpoch+100, peerEpoch, state);
+ peer.setCurrentVote(newVote);
+ // Start the 3rd peer and check that it joins the quorum
+ peer = new QuorumPeer(peers, tmpdir[2], tmpdir[2],
+ port[2], 3, 2, 2000, 2, 2);
+ LOG.info("Starting peer " + peer.getId());
+ peer.start();
+ peerList.add(sid, peer); // sid == 2 here, left over from the start-up loop
+ v1 = new VerifyState(peer);
+ v1.start();
+ v1.join(waitTime);
+ if (v1.isAlive()) { // still running after join(waitTime): timed out; NOTE(review): isSuccess() is not checked here
+ Assert.fail("Peer " + peer.getId() + " failed to join the cluster " +
+ "within " + waitTime + " ms");
+ }
+ // cleanup; NOTE(review): not in a finally block, so presumably skipped when an assertion above fails
+ for (int id = 0; id < 3; id++) {
+ peer = peerList.get(id);
+ if (peer != null) {
+ peer.shutdown();
+ }
+ }
+ }
}