You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zookeeper.apache.org by ph...@apache.org on 2017/12/16 00:36:21 UTC
zookeeper git commit: ZOOKEEPER-2953: Flaky Test:
testNoLogBeforeLeaderEstablishment
Repository: zookeeper
Updated Branches:
refs/heads/master aa8b60670 -> f2cbcc7e0
ZOOKEEPER-2953: Flaky Test: testNoLogBeforeLeaderEstablishment
Author: Abraham Fine <af...@apache.org>
Reviewers: phunt@apache.org
Closes #432 from afine/ZOOKEEPER-2953 and squashes the following commits:
dc21603d [Abraham Fine] improve assertion logging
b5bc449c [Abraham Fine] use shared Servers and numServers
9fac0e20 [Abraham Fine] fix whitespace
68a03820 [Abraham Fine] ZOOKEEPER-2953: Flaky Test: testNoLogBeforeLeaderEstablishment
Change-Id: I8ee7b2d46783baa1e4cf2876cced740f4cd25ec2
Project: http://git-wip-us.apache.org/repos/asf/zookeeper/repo
Commit: http://git-wip-us.apache.org/repos/asf/zookeeper/commit/f2cbcc7e
Tree: http://git-wip-us.apache.org/repos/asf/zookeeper/tree/f2cbcc7e
Diff: http://git-wip-us.apache.org/repos/asf/zookeeper/diff/f2cbcc7e
Branch: refs/heads/master
Commit: f2cbcc7e0d7adff08bd73a27f2193b1198e4c7f7
Parents: aa8b606
Author: Abraham Fine <af...@apache.org>
Authored: Fri Dec 15 16:25:30 2017 -0800
Committer: Patrick Hunt <ph...@apache.org>
Committed: Fri Dec 15 16:25:30 2017 -0800
----------------------------------------------------------------------
.../server/quorum/QuorumPeerMainTest.java | 165 +++++++++++++++----
.../server/quorum/QuorumPeerTestBase.java | 19 ++-
.../org/apache/zookeeper/test/QuorumTest.java | 88 ----------
3 files changed, 151 insertions(+), 121 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/zookeeper/blob/f2cbcc7e/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerMainTest.java
----------------------------------------------------------------------
diff --git a/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerMainTest.java b/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerMainTest.java
index a5ca72f..2a02428 100644
--- a/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerMainTest.java
+++ b/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerMainTest.java
@@ -27,6 +27,7 @@ import java.io.IOException;
import java.net.InetSocketAddress;
import java.nio.ByteBuffer;
import java.nio.channels.SocketChannel;
+import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
@@ -335,6 +336,100 @@ public class QuorumPeerMainTest extends QuorumPeerTestBase {
output[0], 2);
}
+ /**
+ * This test validates that if a quorum member determines that it is leader without the support of the rest of the
+ * quorum (the other members do not believe it to be the leader) it will stop attempting to lead and become a follower.
+ *
+ * @throws IOException
+ * @throws InterruptedException
+ */
+ @Test
+ public void testElectionFraud() throws IOException, InterruptedException {
+ // capture QuorumPeer logging
+ Layout layout = Logger.getRootLogger().getAppender("CONSOLE").getLayout();
+ ByteArrayOutputStream os = new ByteArrayOutputStream();
+ WriterAppender appender = new WriterAppender(layout, os);
+ appender.setThreshold(Level.INFO);
+ Logger qlogger = Logger.getLogger(QuorumPeer.class);
+ qlogger.addAppender(appender);
+
+ numServers = 3;
+
+ // used for assertions later
+ boolean foundLeading = false;
+ boolean foundLooking = false;
+ boolean foundFollowing = false;
+
+ try {
+ // spin up a quorum, we use a small ticktime to make the test run faster
+ servers = LaunchServers(numServers, 500);
+
+ // find the leader
+ int trueLeader = -1;
+ for (int i = 0; i < numServers; i++) {
+ if (servers.mt[i].main.quorumPeer.leader != null) {
+ trueLeader = i;
+ }
+ }
+ Assert.assertTrue("There should be a leader", trueLeader >= 0);
+
+ // find a follower
+ int falseLeader = (trueLeader + 1) % numServers;
+ Assert.assertTrue("All servers should join the quorum", servers.mt[falseLeader].main.quorumPeer.follower != null);
+
+ // to keep the quorum peer running and force it to go into the looking state, we kill leader election
+ // and close the connection to the leader
+ servers.mt[falseLeader].main.quorumPeer.electionAlg.shutdown();
+ servers.mt[falseLeader].main.quorumPeer.follower.getSocket().close();
+
+ // wait for the falseLeader to disconnect
+ waitForOne(servers.zk[falseLeader], States.CONNECTING);
+
+ // convince falseLeader that it is the leader
+ servers.mt[falseLeader].main.quorumPeer.setPeerState(QuorumPeer.ServerState.LEADING);
+
+ // provide time for the falseleader to realize no followers have connected
+ // (this is twice the timeout used in Leader#getEpochToPropose)
+ Thread.sleep(2 * servers.mt[falseLeader].main.quorumPeer.initLimit * servers.mt[falseLeader].main.quorumPeer.tickTime);
+
+ // Restart leader election
+ servers.mt[falseLeader].main.quorumPeer.startLeaderElection();
+
+ // The previous client connection to falseLeader likely closed, create a new one
+ servers.zk[falseLeader] = new ZooKeeper("127.0.0.1:" + servers.mt[falseLeader].getClientPort(), ClientBase.CONNECTION_TIMEOUT, this);
+
+ // Wait for falseLeader to rejoin the quorum
+ waitForOne(servers.zk[falseLeader], States.CONNECTED);
+
+ // and ensure trueLeader is still the leader
+ Assert.assertTrue(servers.mt[trueLeader].main.quorumPeer.leader != null);
+
+ // Look through the logs for output that indicates the falseLeader is LEADING, then LOOKING, then FOLLOWING
+ LineNumberReader r = new LineNumberReader(new StringReader(os.toString()));
+ Pattern leading = Pattern.compile(".*myid=" + falseLeader + ".*LEADING.*");
+ Pattern looking = Pattern.compile(".*myid=" + falseLeader + ".*LOOKING.*");
+ Pattern following = Pattern.compile(".*myid=" + falseLeader + ".*FOLLOWING.*");
+
+ String line;
+ while ((line = r.readLine()) != null) {
+ if (!foundLeading) {
+ foundLeading = leading.matcher(line).matches();
+ } else if(!foundLooking) {
+ foundLooking = looking.matcher(line).matches();
+ } else if (following.matcher(line).matches()){
+ foundFollowing = true;
+ break;
+ }
+ }
+ } finally {
+ qlogger.removeAppender(appender);
+ }
+
+ Assert.assertTrue("falseLeader never attempts to become leader", foundLeading);
+ Assert.assertTrue("falseLeader never gives up on leadership", foundLooking);
+ Assert.assertTrue("falseLeader never rejoins the quorum", foundFollowing);
+ }
+
private void waitForOne(ZooKeeper zk, States state) throws InterruptedException {
int iterations = ClientBase.CONNECTION_TIMEOUT / 500;
while (zk.getState() != state) {
@@ -371,40 +466,48 @@ public class QuorumPeerMainTest extends QuorumPeerTestBase {
ZooKeeper zk[];
}
- /**
- * This is a helper function for launching a set of servers
- *
- * @param numServers
- * @return
- * @throws IOException
- * @throws InterruptedException
- */
- private Servers LaunchServers(int numServers) throws IOException, InterruptedException {
- int SERVER_COUNT = numServers;
- Servers svrs = new Servers();
- final int clientPorts[] = new int[SERVER_COUNT];
- StringBuilder sb = new StringBuilder();
- for (int i = 0; i < SERVER_COUNT; i++) {
- clientPorts[i] = PortAssignment.unique();
- sb.append("server."+i+"=127.0.0.1:"+PortAssignment.unique()+":"+PortAssignment.unique()+";"+clientPorts[i]+"\n");
- }
- String quorumCfgSection = sb.toString();
- MainThread mt[] = new MainThread[SERVER_COUNT];
- ZooKeeper zk[] = new ZooKeeper[SERVER_COUNT];
- for (int i = 0; i < SERVER_COUNT; i++) {
- mt[i] = new MainThread(i, clientPorts[i], quorumCfgSection);
- mt[i].start();
- zk[i] = new ZooKeeper("127.0.0.1:" + clientPorts[i], ClientBase.CONNECTION_TIMEOUT, this);
- }
-
- waitForAll(zk, States.CONNECTED);
-
- svrs.mt = mt;
- svrs.zk = zk;
- return svrs;
+ private Servers LaunchServers(int numServers) throws IOException, InterruptedException {
+ return LaunchServers(numServers, null);
}
+ /** * This is a helper function for launching a set of servers
+ *
+ * @param numServers* @param tickTime A ticktime to pass to MainThread
+ * @return
+ * @throws IOException
+ * @throws InterruptedException
+ */
+ private Servers LaunchServers(int numServers, Integer tickTime) throws IOException, InterruptedException {
+ int SERVER_COUNT = numServers;
+ Servers svrs = new Servers();
+ final int clientPorts[] = new int[SERVER_COUNT];
+ StringBuilder sb = new StringBuilder();
+ for(int i = 0; i < SERVER_COUNT; i++) {
+ clientPorts[i] = PortAssignment.unique();
+ sb.append("server."+i+"=127.0.0.1:"+PortAssignment.unique()+":"+PortAssignment.unique()+";"+clientPorts[i]+"\n");
+ }
+ String quorumCfgSection = sb.toString();
+
+ MainThread mt[] = new MainThread[SERVER_COUNT];
+ ZooKeeper zk[] = new ZooKeeper[SERVER_COUNT];
+ for(int i = 0; i < SERVER_COUNT; i++) {
+ if (tickTime != null) {
+ mt[i] = new MainThread(i, clientPorts[i], quorumCfgSection, new HashMap<String, String>(), tickTime);
+ } else {
+ mt[i] = new MainThread(i, clientPorts[i], quorumCfgSection);
+ }
+ mt[i].start();
+ zk[i] = new ZooKeeper("127.0.0.1:" + clientPorts[i], ClientBase.CONNECTION_TIMEOUT, this);
+ }
+
+ waitForAll(zk, States.CONNECTED);
+
+ svrs.mt = mt;
+ svrs.zk = zk;
+ return svrs;
+ }
+
/**
* Verify handling of bad quorum address
*/
http://git-wip-us.apache.org/repos/asf/zookeeper/blob/f2cbcc7e/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerTestBase.java
----------------------------------------------------------------------
diff --git a/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerTestBase.java b/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerTestBase.java
index 21aa819..a357a6e 100644
--- a/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerTestBase.java
+++ b/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerTestBase.java
@@ -82,8 +82,18 @@ public class QuorumPeerTestBase extends ZKTestCase implements Watcher {
private String quorumCfgSection;
private Map<String, String> otherConfigs;
+ /**
+ * Create a MainThread
+ *
+ * @param myid
+ * @param clientPort
+ * @param quorumCfgSection
+ * @param otherConfigs
+ * @param tickTime initLimit will be 10 and syncLimit will be 5
+ * @throws IOException
+ */
public MainThread(int myid, int clientPort, String quorumCfgSection,
- Map<String, String> otherConfigs) throws IOException {
+ Map<String, String> otherConfigs, int tickTime) throws IOException {
baseDir = ClientBase.createTmpDir();
this.myid = myid;
this.clientPort = clientPort;
@@ -94,7 +104,7 @@ public class QuorumPeerTestBase extends ZKTestCase implements Watcher {
confFile = new File(baseDir, "zoo.cfg");
FileWriter fwriter = new FileWriter(confFile);
- fwriter.write("tickTime=4000\n");
+ fwriter.write("tickTime=" + tickTime + "\n");
fwriter.write("initLimit=10\n");
fwriter.write("syncLimit=5\n");
@@ -296,6 +306,11 @@ public class QuorumPeerTestBase extends ZKTestCase implements Watcher {
new HashMap<String, String>());
}
+ public MainThread(int myid, int clientPort, String quorumCfgSection,
+ Map<String, String> otherConfigs) throws IOException {
+ this(myid, clientPort, quorumCfgSection, otherConfigs, 4000);
+ }
+
Thread currentThread;
synchronized public void start() {
http://git-wip-us.apache.org/repos/asf/zookeeper/blob/f2cbcc7e/src/java/test/org/apache/zookeeper/test/QuorumTest.java
----------------------------------------------------------------------
diff --git a/src/java/test/org/apache/zookeeper/test/QuorumTest.java b/src/java/test/org/apache/zookeeper/test/QuorumTest.java
index 469a9d1..cd1d153 100644
--- a/src/java/test/org/apache/zookeeper/test/QuorumTest.java
+++ b/src/java/test/org/apache/zookeeper/test/QuorumTest.java
@@ -329,94 +329,6 @@ public class QuorumTest extends ZKTestCase {
zk.close();
}
- /**
- * Tests if closeSession can be logged before a leader gets established, which
- * could lead to a locked-out follower (see ZOOKEEPER-790).
- *
- * The test works as follows. It has a client connecting to a follower f and
- * sending batches of 1,000 updates. The goal is that f has a zxid higher than
- * all other servers in the initial leader election. This way we can crash and
- * recover the follower so that the follower believes it is the leader once it
- * recovers (LE optimization: once a server receives a message from all other
- * servers, it picks a leader.
- *
- * It also makes the session timeout very short so that we force the false
- * leader to close the session and write it to the log in the buggy code (before
- * ZOOKEEPER-790). Once f drops leadership and finds the current leader, its epoch
- * is higher, and it rejects the leader. Now, if we prevent the leader from closing
- * the session by only starting up (see Leader.lead()) once it obtains a quorum of
- * supporters, then f will find the current leader and support it because it won't
- * have a highe epoch.
- *
- */
- @Test
- public void testNoLogBeforeLeaderEstablishment () throws Exception {
- final Semaphore sem = new Semaphore(0);
-
- qu = new QuorumUtil(2, 10);
- qu.startQuorum();
-
- int index = 1;
- while(qu.getPeer(index).peer.leader == null)
- index++;
-
- Leader leader = qu.getPeer(index).peer.leader;
-
- Assert.assertNotNull(leader);
-
- /*
- * Reusing the index variable to select a follower to connect to
- */
- index = (index == 1) ? 2 : 1;
-
- ZooKeeper zk = new DisconnectableZooKeeper(
- "127.0.0.1:" + qu.getPeer(index).peer.getClientPort(),
- ClientBase.CONNECTION_TIMEOUT, new Watcher() {
- public void process(WatchedEvent event) { }
- });
-
- zk.create("/blah", new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
-
- for(int i = 0; i < 50000; i++) {
- zk.setData("/blah", new byte[0], -1, new AsyncCallback.StatCallback() {
- public void processResult(int rc, String path, Object ctx,
- Stat stat) {
- counter++;
- if (rc != 0) {
- errors++;
- }
- if(counter == 20000){
- sem.release();
- }
- }
- }, null);
-
- if(i == 5000){
- qu.shutdown(index);
- LOG.info("Shutting down s1");
- }
- if(i == 12000){
- qu.start(index);
- LOG.info("Setting up server: " + index);
- }
- if((i % 1000) == 0){
- Thread.sleep(500);
- }
- }
-
- // Wait until all updates return
- sem.tryAcquire(15, TimeUnit.SECONDS);
-
- // Verify that server is following and has the same epoch as the leader
- Assert.assertTrue("Not following", qu.getPeer(index).peer.follower != null);
- long epochF = (qu.getPeer(index).peer.getActiveServer().getZxid() >> 32L);
- long epochL = (leader.getEpoch() >> 32L);
- Assert.assertTrue("Zxid: " + qu.getPeer(index).peer.getActiveServer().getZxid() +
- "Current epoch: " + epochF, epochF == epochL);
-
- zk.close();
- }
-
// skip superhammer and clientcleanup as they are too expensive for quorum
/**