You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zookeeper.apache.org by ph...@apache.org on 2017/12/16 00:36:21 UTC

zookeeper git commit: ZOOKEEPER-2953: Flaky Test: testNoLogBeforeLeaderEstablishment

Repository: zookeeper
Updated Branches:
  refs/heads/master aa8b60670 -> f2cbcc7e0


ZOOKEEPER-2953: Flaky Test: testNoLogBeforeLeaderEstablishment

Author: Abraham Fine <af...@apache.org>

Reviewers: phunt@apache.org

Closes #432 from afine/ZOOKEEPER-2953 and squashes the following commits:

dc21603d [Abraham Fine] improve assertion logging
b5bc449c [Abraham Fine] use shared Servers and numServers
9fac0e20 [Abraham Fine] fix whitespace
68a03820 [Abraham Fine] ZOOKEEPER-2953: Flaky Test: testNoLogBeforeLeaderEstablishment

Change-Id: I8ee7b2d46783baa1e4cf2876cced740f4cd25ec2


Project: http://git-wip-us.apache.org/repos/asf/zookeeper/repo
Commit: http://git-wip-us.apache.org/repos/asf/zookeeper/commit/f2cbcc7e
Tree: http://git-wip-us.apache.org/repos/asf/zookeeper/tree/f2cbcc7e
Diff: http://git-wip-us.apache.org/repos/asf/zookeeper/diff/f2cbcc7e

Branch: refs/heads/master
Commit: f2cbcc7e0d7adff08bd73a27f2193b1198e4c7f7
Parents: aa8b606
Author: Abraham Fine <af...@apache.org>
Authored: Fri Dec 15 16:25:30 2017 -0800
Committer: Patrick Hunt <ph...@apache.org>
Committed: Fri Dec 15 16:25:30 2017 -0800

----------------------------------------------------------------------
 .../server/quorum/QuorumPeerMainTest.java       | 165 +++++++++++++++----
 .../server/quorum/QuorumPeerTestBase.java       |  19 ++-
 .../org/apache/zookeeper/test/QuorumTest.java   |  88 ----------
 3 files changed, 151 insertions(+), 121 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/zookeeper/blob/f2cbcc7e/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerMainTest.java
----------------------------------------------------------------------
diff --git a/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerMainTest.java b/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerMainTest.java
index a5ca72f..2a02428 100644
--- a/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerMainTest.java
+++ b/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerMainTest.java
@@ -27,6 +27,7 @@ import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.nio.ByteBuffer;
 import java.nio.channels.SocketChannel;
+import java.util.HashMap;
 import java.util.Map;
 import java.util.regex.Pattern;
 
@@ -335,6 +336,100 @@ public class QuorumPeerMainTest extends QuorumPeerTestBase {
                 output[0], 2);
     }
 
+    /**
+     * This test validates that if a quorum member determines that it is leader without the support of the rest of the
+     * quorum (the other members do not believe it to be the leader) it will stop attempting to lead and become a follower.
+     *
+     * @throws IOException
+     * @throws InterruptedException
+     */
+    @Test
+    public void testElectionFraud() throws IOException, InterruptedException {
+        // capture QuorumPeer logging
+        Layout layout = Logger.getRootLogger().getAppender("CONSOLE").getLayout();
+        ByteArrayOutputStream os = new ByteArrayOutputStream();
+        WriterAppender appender = new WriterAppender(layout, os);
+        appender.setThreshold(Level.INFO);
+        Logger qlogger = Logger.getLogger(QuorumPeer.class);
+        qlogger.addAppender(appender);
+
+        numServers = 3;
+
+        // used for assertions later
+        boolean foundLeading = false;
+        boolean foundLooking = false;
+        boolean foundFollowing = false;
+
+        try {
+          // spin up a quorum, we use a small ticktime to make the test run faster
+          servers = LaunchServers(numServers, 500);
+
+          // find the leader
+          int trueLeader = -1;
+          for (int i = 0; i < numServers; i++) {
+            if (servers.mt[i].main.quorumPeer.leader != null) {
+              trueLeader = i;
+            }
+          }
+          Assert.assertTrue("There should be a leader", trueLeader >= 0);
+
+          // find a follower
+          int falseLeader = (trueLeader + 1) % numServers;
+          Assert.assertTrue("All servers should join the quorum", servers.mt[falseLeader].main.quorumPeer.follower != null);
+
+          // to keep the quorum peer running and force it to go into the looking state, we kill leader election
+          // and close the connection to the leader
+          servers.mt[falseLeader].main.quorumPeer.electionAlg.shutdown();
+          servers.mt[falseLeader].main.quorumPeer.follower.getSocket().close();
+
+          // wait for the falseLeader to disconnect
+          waitForOne(servers.zk[falseLeader], States.CONNECTING);
+
+          // convince falseLeader that it is the leader
+          servers.mt[falseLeader].main.quorumPeer.setPeerState(QuorumPeer.ServerState.LEADING);
+
+          // provide time for the falseleader to realize no followers have connected
+          // (this is twice the timeout used in Leader#getEpochToPropose)
+          Thread.sleep(2 * servers.mt[falseLeader].main.quorumPeer.initLimit * servers.mt[falseLeader].main.quorumPeer.tickTime);
+
+          // Restart leader election
+          servers.mt[falseLeader].main.quorumPeer.startLeaderElection();
+
+          // The previous client connection to falseLeader likely closed, create a new one
+          servers.zk[falseLeader] = new ZooKeeper("127.0.0.1:" + servers.mt[falseLeader].getClientPort(), ClientBase.CONNECTION_TIMEOUT, this);
+
+          // Wait for falseLeader to rejoin the quorum
+          waitForOne(servers.zk[falseLeader], States.CONNECTED);
+
+          // and ensure trueLeader is still the leader
+          Assert.assertTrue(servers.mt[trueLeader].main.quorumPeer.leader != null);
+
+          // Look through the logs for output that indicates the falseLeader is LEADING, then LOOKING, then FOLLOWING
+          LineNumberReader r = new LineNumberReader(new StringReader(os.toString()));
+          Pattern leading = Pattern.compile(".*myid=" + falseLeader + ".*LEADING.*");
+          Pattern looking = Pattern.compile(".*myid=" + falseLeader + ".*LOOKING.*");
+          Pattern following = Pattern.compile(".*myid=" + falseLeader + ".*FOLLOWING.*");
+
+          String line;
+          while ((line = r.readLine()) != null) {
+            if (!foundLeading) {
+              foundLeading = leading.matcher(line).matches();
+            } else if(!foundLooking) {
+              foundLooking = looking.matcher(line).matches();
+            } else if (following.matcher(line).matches()){
+              foundFollowing = true;
+              break;
+            }
+          }
+        } finally {
+          qlogger.removeAppender(appender);
+        }
+
+        Assert.assertTrue("falseLeader never attempts to become leader", foundLeading);
+        Assert.assertTrue("falseLeader never gives up on leadership", foundLooking);
+        Assert.assertTrue("falseLeader never rejoins the quorum", foundFollowing);
+    }
+
     private void waitForOne(ZooKeeper zk, States state) throws InterruptedException {
         int iterations = ClientBase.CONNECTION_TIMEOUT / 500;
         while (zk.getState() != state) {
@@ -371,40 +466,48 @@ public class QuorumPeerMainTest extends QuorumPeerTestBase {
         ZooKeeper zk[];
     }
 
-    /**
-     * This is a helper function for launching a set of servers
-     *
-     * @param numServers
-     * @return
-     * @throws IOException
-     * @throws InterruptedException
-     */
-    private Servers LaunchServers(int numServers) throws IOException, InterruptedException {
-        int SERVER_COUNT = numServers;
-        Servers svrs = new Servers();
-        final int clientPorts[] = new int[SERVER_COUNT];
-        StringBuilder sb = new StringBuilder();
-        for (int i = 0; i < SERVER_COUNT; i++) {
-            clientPorts[i] = PortAssignment.unique();
-            sb.append("server."+i+"=127.0.0.1:"+PortAssignment.unique()+":"+PortAssignment.unique()+";"+clientPorts[i]+"\n");
-        }
-        String quorumCfgSection = sb.toString();
 
-        MainThread mt[] = new MainThread[SERVER_COUNT];
-        ZooKeeper zk[] = new ZooKeeper[SERVER_COUNT];
-        for (int i = 0; i < SERVER_COUNT; i++) {
-            mt[i] = new MainThread(i, clientPorts[i], quorumCfgSection);
-            mt[i].start();
-            zk[i] = new ZooKeeper("127.0.0.1:" + clientPorts[i], ClientBase.CONNECTION_TIMEOUT, this);
-        }
-
-        waitForAll(zk, States.CONNECTED);
-
-        svrs.mt = mt;
-        svrs.zk = zk;
-        return svrs;
+  	private Servers LaunchServers(int numServers) throws IOException, InterruptedException {
+  	    return LaunchServers(numServers, null);
     }
 
+    /** * This is a helper function for launching a set of servers
+  	 *
+  	 * @param numServers* @param tickTime A ticktime to pass to MainThread
+  	 * @return
+  	 * @throws IOException
+  	 * @throws InterruptedException
+  	 */
+  	private Servers LaunchServers(int numServers, Integer tickTime) throws IOException, InterruptedException {
+  	    int SERVER_COUNT = numServers;
+  	    Servers svrs = new Servers();
+  	    final int clientPorts[] = new int[SERVER_COUNT];
+  	    StringBuilder sb = new StringBuilder();
+  	    for(int i = 0; i < SERVER_COUNT; i++) {
+  	        clientPorts[i] = PortAssignment.unique();
+  	        sb.append("server."+i+"=127.0.0.1:"+PortAssignment.unique()+":"+PortAssignment.unique()+";"+clientPorts[i]+"\n");
+  	    }
+  	    String quorumCfgSection = sb.toString();
+
+  	    MainThread mt[] = new MainThread[SERVER_COUNT];
+  	    ZooKeeper zk[] = new ZooKeeper[SERVER_COUNT];
+  	    for(int i = 0; i < SERVER_COUNT; i++) {
+  	        if (tickTime != null) {
+  	          mt[i] = new MainThread(i, clientPorts[i], quorumCfgSection, new HashMap<String, String>(), tickTime);
+            } else {
+              mt[i] = new MainThread(i, clientPorts[i], quorumCfgSection);
+            }
+  	        mt[i].start();
+  	        zk[i] = new ZooKeeper("127.0.0.1:" + clientPorts[i], ClientBase.CONNECTION_TIMEOUT, this);
+  	    }
+  
+  	    waitForAll(zk, States.CONNECTED);
+  
+  	    svrs.mt = mt;
+  	    svrs.zk = zk;
+  	    return svrs;
+  	}
+
     /**
      * Verify handling of bad quorum address
      */

http://git-wip-us.apache.org/repos/asf/zookeeper/blob/f2cbcc7e/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerTestBase.java
----------------------------------------------------------------------
diff --git a/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerTestBase.java b/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerTestBase.java
index 21aa819..a357a6e 100644
--- a/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerTestBase.java
+++ b/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerTestBase.java
@@ -82,8 +82,18 @@ public class QuorumPeerTestBase extends ZKTestCase implements Watcher {
         private String quorumCfgSection;
         private Map<String, String> otherConfigs;
 
+        /**
+         * Create a MainThread
+         *
+         * @param myid
+         * @param clientPort
+         * @param quorumCfgSection
+         * @param otherConfigs
+         * @param tickTime initLimit will be 10 and syncLimit will be 5
+         * @throws IOException
+         */
         public MainThread(int myid, int clientPort, String quorumCfgSection,
-                          Map<String, String> otherConfigs) throws IOException {
+                Map<String, String> otherConfigs, int tickTime) throws IOException {
             baseDir = ClientBase.createTmpDir();
             this.myid = myid;
             this.clientPort = clientPort;
@@ -94,7 +104,7 @@ public class QuorumPeerTestBase extends ZKTestCase implements Watcher {
             confFile = new File(baseDir, "zoo.cfg");
 
             FileWriter fwriter = new FileWriter(confFile);
-            fwriter.write("tickTime=4000\n");
+            fwriter.write("tickTime=" + tickTime + "\n");
             fwriter.write("initLimit=10\n");
             fwriter.write("syncLimit=5\n");
 
@@ -296,6 +306,11 @@ public class QuorumPeerTestBase extends ZKTestCase implements Watcher {
                     new HashMap<String, String>());
         }
 
+        public MainThread(int myid, int clientPort, String quorumCfgSection,
+                          Map<String, String> otherConfigs) throws IOException {
+            this(myid, clientPort, quorumCfgSection, otherConfigs, 4000);
+        }
+
         Thread currentThread;
 
         synchronized public void start() {

http://git-wip-us.apache.org/repos/asf/zookeeper/blob/f2cbcc7e/src/java/test/org/apache/zookeeper/test/QuorumTest.java
----------------------------------------------------------------------
diff --git a/src/java/test/org/apache/zookeeper/test/QuorumTest.java b/src/java/test/org/apache/zookeeper/test/QuorumTest.java
index 469a9d1..cd1d153 100644
--- a/src/java/test/org/apache/zookeeper/test/QuorumTest.java
+++ b/src/java/test/org/apache/zookeeper/test/QuorumTest.java
@@ -329,94 +329,6 @@ public class QuorumTest extends ZKTestCase {
         zk.close();
     }
 
-    /**
-     * Tests if closeSession can be logged before a leader gets established, which
-     * could lead to a locked-out follower (see ZOOKEEPER-790). 
-     * 
-     * The test works as follows. It has a client connecting to a follower f and
-     * sending batches of 1,000 updates. The goal is that f has a zxid higher than
-     * all other servers in the initial leader election. This way we can crash and
-     * recover the follower so that the follower believes it is the leader once it
-     * recovers (LE optimization: once a server receives a message from all other 
-     * servers, it picks a leader.
-     * 
-     * It also makes the session timeout very short so that we force the false 
-     * leader to close the session and write it to the log in the buggy code (before 
-     * ZOOKEEPER-790). Once f drops leadership and finds the current leader, its epoch
-     * is higher, and it rejects the leader. Now, if we prevent the leader from closing
-     * the session by only starting up (see Leader.lead()) once it obtains a quorum of 
-     * supporters, then f will find the current leader and support it because it won't
-     * have a highe epoch.
-     * 
-     */
-    @Test
-    public void testNoLogBeforeLeaderEstablishment () throws Exception {
-        final Semaphore sem = new Semaphore(0);
-
-        qu = new QuorumUtil(2, 10);
-        qu.startQuorum();
-
-        int index = 1;
-        while(qu.getPeer(index).peer.leader == null)
-            index++;
-
-        Leader leader = qu.getPeer(index).peer.leader;
-
-        Assert.assertNotNull(leader);
-
-        /*
-         * Reusing the index variable to select a follower to connect to
-         */
-        index = (index == 1) ? 2 : 1;
-
-        ZooKeeper zk = new DisconnectableZooKeeper(
-                "127.0.0.1:" + qu.getPeer(index).peer.getClientPort(),
-                ClientBase.CONNECTION_TIMEOUT, new Watcher() {
-            public void process(WatchedEvent event) { }
-          });
-
-        zk.create("/blah", new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);      
-
-        for(int i = 0; i < 50000; i++) {
-            zk.setData("/blah", new byte[0], -1, new AsyncCallback.StatCallback() {
-                public void processResult(int rc, String path, Object ctx,
-                        Stat stat) {
-                    counter++;
-                    if (rc != 0) {
-                        errors++;
-                    }
-                    if(counter == 20000){
-                        sem.release();
-                    }
-                }
-            }, null);
-
-            if(i == 5000){
-                qu.shutdown(index);
-                LOG.info("Shutting down s1");
-            }
-            if(i == 12000){
-                qu.start(index);
-                LOG.info("Setting up server: " + index);
-            }
-            if((i % 1000) == 0){
-                Thread.sleep(500);
-            }
-        }
-
-        // Wait until all updates return
-        sem.tryAcquire(15, TimeUnit.SECONDS);
-
-        // Verify that server is following and has the same epoch as the leader
-        Assert.assertTrue("Not following", qu.getPeer(index).peer.follower != null);
-        long epochF = (qu.getPeer(index).peer.getActiveServer().getZxid() >> 32L);
-        long epochL = (leader.getEpoch() >> 32L);
-        Assert.assertTrue("Zxid: " + qu.getPeer(index).peer.getActiveServer().getZxid() + 
-                "Current epoch: " + epochF, epochF == epochL);
-
-        zk.close();
-    }
-
     // skip superhammer and clientcleanup as they are too expensive for quorum
 
     /**