You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zookeeper.apache.org by sy...@apache.org on 2022/05/16 17:59:03 UTC

[zookeeper] branch branch-3.5 updated: ZOOKEEPER-3781: Create snapshots on followers when snapshot.trust.empty is true

This is an automated email from the ASF dual-hosted git repository.

symat pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/zookeeper.git


The following commit(s) were added to refs/heads/branch-3.5 by this push:
     new d1ec2f346 ZOOKEEPER-3781: Create snapshots on followers when snapshot.trust.empty is true
d1ec2f346 is described below

commit d1ec2f346b43ce6c00be45a68871fb7bda6e098a
Author: Stig Rohde Døssing <st...@humio.com>
AuthorDate: Sat Mar 6 19:38:43 2021 +0000

    ZOOKEEPER-3781: Create snapshots on followers when snapshot.trust.empty is true
    
    snapshot.trust.empty is an escape hatch for users upgrading from 3.4.x to later Zookeeper
    versions, allowing nodes to start with a non-empty transaction log but no snapshot.
    
    The intent is for this setting to be enabled for a short while during the upgrade,
    and then disabled again, as the check it disables is a safety feature.
    
    Prior to this PR, a node would only write a snapshot locally if it became leader,
    or if it had fallen so far behind the leader that the leader sent a SNAP message instead
    of a DIFF. This made the upgrade process inconvenient, as not all nodes would create
    a snapshot when snapshot.trust.empty was true, meaning that the safety check could
    not be flipped back on.
    
    This PR makes follower nodes write a local snapshot when they receive NEWLEADER,
    if they have no local snapshot and snapshot.trust.empty is true.
    
    Author: Stig Rohde Døssing <st...@humio.com>
    
    Reviewers: Enrico Olivelli <eo...@apache.org>, Damien Diederen <dd...@apache.org>
    
    Closes #1581 from srdo/zookeeper-3781
    
    (cherry picked from commit 1214d3bf611d153ae8c3987523da01d3d6c82686)
    Signed-off-by: Damien Diederen <dd...@apache.org>
    (cherry picked from commit 679cc2b1015db6a0b41cf9223c826a4b565387e9)
---
 .../apache/zookeeper/server/ZooKeeperServer.java   |  4 ++
 .../server/persistence/FileTxnSnapLog.java         |  9 ++++
 .../apache/zookeeper/server/quorum/Learner.java    |  8 +++-
 .../test/EmptiedSnapshotRecoveryTest.java          | 51 +++++++++++++++++++++-
 .../java/org/apache/zookeeper/test/QuorumUtil.java | 24 +++++-----
 5 files changed, 82 insertions(+), 14 deletions(-)

diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java
index 2bab9378f..a12c1117d 100644
--- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java
+++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java
@@ -353,6 +353,10 @@ public class ZooKeeperServer implements SessionExpirer, ServerStats.Provider {
         }
     }
 
+    public boolean shouldForceWriteInitialSnapshotAfterLeaderElection() {
+        return txnLogFactory.shouldForceWriteInitialSnapshotAfterLeaderElection();
+    }
+
     @Override
     public long getDataDirSize() {
         if (zkDb == null) {
diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
index 0267eb480..ff9b914d6 100644
--- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
+++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
@@ -217,6 +217,15 @@ public class FileTxnSnapLog {
         return this.snapLog.getLastSnapshotInfo();
     }
 
+    /**
+     * whether to force the write of an initial snapshot after a leader election,
+     * to address ZOOKEEPER-3781 after upgrading from Zookeeper 3.4.x.
+     * @return true if an initial snapshot should be written even if not otherwise required, false otherwise.
+     */
+    public boolean shouldForceWriteInitialSnapshotAfterLeaderElection() {
+        return trustEmptySnapshot && getLastSnapshotInfo() == null;
+    }
+
     /**
      * this function restores the server
      * database after reading from the
diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Learner.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Learner.java
index 51b103882..a8d89b28d 100644
--- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Learner.java
+++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Learner.java
@@ -390,7 +390,13 @@ public class Learner {
         synchronized (zk) {
             if (qp.getType() == Leader.DIFF) {
                 LOG.info("Getting a diff from the leader 0x{}", Long.toHexString(qp.getZxid()));
-                snapshotNeeded = false;
+                if (zk.shouldForceWriteInitialSnapshotAfterLeaderElection()) {
+                    LOG.info("Forcing a snapshot write as part of upgrading from an older Zookeeper. This should only happen while upgrading.");
+                    snapshotNeeded = true;
+                    syncSnapshot = true;
+                } else {
+                    snapshotNeeded = false;
+                }
             }
             else if (qp.getType() == Leader.SNAP) {
                 LOG.info("Getting a snapshot from leader 0x" + Long.toHexString(qp.getZxid()));
diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java
index 2ff750ea1..64c48ef5e 100644
--- a/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java
+++ b/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java
@@ -22,6 +22,7 @@ import static org.junit.Assert.assertEquals;
 import java.io.File;
 import java.io.IOException;
 import java.io.PrintWriter;
+import java.nio.file.Files;
 import java.util.List;
 
 import org.apache.log4j.Logger;
@@ -39,7 +40,7 @@ import org.apache.zookeeper.server.persistence.FileTxnSnapLog;
 import org.junit.Assert;
 import org.junit.Test;
 
-/** If snapshots are corrupted to the empty file or deleted, Zookeeper should 
+/** If snapshots are corrupted to the empty file or deleted, Zookeeper should
  *  not proceed to read its transaction log files
  *  Test that zxid == -1 in the presence of emptied/deleted snapshots
  */
@@ -145,6 +146,54 @@ public class EmptiedSnapshotRecoveryTest extends ZKTestCase implements  Watcher
         runTest(false, true);
     }
 
+    @Test
+    public void testRestoreWithTrustedEmptySnapFilesWhenFollowing() throws Exception {
+        QuorumUtil qu = new QuorumUtil(1);
+        try {
+            qu.startAll();
+            String connString = qu.getConnectionStringForServer(1);
+            try (ZooKeeper zk = new ZooKeeper(connString, CONNECTION_TIMEOUT, this)) {
+                for (int i = 0; i < N_TRANSACTIONS; i++) {
+                    zk.create("/node-" + i, new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
+                }
+            }
+            int leaderIndex = qu.getLeaderServer();
+            //Shut down the cluster and delete the snapshots from the followers
+            for (int i = 1; i <= qu.ALL; i++) {
+                qu.shutdown(i);
+                if (i != leaderIndex) {
+                    FileTxnSnapLog txnLogFactory = qu.getPeer(i).peer.getTxnFactory();
+                    List<File> snapshots = txnLogFactory.findNRecentSnapshots(10);
+                    Assert.assertTrue("We have a snapshot to corrupt", snapshots.size() > 0);
+                    for (File file : snapshots) {
+                        Files.delete(file.toPath());
+                    }
+                    assertEquals(txnLogFactory.findNRecentSnapshots(10).size(), 0);
+                }
+            }
+            //Start while trusting empty snapshots, verify that the followers save snapshots
+            System.setProperty(FileTxnSnapLog.ZOOKEEPER_SNAPSHOT_TRUST_EMPTY, "true");
+            qu.start(leaderIndex);
+            for (int i = 1; i <= qu.ALL; i++) {
+                if (i != leaderIndex) {
+                    qu.restart(i);
+                    FileTxnSnapLog txnLogFactory = qu.getPeer(i).peer.getTxnFactory();
+                    List<File> snapshots = txnLogFactory.findNRecentSnapshots(10);
+                    Assert.assertTrue("A snapshot should have been created on follower " + i, snapshots.size() > 0);
+                }
+            }
+            //Check that the created nodes are still there
+            try (ZooKeeper zk = new ZooKeeper(connString, CONNECTION_TIMEOUT, this)) {
+                for (int i = 0; i < N_TRANSACTIONS; i++) {
+                    Assert.assertNotNull(zk.exists("/node-" + i, false));
+                }
+            }
+        } finally {
+            System.clearProperty(FileTxnSnapLog.ZOOKEEPER_SNAPSHOT_TRUST_EMPTY);
+            qu.tearDown();
+        }
+    }
+
     public void process(WatchedEvent event) {
         // do nothing
     }
diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/test/QuorumUtil.java b/zookeeper-server/src/test/java/org/apache/zookeeper/test/QuorumUtil.java
index 113452a95..d6e8b9963 100644
--- a/zookeeper-server/src/test/java/org/apache/zookeeper/test/QuorumUtil.java
+++ b/zookeeper-server/src/test/java/org/apache/zookeeper/test/QuorumUtil.java
@@ -107,7 +107,7 @@ public class QuorumUtil {
                 ps.clientPort = PortAssignment.unique();
                 peers.put(i, ps);
 
-                peersView.put(Long.valueOf(i), new QuorumServer(i, 
+                peersView.put(Long.valueOf(i), new QuorumServer(i,
                                new InetSocketAddress("127.0.0.1", PortAssignment.unique()),
                                new InetSocketAddress("127.0.0.1", PortAssignment.unique()),
                                new InetSocketAddress("127.0.0.1", ps.clientPort),
@@ -136,7 +136,7 @@ public class QuorumUtil {
 
     // This was added to avoid running into the problem of ZOOKEEPER-1539
     public boolean disableJMXTest = false;
-    
+
 
     public void enableLocalSession(boolean localSessionEnabled) {
         this.localSessionEnabled = localSessionEnabled;
@@ -158,7 +158,7 @@ public class QuorumUtil {
 
         // This was added to avoid running into the problem of ZOOKEEPER-1539
         if (disableJMXTest) return;
-        
+
         // interesting to see what's there...
         try {
             JMXEnv.dump();
@@ -250,22 +250,22 @@ public class QuorumUtil {
     public void shutdown(int id) {
         QuorumPeer qp = getPeer(id).peer;
         try {
-            LOG.info("Shutting down quorum peer " + qp.getName());
+            LOG.info("Shutting down quorum peer {} with id {}", qp.getName(), id);
             qp.shutdown();
             Election e = qp.getElectionAlg();
             if (e != null) {
-                LOG.info("Shutting down leader election " + qp.getName());
+                LOG.info("Shutting down leader election {} with id {}", qp.getName(), id);
                 e.shutdown();
             } else {
-                LOG.info("No election available to shutdown " + qp.getName());
+                LOG.info("No election available to shutdown {} with id {}", qp.getName(), id);
             }
-            LOG.info("Waiting for " + qp.getName() + " to exit thread");
+            LOG.info("Waiting for {} with id {} to exit thread", qp.getName(), id);
             qp.join(30000);
             if (qp.isAlive()) {
-                Assert.fail("QP failed to shutdown in 30 seconds: " + qp.getName());
+                Assert.fail("QP failed to shutdown in 30 seconds: " + qp.getName() + " " + id);
             }
         } catch (InterruptedException e) {
-            LOG.debug("QP interrupted: " + qp.getName(), e);
+            LOG.debug("QP interrupted: {} {}", qp.getName(), id, e);
         }
     }
 
@@ -293,11 +293,11 @@ public class QuorumUtil {
     }
 
     public List<QuorumPeer> getFollowerQuorumPeers() {
-        List<QuorumPeer> peerList = new ArrayList<QuorumPeer>(ALL - 1); 
+        List<QuorumPeer> peerList = new ArrayList<QuorumPeer>(ALL - 1);
 
         for (PeerStruct ps: peers.values()) {
             if (ps.peer.leader == null) {
-               peerList.add(ps.peer);      
+               peerList.add(ps.peer);
             }
         }
 
@@ -308,7 +308,7 @@ public class QuorumUtil {
         LOG.info("TearDown started");
 
         OSMXBean osMbean = new OSMXBean();
-        if (osMbean.getUnix() == true) {    
+        if (osMbean.getUnix() == true) {
             LOG.info("fdcount after test is: " + osMbean.getOpenFileDescriptorCount());
         }