You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zookeeper.apache.org by eo...@apache.org on 2019/09/02 06:23:12 UTC

[zookeeper] branch master updated: ZOOKEEPER-3056: Fails to load database with missing snapshot file but with valid transaction log file.

This is an automated email from the ASF dual-hosted git repository.

eolivelli pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/zookeeper.git


The following commit(s) were added to refs/heads/master by this push:
     new 68c2198  ZOOKEEPER-3056: Fails to load database with missing snapshot file but with valid transaction log file.
68c2198 is described below

commit 68c21988d55c57e483370d3ee223c22da2d1bbcf
Author: Michael Han <lh...@twitter.com>
AuthorDate: Mon Sep 2 08:23:06 2019 +0200

    ZOOKEEPER-3056: Fails to load database with missing snapshot file but with valid transaction log file.
    
    ZOOKEEPER-2325 introduced a check on snapshot and transaction log files during recovery, which will treat empty or missing snapshot files with the presence of transaction log files illegal state, and abort start up process.
    
    For old versions of ZooKeeper, it's possible to have valid transaction log files but no snapshot. For example, if snap count set too low, or server crashes before the first snapshot was taken. For new versions of ZooKeeper with ZOOKEEPER-2325, this is not a problem as ZooKeeper will make sure the presence of at least one snapshot file (e.g. on a fresh start with empty local data, ZooKeeper will create one snapshot file as part of the start up.).
    
    So we need provide a smooth upgrade path for users with old version of ZooKeeper. This patch introduces a system property flag, once set, will skip the empty snapshot validation during start up. The default value of this flag is false, as we don't want to disable the features introduced in ZOOKEEPER-2325. We also want the flag to be explicitly set by admin, as ZooKeeper itself is not able to tell when to trust missing snapshot or not.
    
    Author: Michael Han <lh...@twitter.com>
    
    Reviewers: Enrico Olivelli <eo...@apache.org>, Brian Nixon <ni...@fb.com>
    
    Closes #1069 from hanm/ZOOKEEPER-3056
---
 .../src/main/resources/markdown/zookeeperAdmin.md  | 14 +++++++++++
 .../server/persistence/FileTxnSnapLog.java         | 18 ++++++++++++--
 .../test/EmptiedSnapshotRecoveryTest.java          | 28 ++++++++++++++++++----
 3 files changed, 53 insertions(+), 7 deletions(-)

diff --git a/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md b/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md
index d3bcaff..c4cc747 100644
--- a/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md
+++ b/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md
@@ -915,6 +915,20 @@ property, when available, is noted below.
     ZooKeeper when loading database from disk, and syncing with leader.
     By default, this feautre is disabled, set "true" to enable it.
 
+* *snapshot.trust.empty* :
+    (Java system property only: **zookeeper.snapshot.trust.empty**)
+    **New in 3.5.6:**
+    This property controls whether or not ZooKeeper should treat missing
+    snapshot files as a fatal state that can't be recovered from.
+    Set to true to allow ZooKeeper servers recover without snapshot
+    files. This should only be set during upgrading from old versions of
+    ZooKeeper (3.4.x, pre 3.5.3) where ZooKeeper might only have transaction
+    log files but without presence of snapshot files. If the value is set
+    during upgrade, we recommend to set the value back to false after upgrading
+    and restart ZooKeeper process so ZooKeeper can continue normal data
+    consistency check during recovery process.
+    Default value is false.
+
 <a name="sc_clusterOptions"></a>
 
 #### Cluster Options
diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
index c590765..cf08f6e 100644
--- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
+++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
@@ -59,6 +59,7 @@ public class FileTxnSnapLog {
     TxnLog txnLog;
     SnapShot snapLog;
     private final boolean autoCreateDB;
+    private final boolean trustEmptySnapshot;
     public static final int VERSION = 2;
     public static final String version = "version-";
 
@@ -72,6 +73,10 @@ public class FileTxnSnapLog {
 
     private static final String ZOOKEEPER_DB_AUTOCREATE_DEFAULT = "true";
 
+    public static final String ZOOKEEPER_SNAPSHOT_TRUST_EMPTY = "zookeeper.snapshot.trust.empty";
+
+    private static final String EMPTY_SNAPSHOT_WARNING = "No snapshot found, but there are log entries. ";
+
     /**
      * This listener helps
      * the external apis calling
@@ -102,6 +107,9 @@ public class FileTxnSnapLog {
         boolean enableAutocreate = Boolean.parseBoolean(
             System.getProperty(ZOOKEEPER_DATADIR_AUTOCREATE, ZOOKEEPER_DATADIR_AUTOCREATE_DEFAULT));
 
+        trustEmptySnapshot = Boolean.getBoolean(ZOOKEEPER_SNAPSHOT_TRUST_EMPTY);
+        LOG.info(ZOOKEEPER_SNAPSHOT_TRUST_EMPTY + " : " + trustEmptySnapshot);
+
         if (!this.dataDir.exists()) {
             if (!enableAutocreate) {
                 throw new DatadirException(String.format(
@@ -232,11 +240,18 @@ public class FileTxnSnapLog {
         } else {
             trustEmptyDB = autoCreateDB;
         }
+
         if (-1L == deserializeResult) {
             /* this means that we couldn't find any snapshot, so we need to
              * initialize an empty database (reported in ZOOKEEPER-2325) */
             if (txnLog.getLastLoggedZxid() != -1) {
-                throw new IOException("No snapshot found, but there are log entries. " + "Something is broken!");
+                // ZOOKEEPER-3056: provides an escape hatch for users upgrading
+                // from old versions of zookeeper (3.4.x, pre 3.5.3).
+                if (!trustEmptySnapshot) {
+                    throw new IOException(EMPTY_SNAPSHOT_WARNING + "Something is broken!");
+                } else {
+                    LOG.warn(EMPTY_SNAPSHOT_WARNING + "This should only be allowed during upgrading.");
+                }
             }
 
             if (trustEmptyDB) {
@@ -600,5 +615,4 @@ public class FileTxnSnapLog {
     public long getTotalLogSize() {
         return txnLog.getTotalLogSize();
     }
-
 }
diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java
index 571636c..ea2bff1 100644
--- a/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java
+++ b/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java
@@ -39,7 +39,7 @@ import org.apache.zookeeper.server.persistence.FileTxnSnapLog;
 import org.junit.Test;
 
 /** If snapshots are corrupted to the empty file or deleted, Zookeeper should
- *  not proceed to read its transactiong log files
+ *  not proceed to read its transaction log files
  *  Test that zxid == -1 in the presence of emptied/deleted snapshots
  */
 public class EmptiedSnapshotRecoveryTest extends ZKTestCase implements Watcher {
@@ -50,7 +50,7 @@ public class EmptiedSnapshotRecoveryTest extends ZKTestCase implements Watcher {
     private static final int N_TRANSACTIONS = 150;
     private static final int SNAP_COUNT = 100;
 
-    public void runTest(boolean leaveEmptyFile) throws Exception {
+    public void runTest(boolean leaveEmptyFile, boolean trustEmptySnap) throws Exception {
         File tmpSnapDir = ClientBase.createTmpDir();
         File tmpLogDir = ClientBase.createTmpDir();
         ClientBase.setupTestEnv();
@@ -92,15 +92,28 @@ public class EmptiedSnapshotRecoveryTest extends ZKTestCase implements Watcher {
             }
         }
 
+        if (trustEmptySnap) {
+            System.setProperty(FileTxnSnapLog.ZOOKEEPER_SNAPSHOT_TRUST_EMPTY, "true");
+        }
         // start server again with corrupted database
         zks = new ZooKeeperServer(tmpSnapDir, tmpLogDir, 3000);
         try {
             zks.startdata();
             zxid = zks.getZKDatabase().loadDataBase();
-            fail("Should have gotten exception for corrupted database");
+            if (!trustEmptySnap) {
+                fail("Should have gotten exception for corrupted database");
+            }
         } catch (IOException e) {
             // expected behavior
+            if (trustEmptySnap) {
+                fail("Should not get exception for empty database");
+            }
+        } finally {
+            if (trustEmptySnap) {
+                System.clearProperty(FileTxnSnapLog.ZOOKEEPER_SNAPSHOT_TRUST_EMPTY);
+            }
         }
+
         zks.shutdown();
     }
 
@@ -110,7 +123,7 @@ public class EmptiedSnapshotRecoveryTest extends ZKTestCase implements Watcher {
      */
     @Test
     public void testRestoreWithEmptySnapFiles() throws Exception {
-        runTest(true);
+        runTest(true, false);
     }
 
     /**
@@ -119,7 +132,12 @@ public class EmptiedSnapshotRecoveryTest extends ZKTestCase implements Watcher {
      */
     @Test
     public void testRestoreWithNoSnapFiles() throws Exception {
-        runTest(false);
+        runTest(false, false);
+    }
+
+    @Test
+    public void testRestoreWithTrustedEmptySnapFiles() throws Exception {
+        runTest(false, true);
     }
 
     public void process(WatchedEvent event) {