You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zookeeper.apache.org by eo...@apache.org on 2019/12/13 07:51:53 UTC
[zookeeper] branch master updated: ZOOKEEPER-3644: Data loss after
upgrading standalone ZK server 3.4.14 to 3.5.6 with
snapshot.trust.empty=true
This is an automated email from the ASF dual-hosted git repository.
eolivelli pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/zookeeper.git
The following commit(s) were added to refs/heads/master by this push:
new a5d67c8 ZOOKEEPER-3644: Data loss after upgrading standalone ZK server 3.4.14 to 3.5.6 with snapshot.trust.empty=true
a5d67c8 is described below
commit a5d67c8ffcc18289aa89c892d948ad7c75a317b4
Author: Michael Han <ha...@apache.org>
AuthorDate: Fri Dec 13 08:51:38 2019 +0100
ZOOKEEPER-3644: Data loss after upgrading standalone ZK server 3.4.14 to 3.5.6 with snapshot.trust.empty=true
We didn't replay transactions during recovery when snapshot files are missing and snapshot.trust.empty is set to true; this leads to no actual data being applied to the deserialized in memory data tree. We should make sure always replay the transactions after snapshot loading is done.
Author: Michael Han <ha...@apache.org>
Reviewers: Enrico Olivelli <eo...@apache.org>
Closes #1177 from hanm/ZOOKEEPER-3644
---
.../server/persistence/FileTxnSnapLog.java | 45 ++++++++++++++--------
.../test/EmptiedSnapshotRecoveryTest.java | 4 +-
2 files changed, 33 insertions(+), 16 deletions(-)
diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
index 6b4dd2a..51f2dbb 100644
--- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
+++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
@@ -85,9 +85,19 @@ public class FileTxnSnapLog {
* restored.
*/
public interface PlayBackListener {
-
void onTxnLoaded(TxnHeader hdr, Record rec);
+ }
+ /**
+ * Finalizing restore of data tree through
+ * a set of operations (replaying transaction logs,
+ * calculating data tree digests, and so on.).
+ */
+ private interface RestoreFinalizer {
+ /**
+ * @return the highest zxid of restored data tree.
+ */
+ long run() throws IOException;
}
/**
@@ -241,6 +251,23 @@ public class FileTxnSnapLog {
trustEmptyDB = autoCreateDB;
}
+ RestoreFinalizer finalizer = () -> {
+ long highestZxid = fastForwardFromEdits(dt, sessions, listener);
+ // The snapshotZxidDigest will reset after replaying the txn of the
+ // zxid in the snapshotZxidDigest, if it's not reset to null after
+ // restoring, it means either there are not enough txns to cover that
+ // zxid or that txn is missing
+ DataTree.ZxidDigest snapshotZxidDigest = dt.getDigestFromLoadedSnapshot();
+ if (snapshotZxidDigest != null) {
+ LOG.warn(
+ "Highest txn zxid 0x{} is not covering the snapshot digest zxid 0x{}, "
+ + "which might lead to inconsistent state",
+ Long.toHexString(highestZxid),
+ Long.toHexString(snapshotZxidDigest.getZxid()));
+ }
+ return highestZxid;
+ };
+
if (-1L == deserializeResult) {
/* this means that we couldn't find any snapshot, so we need to
* initialize an empty database (reported in ZOOKEEPER-2325) */
@@ -251,6 +278,7 @@ public class FileTxnSnapLog {
throw new IOException(EMPTY_SNAPSHOT_WARNING + "Something is broken!");
} else {
LOG.warn("{}This should only be allowed during upgrading.", EMPTY_SNAPSHOT_WARNING);
+ return finalizer.run();
}
}
@@ -269,20 +297,7 @@ public class FileTxnSnapLog {
}
}
- long highestZxid = fastForwardFromEdits(dt, sessions, listener);
- // The snapshotZxidDigest will reset after replaying the txn of the
- // zxid in the snapshotZxidDigest, if it's not reset to null after
- // restoring, it means either there are not enough txns to cover that
- // zxid or that txn is missing
- DataTree.ZxidDigest snapshotZxidDigest = dt.getDigestFromLoadedSnapshot();
- if (snapshotZxidDigest != null) {
- LOG.warn(
- "Highest txn zxid 0x{} is not covering the snapshot digest zxid 0x{}, "
- + "which might lead to inconsistent state",
- Long.toHexString(highestZxid),
- Long.toHexString(snapshotZxidDigest.getZxid()));
- }
- return highestZxid;
+ return finalizer.run();
}
/**
diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java
index a8d7169..da50a9c 100644
--- a/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java
+++ b/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java
@@ -18,6 +18,7 @@
package org.apache.zookeeper.test;
+import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.File;
@@ -100,10 +101,11 @@ public class EmptiedSnapshotRecoveryTest extends ZKTestCase implements Watcher {
zks = new ZooKeeperServer(tmpSnapDir, tmpLogDir, 3000);
try {
zks.startdata();
- zxid = zks.getZKDatabase().loadDataBase();
+ long currentZxid = zks.getZKDatabase().getDataTreeLastProcessedZxid();
if (!trustEmptySnap) {
fail("Should have gotten exception for corrupted database");
}
+ assertEquals("zxid mismatch after restoring database", currentZxid, zxid);
} catch (IOException e) {
// expected behavior
if (trustEmptySnap) {