You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zookeeper.apache.org by ar...@apache.org on 2021/03/31 21:21:07 UTC
[zookeeper] branch branch-3.7 updated: ZOOKEEPER-4269:
acceptedEpoch.tmp rename failure will cause server startup error
This is an automated email from the ASF dual-hosted git repository.
arshad pushed a commit to branch branch-3.7
in repository https://gitbox.apache.org/repos/asf/zookeeper.git
The following commit(s) were added to refs/heads/branch-3.7 by this push:
new e892430 ZOOKEEPER-4269: acceptedEpoch.tmp rename failure will cause server startup error
e892430 is described below
commit e892430a2c72188193300e44be9e0724caee41e9
Author: Mohammad Arshad <ar...@apache.org>
AuthorDate: Thu Apr 1 02:52:49 2021 +0530
ZOOKEEPER-4269: acceptedEpoch.tmp rename failure will cause server startup error
Using accepted epoch from acceptedEpoch.tmp if it is available
Author: Mohammad Arshad <ar...@apache.org>
Reviewers: Enrico Olivelli <eo...@apache.org>,Damien Diederen <dd...@crosstwine.com>
Closes #1664 from arshadmohammad/ZOOKEEPER-4269-master and squashes the following commits:
d8ae084c1 [Mohammad Arshad] Handled review comments
a2cc9a5b6 [Mohammad Arshad] Added test cases
b56837e55 [Mohammad Arshad] ZOOKEEPER-4269: acceptedEpoch.tmp rename failure will cause server startup error Using accepted epoch from acceptedEpoch.tmp if it is available
(cherry picked from commit cdddda4c55acf29d4e0b2bc8f3de7b5c676e8ffc)
Signed-off-by: Mohammad Arshad <ar...@apache.org>
---
.../zookeeper/common/AtomicFileOutputStream.java | 2 +-
.../apache/zookeeper/server/quorum/QuorumPeer.java | 17 ++-
.../quorum/CurrentEpochWriteFailureTest.java | 118 +++++++++++++++++++++
3 files changed, 132 insertions(+), 5 deletions(-)
diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/common/AtomicFileOutputStream.java b/zookeeper-server/src/main/java/org/apache/zookeeper/common/AtomicFileOutputStream.java
index 1574815..d6b7cf6 100644
--- a/zookeeper-server/src/main/java/org/apache/zookeeper/common/AtomicFileOutputStream.java
+++ b/zookeeper-server/src/main/java/org/apache/zookeeper/common/AtomicFileOutputStream.java
@@ -45,7 +45,7 @@ import org.slf4j.LoggerFactory;
*/
public class AtomicFileOutputStream extends FilterOutputStream {
- private static final String TMP_EXTENSION = ".tmp";
+ public static final String TMP_EXTENSION = ".tmp";
private static final Logger LOG = LoggerFactory.getLogger(AtomicFileOutputStream.class);
diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java
index 3102c63..48d2afd 100644
--- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java
+++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java
@@ -52,6 +52,7 @@ import java.util.stream.IntStream;
import javax.security.sasl.SaslException;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.zookeeper.KeeperException.BadArgumentsException;
+import org.apache.zookeeper.common.AtomicFileOutputStream;
import org.apache.zookeeper.common.AtomicFileWritingIdiom;
import org.apache.zookeeper.common.AtomicFileWritingIdiom.WriterStatement;
import org.apache.zookeeper.common.QuorumX509Util;
@@ -1162,10 +1163,18 @@ public class QuorumPeer extends ZooKeeperThread implements QuorumStats.Provider
writeLongToFile(CURRENT_EPOCH_FILENAME, currentEpoch);
}
if (epochOfZxid > currentEpoch) {
- throw new IOException("The current epoch, "
- + ZxidUtils.zxidToString(currentEpoch)
- + ", is older than the last zxid, "
- + lastProcessedZxid);
+ // currentEpoch.tmp file in snapshot directory
+ File currentTmp = new File(getTxnFactory().getSnapDir(),
+ CURRENT_EPOCH_FILENAME + AtomicFileOutputStream.TMP_EXTENSION);
+ if (currentTmp.exists()) {
+ long epochOfTmp = readLongFromFile(currentTmp.getName());
+ LOG.info("{} found. Setting current epoch to {}.", currentTmp, epochOfTmp);
+ setCurrentEpoch(epochOfTmp);
+ } else {
+ throw new IOException(
+ "The current epoch, " + ZxidUtils.zxidToString(currentEpoch)
+ + ", is older than the last zxid, " + lastProcessedZxid);
+ }
}
try {
acceptedEpoch = readLongFromFile(ACCEPTED_EPOCH_FILENAME);
diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CurrentEpochWriteFailureTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CurrentEpochWriteFailureTest.java
new file mode 100644
index 0000000..ccd4d49
--- /dev/null
+++ b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CurrentEpochWriteFailureTest.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.zookeeper.server.quorum;
+
+import static org.apache.zookeeper.test.ClientBase.CONNECTION_TIMEOUT;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import java.io.File;
+import java.io.IOException;
+import org.apache.commons.io.FileUtils;
+import org.apache.zookeeper.CreateMode;
+import org.apache.zookeeper.ZooDefs.Ids;
+import org.apache.zookeeper.ZooKeeper;
+import org.apache.zookeeper.common.AtomicFileOutputStream;
+import org.apache.zookeeper.test.ClientBase;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class CurrentEpochWriteFailureTest extends QuorumPeerTestBase {
+ protected static final Logger LOG = LoggerFactory.getLogger(CurrentEpochWriteFailureTest.class);
+ private Servers servers;
+ private int clientPort;
+
+ @AfterEach
+ public void tearDown() throws InterruptedException {
+ if (servers != null) {
+ servers.shutDownAllServers();
+ }
+ }
+
+ /*
+ * ZOOKEEPER-4269:
+ * The current epoch is first written to the temporary file currentEpoch.tmp, and then this
+ * file is renamed to currentEpoch.
+ * A failure in renaming currentEpoch.tmp, caused either by an exception or by a power-off,
+ * will cause a server startup error with the message "The current epoch, x, is older than
+ * the last zxid y".
+ * To handle this scenario, we should read the current epoch from this temp file as well.
+ */
+ @Test
+ public void testReadCurrentEpochFromAcceptedEpochTmpFile() throws Exception {
+ startServers();
+ writeSomeData();
+
+ restartServers();
+ writeSomeData();
+
+ MainThread firstServer = servers.mt[0];
+
+ // As the servers were started two times, the current epoch must be two
+ long currentEpoch = firstServer.getQuorumPeer().getCurrentEpoch();
+ assertEquals(2, currentEpoch);
+
+ // Initialize files for later use
+ File snapDir = firstServer.getQuorumPeer().getTxnFactory().getSnapDir();
+ File currentEpochFile = new File(snapDir, QuorumPeer.CURRENT_EPOCH_FILENAME);
+ File currentEpochTempFile = new File(snapDir,
+ QuorumPeer.CURRENT_EPOCH_FILENAME + AtomicFileOutputStream.TMP_EXTENSION);
+
+ // Shutdown servers
+ servers.shutDownAllServers();
+ waitForAll(servers, ZooKeeper.States.CONNECTING);
+
+ // Create the scenario in which renaming currentEpoch.tmp to currentEpoch has failed.
+ // In this case the currentEpoch file will have the old epoch and currentEpoch.tmp will have the latest epoch
+ FileUtils.write(currentEpochFile, Long.toString(currentEpoch - 1), "UTF-8");
+ FileUtils.write(currentEpochTempFile, Long.toString(currentEpoch), "UTF-8");
+
+ // Restart the servers; all servers should restart successfully.
+ servers.restartAllServersAndClients(this);
+
+ // Check the first server where problem was injected.
+ assertTrue(ClientBase
+ .waitForServerUp("127.0.0.1:" + firstServer.getClientPort(), CONNECTION_TIMEOUT),
+ "server " + firstServer.getMyid()
+ + " is not up as file currentEpoch.tmp rename to currentEpoch file was failed"
+ + " which lead current epoch inconsistent state.");
+ }
+
+ private void restartServers() throws InterruptedException, IOException {
+ servers.shutDownAllServers();
+ waitForAll(servers, ZooKeeper.States.CONNECTING);
+ servers.restartAllServersAndClients(this);
+ waitForAll(servers, ZooKeeper.States.CONNECTED);
+ }
+
+ private void writeSomeData() throws Exception {
+ ZooKeeper client = ClientBase.createZKClient("127.0.0.1:" + clientPort);
+ String path = "/somePath" + System.currentTimeMillis();
+ String data = "someData";
+ client.create(path, data.getBytes(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
+ byte[] data1 = client.getData(path, false, null);
+ assertEquals(data, new String(data1));
+ client.close();
+ }
+
+ private void startServers() throws Exception {
+ servers = LaunchServers(3);
+ clientPort = servers.clientPorts[0];
+ }
+}