You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ga...@apache.org on 2020/06/24 16:00:01 UTC
[hadoop] 02/02: HDFS-15323. StandbyNode fails transition to active
due to insufficient transaction tailing. Contributed by Konstantin V
Shvachko.
This is an automated email from the ASF dual-hosted git repository.
gabota pushed a commit to branch branch-3.1.4
in repository https://gitbox.apache.org/repos/asf/hadoop.git
commit 4ef7dab2153d1ad30023c43da8954fceb6a01ccb
Author: Konstantin V Shvachko <sh...@apache.org>
AuthorDate: Mon May 4 10:29:50 2020 -0700
HDFS-15323. StandbyNode fails transition to active due to insufficient transaction tailing. Contributed by Konstantin V Shvachko.
(cherry picked from commit ebb878bab991c242b5089a18881aa10abf318ea0)
---
.../hdfs/qjournal/client/QuorumJournalManager.java | 4 ++--
.../hdfs/server/namenode/ha/EditLogTailer.java | 24 +++++++++++++------
.../namenode/ha/TestStandbyInProgressTail.java | 28 ++++++++++++++++++++++
3 files changed, 47 insertions(+), 9 deletions(-)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java
index 94b5832..f8ebd89 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java
@@ -73,9 +73,9 @@ public class QuorumJournalManager implements JournalManager {
static final Log LOG = LogFactory.getLog(QuorumJournalManager.class);
// This config is not publicly exposed
- static final String QJM_RPC_MAX_TXNS_KEY =
+ public static final String QJM_RPC_MAX_TXNS_KEY =
"dfs.ha.tail-edits.qjm.rpc.max-txns";
- static final int QJM_RPC_MAX_TXNS_DEFAULT = 5000;
+ public static final int QJM_RPC_MAX_TXNS_DEFAULT = 5000;
// Maximum number of transactions to fetch at a time when using the
// RPC edit fetch mechanism
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java
index ca231b4..276b76f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java
@@ -299,13 +299,23 @@ public class EditLogTailer {
SecurityUtil.doAsLoginUser(new PrivilegedExceptionAction<Void>() {
@Override
public Void run() throws Exception {
- try {
- // It is already under the full name system lock and the checkpointer
- // thread is already stopped. No need to acqure any other lock.
- doTailEdits();
- } catch (InterruptedException e) {
- throw new IOException(e);
- }
+ long editsTailed = 0;
+ // Fully tail the journal to the end
+ do {
+ long startTime = Time.monotonicNow();
+ try {
+ NameNode.getNameNodeMetrics().addEditLogTailInterval(
+ startTime - lastLoadTimeMs);
+ // It is already under the name system lock and the checkpointer
+ // thread is already stopped. No need to acquire any other lock.
+ editsTailed = doTailEdits();
+ } catch (InterruptedException e) {
+ throw new IOException(e);
+ } finally {
+ NameNode.getNameNodeMetrics().addEditLogTailTime(
+ Time.monotonicNow() - startTime);
+ }
+ } while(editsTailed > 0);
return null;
}
});
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyInProgressTail.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyInProgressTail.java
index 0420579..7692966 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyInProgressTail.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyInProgressTail.java
@@ -42,6 +42,7 @@ import org.apache.hadoop.hdfs.server.namenode.NNStorage;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.test.GenericTestUtils;
import static org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter.getFileInfo;
+import static org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager.QJM_RPC_MAX_TXNS_KEY;
import org.junit.After;
import org.junit.Before;
@@ -72,6 +73,8 @@ public class TestStandbyInProgressTail {
conf.setBoolean(DFSConfigKeys.DFS_HA_TAILEDITS_INPROGRESS_KEY, true);
conf.setInt(DFSConfigKeys.DFS_QJOURNAL_SELECT_INPUT_STREAMS_TIMEOUT_KEY,
500);
+ // Set very small limit of transactions per journal RPC call
+ conf.setInt(QJM_RPC_MAX_TXNS_KEY, 3);
HAUtil.setAllowStandbyReads(conf, true);
qjmhaCluster = new MiniQJMHACluster.Builder(conf).build();
cluster = qjmhaCluster.getDfsCluster();
@@ -300,6 +303,31 @@ public class TestStandbyInProgressTail {
waitForFileInfo(nn1, "/test", "/test2", "/test3");
}
+ /**
+ * Test that Standby Node tails multiple segments while catching up
+ * during the transition to Active.
+ */
+ @Test
+ public void testUndertailingWhileFailover() throws Exception {
+ cluster.transitionToActive(0);
+ cluster.waitActive(0);
+
+ String p = "/testFailoverWhileTailingWithoutCache/";
+ mkdirs(nn0, p + 0, p + 1, p + 2, p + 3, p + 4);
+ nn0.getRpcServer().rollEditLog(); // create segment 1
+
+ mkdirs(nn0, p + 5, p + 6, p + 7, p + 8, p + 9);
+ nn0.getRpcServer().rollEditLog(); // create segment 2
+
+ mkdirs(nn0, p + 10, p + 11, p + 12, p + 13, p + 14);
+ nn0.getRpcServer().rollEditLog(); // create segment 3
+
+ cluster.transitionToStandby(0);
+ cluster.transitionToActive(1);
+ cluster.waitActive(1);
+ waitForFileInfo(nn1, p + 0, p + 1, p + 14);
+ }
+
@Test
public void testNonUniformConfig() throws Exception {
// Test case where some NNs (in this case the active NN) in the cluster
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org