You are viewing a plain-text version of this content; the canonical hyperlink was lost in the plain-text conversion and is available in the original HTML message.
Posted to common-commits@hadoop.apache.org by cl...@apache.org on 2019/12/12 20:13:06 UTC
[hadoop] 01/01: HDFS-15036. Active NameNode should not silently fail the image transfer. Contributed by Chen Liang.
This is an automated email from the ASF dual-hosted git repository.
cliang pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hadoop.git
commit d9264b6ec86ae64240cf77b99fd351de1e8175f5
Author: Chen Liang <cl...@apache.org>
AuthorDate: Thu Dec 12 12:12:44 2019 -0800
HDFS-15036. Active NameNode should not silently fail the image transfer. Contributed by Chen Liang.
---
.../hadoop/hdfs/server/namenode/ImageServlet.java | 6 ++++++
.../hdfs/server/namenode/ha/StandbyCheckpointer.java | 12 +++++++++++-
.../org/apache/hadoop/hdfs/TestRollingUpgrade.java | 20 ++++++++++++++++++++
3 files changed, 37 insertions(+), 1 deletion(-)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java
index ad8b159..3dcc168 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java
@@ -573,7 +573,13 @@ public class ImageServlet extends HttpServlet {
long timeDelta = TimeUnit.MILLISECONDS.toSeconds(
now - lastCheckpointTime);
+ // Since the goal of the check below is to prevent overly
+ // frequent upload from Standby, the check should only be done
+ // for the periodical upload from Standby. For the other
+ // scenarios such as rollback image and ckpt file, they skip
+ // this check, see HDFS-15036 for more info.
if (checkRecentImageEnable &&
+ NameNodeFile.IMAGE.equals(parsedParams.getNameNodeFile()) &&
timeDelta < checkpointPeriod &&
txid - lastCheckpointTxid < checkpointTxnCount) {
// only when at least one of two conditions are met we accept
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
index c05a0da..5cac972 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
@@ -289,10 +289,20 @@ public class StandbyCheckpointer {
// TODO should there be some smarts here about retries nodes that
// are not the active NN?
CheckpointReceiverEntry receiverEntry = checkpointReceivers.get(url);
- if (upload.get() == TransferFsImage.TransferResult.SUCCESS) {
+ TransferFsImage.TransferResult uploadResult = upload.get();
+ if (uploadResult == TransferFsImage.TransferResult.SUCCESS) {
receiverEntry.setLastUploadTime(monotonicNow());
receiverEntry.setIsPrimary(true);
} else {
+ // Getting here means image upload is explicitly rejected
+ // by the other node. This could happen if:
+ // 1. the other is also a standby, or
+ // 2. the other is active, but already accepted another
+ // newer image, or
+ // 3. the other is active but has a recent enough image.
+ // All these are valid cases, just log for information.
+ LOG.info("Image upload rejected by the other NameNode: " +
+ uploadResult);
receiverEntry.setIsPrimary(false);
}
} catch (ExecutionException e) {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java
index 749ec96..579c5ef 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java
@@ -57,6 +57,7 @@ import org.junit.Assert;
import static org.junit.Assert.assertTrue;
import org.junit.Test;
+import static org.apache.hadoop.hdfs.server.namenode.ImageServlet.RECENT_IMAGE_CHECK_ENABLED;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNull;
@@ -430,7 +431,22 @@ public class TestRollingUpgrade {
testFinalize(3);
}
+ @Test(timeout = 300000)
+ public void testFinalizeWithDeltaCheck() throws Exception {
+ testFinalize(2, true);
+ }
+
+ @Test(timeout = 300000)
+ public void testFinalizeWithMultipleNNDeltaCheck() throws Exception {
+ testFinalize(3, true);
+ }
+
private void testFinalize(int nnCount) throws Exception {
+ testFinalize(nnCount, false);
+ }
+
+ private void testFinalize(int nnCount, boolean skipImageDeltaCheck)
+ throws Exception {
final Configuration conf = new HdfsConfiguration();
MiniQJMHACluster cluster = null;
final Path foo = new Path("/foo");
@@ -449,6 +465,10 @@ public class TestRollingUpgrade {
dfsCluster.restartNameNodes();
dfsCluster.transitionToActive(0);
+
+ dfsCluster.getNameNode(0).getHttpServer()
+ .setAttribute(RECENT_IMAGE_CHECK_ENABLED, skipImageDeltaCheck);
+
DistributedFileSystem dfs = dfsCluster.getFileSystem(0);
dfs.mkdirs(foo);
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org