You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cl...@apache.org on 2019/12/12 23:59:09 UTC
[hadoop] branch branch-3.1 updated: HDFS-15036. Active NameNode
should not silently fail the image transfer. Contributed by Chen Liang.
This is an automated email from the ASF dual-hosted git repository.
cliang pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new ea0eeb8 HDFS-15036. Active NameNode should not silently fail the image transfer. Contributed by Chen Liang.
ea0eeb8 is described below
commit ea0eeb8f1ab710cbe552c99afbbd41625656f8d0
Author: Chen Liang <cl...@apache.org>
AuthorDate: Thu Dec 12 10:22:05 2019 -0800
HDFS-15036. Active NameNode should not silently fail the image transfer. Contributed by Chen Liang.
(cherry picked from commit 65c4660bcd897e139fc175ca438cff75ec0c6be8)
(cherry picked from commit d4a6901c429a24dffdb5c884b1ba8e6492c233fe)
---
.../hadoop/hdfs/server/namenode/ImageServlet.java | 6 ++++++
.../hdfs/server/namenode/ha/StandbyCheckpointer.java | 12 +++++++++++-
.../org/apache/hadoop/hdfs/TestRollingUpgrade.java | 20 ++++++++++++++++++++
3 files changed, 37 insertions(+), 1 deletion(-)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java
index 70cb62f..5b3e64b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java
@@ -573,7 +573,13 @@ public class ImageServlet extends HttpServlet {
long timeDelta = TimeUnit.MILLISECONDS.toSeconds(
now - lastCheckpointTime);
+ // Since the goal of the check below is to prevent overly
+ // frequent upload from Standby, the check should only be done
+ // for the periodical upload from Standby. For the other
+ // scenarios such as rollback image and ckpt file, they skip
+ // this check, see HDFS-15036 for more info.
if (checkRecentImageEnable &&
+ NameNodeFile.IMAGE.equals(parsedParams.getNameNodeFile()) &&
timeDelta < checkpointPeriod &&
txid - lastCheckpointTxid < checkpointTxnCount) {
// only when at least one of two conditions are met we accept
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
index d338155..daa836a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
@@ -292,10 +292,20 @@ public class StandbyCheckpointer {
// TODO should there be some smarts here about retries nodes that
// are not the active NN?
CheckpointReceiverEntry receiverEntry = checkpointReceivers.get(url);
- if (upload.get() == TransferFsImage.TransferResult.SUCCESS) {
+ TransferFsImage.TransferResult uploadResult = upload.get();
+ if (uploadResult == TransferFsImage.TransferResult.SUCCESS) {
receiverEntry.setLastUploadTime(monotonicNow());
receiverEntry.setIsPrimary(true);
} else {
+ // Getting here means image upload is explicitly rejected
+ // by the other node. This could happen if:
+ // 1. the other is also a standby, or
+ // 2. the other is active, but already accepted another
+ // newer image, or
+ // 3. the other is active but has a recent enough image.
+ // All these are valid cases, just log for information.
+ LOG.info("Image upload rejected by the other NameNode: {}",
+ uploadResult);
receiverEntry.setIsPrimary(false);
}
} catch (ExecutionException e) {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java
index 0545b04..489cd1e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java
@@ -58,6 +58,7 @@ import org.apache.hadoop.test.GenericTestUtils;
import org.junit.Assert;
import org.junit.Test;
+import static org.apache.hadoop.hdfs.server.namenode.ImageServlet.RECENT_IMAGE_CHECK_ENABLED;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNull;
@@ -431,7 +432,22 @@ public class TestRollingUpgrade {
testFinalize(3);
}
+ @Test(timeout = 300000)
+ public void testFinalizeWithDeltaCheck() throws Exception {
+ testFinalize(2, true);
+ }
+
+ @Test(timeout = 300000)
+ public void testFinalizeWithMultipleNNDeltaCheck() throws Exception {
+ testFinalize(3, true);
+ }
+
private void testFinalize(int nnCount) throws Exception {
+ testFinalize(nnCount, false);
+ }
+
+ private void testFinalize(int nnCount, boolean skipImageDeltaCheck)
+ throws Exception {
final Configuration conf = new HdfsConfiguration();
MiniQJMHACluster cluster = null;
final Path foo = new Path("/foo");
@@ -450,6 +466,10 @@ public class TestRollingUpgrade {
dfsCluster.restartNameNodes();
dfsCluster.transitionToActive(0);
+
+ dfsCluster.getNameNode(0).getHttpServer()
+ .setAttribute(RECENT_IMAGE_CHECK_ENABLED, skipImageDeltaCheck);
+
DistributedFileSystem dfs = dfsCluster.getFileSystem(0);
dfs.mkdirs(foo);
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org