You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cl...@apache.org on 2019/12/12 23:59:09 UTC

[hadoop] branch branch-3.1 updated: HDFS-15036. Active NameNode should not silently fail the image transfer. Contributed by Chen Liang.

This is an automated email from the ASF dual-hosted git repository.

cliang pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new ea0eeb8  HDFS-15036. Active NameNode should not silently fail the image transfer. Contributed by Chen Liang.
ea0eeb8 is described below

commit ea0eeb8f1ab710cbe552c99afbbd41625656f8d0
Author: Chen Liang <cl...@apache.org>
AuthorDate: Thu Dec 12 10:22:05 2019 -0800

    HDFS-15036. Active NameNode should not silently fail the image transfer. Contributed by Chen Liang.
    
    (cherry picked from commit 65c4660bcd897e139fc175ca438cff75ec0c6be8)
    (cherry picked from commit d4a6901c429a24dffdb5c884b1ba8e6492c233fe)
---
 .../hadoop/hdfs/server/namenode/ImageServlet.java    |  6 ++++++
 .../hdfs/server/namenode/ha/StandbyCheckpointer.java | 12 +++++++++++-
 .../org/apache/hadoop/hdfs/TestRollingUpgrade.java   | 20 ++++++++++++++++++++
 3 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java
index 70cb62f..5b3e64b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java
@@ -573,7 +573,13 @@ public class ImageServlet extends HttpServlet {
               long timeDelta = TimeUnit.MILLISECONDS.toSeconds(
                   now - lastCheckpointTime);
 
+              // The check below exists to prevent overly frequent image
+              // uploads from the Standby, so it applies only to the periodic
+              // checkpoint upload (NameNodeFile.IMAGE). Other uploads, such
+              // as rollback images and ckpt files, skip this check. See
+              // HDFS-15036 for more info.
               if (checkRecentImageEnable &&
+                  NameNodeFile.IMAGE.equals(parsedParams.getNameNodeFile()) &&
                   timeDelta < checkpointPeriod &&
                   txid - lastCheckpointTxid < checkpointTxnCount) {
                 // only when at least one of two conditions are met we accept
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
index d338155..daa836a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
@@ -292,10 +292,20 @@ public class StandbyCheckpointer {
         // TODO should there be some smarts here about retries nodes that
         //  are not the active NN?
         CheckpointReceiverEntry receiverEntry = checkpointReceivers.get(url);
-        if (upload.get() == TransferFsImage.TransferResult.SUCCESS) {
+        TransferFsImage.TransferResult uploadResult = upload.get();
+        if (uploadResult == TransferFsImage.TransferResult.SUCCESS) {
           receiverEntry.setLastUploadTime(monotonicNow());
           receiverEntry.setIsPrimary(true);
         } else {
+          // Getting here means image upload is explicitly rejected
+          // by the other node. This could happen if:
+          // 1. the other is also a standby, or
+          // 2. the other is active, but already accepted another
+          // newer image, or
+          // 3. the other is active but has a recent enough image.
+          // All of these are valid cases, so just log them for information.
+          LOG.info("Image upload rejected by the other NameNode: {}",
+              uploadResult);
           receiverEntry.setIsPrimary(false);
         }
       } catch (ExecutionException e) {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java
index 0545b04..489cd1e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java
@@ -58,6 +58,7 @@ import org.apache.hadoop.test.GenericTestUtils;
 import org.junit.Assert;
 import org.junit.Test;
 
+import static org.apache.hadoop.hdfs.server.namenode.ImageServlet.RECENT_IMAGE_CHECK_ENABLED;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertNull;
@@ -431,7 +432,22 @@ public class TestRollingUpgrade {
     testFinalize(3);
   }
 
+  @Test(timeout = 300000)
+  public void testFinalizeWithDeltaCheck() throws Exception {
+    testFinalize(2, true);
+  }
+
+  @Test(timeout = 300000)
+  public void testFinalizeWithMultipleNNDeltaCheck() throws Exception {
+    testFinalize(3, true);
+  }
+
   private void testFinalize(int nnCount) throws Exception {
+    testFinalize(nnCount, false);
+  }
+
+  private void testFinalize(int nnCount, boolean skipImageDeltaCheck)
+      throws Exception {
     final Configuration conf = new HdfsConfiguration();
     MiniQJMHACluster cluster = null;
     final Path foo = new Path("/foo");
@@ -450,6 +466,10 @@ public class TestRollingUpgrade {
       dfsCluster.restartNameNodes();
 
       dfsCluster.transitionToActive(0);
+
+      dfsCluster.getNameNode(0).getHttpServer()
+          .setAttribute(RECENT_IMAGE_CHECK_ENABLED, skipImageDeltaCheck);
+
       DistributedFileSystem dfs = dfsCluster.getFileSystem(0);
       dfs.mkdirs(foo);
 


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org