You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ar...@apache.org on 2016/02/22 20:47:29 UTC

[10/50] [abbrv] hadoop git commit: HDFS-9787. SNNs stop uploading FSImage to ANN once isPrimaryCheckPointer changed to false. (Contributed by Guocui Mi)

HDFS-9787. SNNs stop uploading FSImage to ANN once isPrimaryCheckPointer changed to false. (Contributed by Guocui Mi)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/2536ece7
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/2536ece7
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/2536ece7

Branch: refs/heads/HDFS-1312
Commit: 2536ece7b67ccfeac44314615a5f9dbe771ce373
Parents: 96ea309
Author: Vinayakumar B <vi...@apache.org>
Authored: Wed Feb 17 10:41:24 2016 +0530
Committer: Vinayakumar B <vi...@apache.org>
Committed: Wed Feb 17 10:41:24 2016 +0530

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |  3 ++
 .../server/namenode/ha/StandbyCheckpointer.java |  7 +++-
 .../namenode/ha/TestStandbyCheckpoints.java     | 42 ++++++++++++++++++++
 3 files changed, 51 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/2536ece7/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 0b220bf..79369da 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -935,6 +935,9 @@ Trunk (Unreleased)
       HDFS-9755. Erasure Coding: allow to use multiple EC policies in striping
       related tests [Part 2]. (Rui Li via zhz)
 
+      HDFS-9787. SNNs stop uploading FSImage to ANN once isPrimaryCheckPointer
+      changed to false. (Guocui Mi via vinayakumarb)
+
 Release 2.9.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2536ece7/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
index f5ecbec..099a6aa 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
@@ -65,6 +65,7 @@ public class StandbyCheckpointer {
   private final Configuration conf;
   private final FSNamesystem namesystem;
   private long lastCheckpointTime;
+  private long lastUploadTime;
   private final CheckpointerThread thread;
   private final ThreadFactory uploadThreadFactory;
   private List<URL> activeNNAddresses;
@@ -252,6 +253,7 @@ public class StandbyCheckpointer {
         break;
       }
     }
+    lastUploadTime = monotonicNow();
 
     // we are primary if we successfully updated the ANN
     this.isPrimaryCheckPointer = success;
@@ -362,6 +364,7 @@ public class StandbyCheckpointer {
       // Reset checkpoint time so that we don't always checkpoint
       // on startup.
       lastCheckpointTime = monotonicNow();
+      lastUploadTime = monotonicNow();
       while (shouldRun) {
         boolean needRollbackCheckpoint = namesystem.isNeedRollbackFsImage();
         if (!needRollbackCheckpoint) {
@@ -414,7 +417,9 @@ public class StandbyCheckpointer {
 
             // on all nodes, we build the checkpoint. However, we only ship the checkpoint if have a
             // rollback request, are the checkpointer, are outside the quiet period.
-            boolean sendRequest = isPrimaryCheckPointer || secsSinceLast >= checkpointConf.getQuietPeriod();
+            final long secsSinceLastUpload = (now - lastUploadTime) / 1000;
+            boolean sendRequest = isPrimaryCheckPointer
+                || secsSinceLastUpload >= checkpointConf.getQuietPeriod();
             doCheckpoint(sendRequest);
 
             // reset needRollbackCheckpoint to false only when we finish a ckpt

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2536ece7/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
index 37d346a..bdeeab5 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
@@ -458,6 +458,48 @@ public class TestStandbyCheckpoints {
     t.join();
   }
 
+  /**
+   * Test that standby NNs can still upload the FSImage to the ANN after
+   * becoming non-primary standby NNs. See HDFS-9787.
+   */
+  @Test(timeout=300000)
+  public void testNonPrimarySBNUploadFSImage() throws Exception {
+    // Shutdown all standby NNs.
+    for (int i = 1; i < NUM_NNS; i++) {
+      cluster.shutdownNameNode(i);
+
+      // Checkpoint as fast as we can, in a tight loop.
+      cluster.getConfiguration(i).setInt(
+        DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, 1);
+    }
+
+    doEdits(0, 10);
+    cluster.transitionToStandby(0);
+
+    // Standby NNs do checkpoint without active NN available.
+    for (int i = 1; i < NUM_NNS; i++) {
+      cluster.restartNameNode(i, false);
+    }
+    cluster.waitClusterUp();
+
+    for (int i = 0; i < NUM_NNS; i++) {
+      // Once the standby catches up, it should do a checkpoint
+      // and save to local directories.
+      HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(12));
+    }
+
+    cluster.transitionToActive(0);
+
+    // Wait for 2 seconds to expire last upload time.
+    Thread.sleep(2000);
+
+    doEdits(11, 20);
+    nns[0].getRpcServer().rollEditLog();
+
+    // One of standby NNs should also upload it back to the active.
+    HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(23));
+  }
+
   private void doEdits(int start, int stop) throws IOException {
     for (int i = start; i < stop; i++) {
       Path p = new Path("/test" + i);