You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by aa...@apache.org on 2020/06/28 07:43:28 UTC

[hadoop] branch branch-3.2 updated: HDFS-15421. IBR leak causes standby NN to be stuck in safe mode.

This is an automated email from the ASF dual-hosted git repository.

aajisaka pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new 37e0a12  HDFS-15421. IBR leak causes standby NN to be stuck in safe mode.
37e0a12 is described below

commit 37e0a1208ceb591d461217529615971f8c1b136d
Author: Akira Ajisaka <aa...@apache.org>
AuthorDate: Sun Jun 28 16:02:47 2020 +0900

    HDFS-15421. IBR leak causes standby NN to be stuck in safe mode.
    
    (cherry picked from commit c71ce7ac3370e220995bad0ae8b59d962c8d30a7)
---
 .../hdfs/server/namenode/FSDirTruncateOp.java      |   6 +-
 .../hdfs/server/namenode/FSEditLogLoader.java      |   8 +-
 ...ockTailing.java => TestUpdateBlockTailing.java} | 116 ++++++++++++++++++++-
 3 files changed, 124 insertions(+), 6 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirTruncateOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirTruncateOp.java
index bf55d30..ee50ee9 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirTruncateOp.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirTruncateOp.java
@@ -262,7 +262,11 @@ final class FSDirTruncateOp {
       uc.setTruncateBlock(new BlockInfoContiguous(oldBlock,
           oldBlock.getReplication()));
       uc.getTruncateBlock().setNumBytes(oldBlock.getNumBytes() - lastBlockDelta);
-      uc.getTruncateBlock().setGenerationStamp(newBlock.getGenerationStamp());
+      final long newGenerationStamp = newBlock.getGenerationStamp();
+      uc.getTruncateBlock().setGenerationStamp(newGenerationStamp);
+      // Update global generation stamp in Standby NameNode
+      blockManager.getBlockIdManager().setGenerationStampIfGreater(
+          newGenerationStamp);
       truncatedBlockUC = oldBlock;
 
       NameNode.stateChangeLog.debug("BLOCK* prepareFileForTruncate: " +
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
index ce9a7fa..c99ea70 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
@@ -1149,8 +1149,12 @@ public class FSEditLogLoader {
       oldBlock.setNumBytes(newBlock.getNumBytes());
       boolean changeMade =
         oldBlock.getGenerationStamp() != newBlock.getGenerationStamp();
-      oldBlock.setGenerationStamp(newBlock.getGenerationStamp());
-      
+      final long newGenerationStamp = newBlock.getGenerationStamp();
+      oldBlock.setGenerationStamp(newGenerationStamp);
+      // Update global generation stamp in Standby NameNode
+      fsNamesys.getBlockManager().getBlockIdManager().
+          setGenerationStampIfGreater(newGenerationStamp);
+
       if (!oldBlock.isComplete() &&
           (!isLastBlock || op.shouldCompleteLastBlock())) {
         changeMade = true;
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestAddBlockTailing.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestUpdateBlockTailing.java
similarity index 61%
rename from hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestAddBlockTailing.java
rename to hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestUpdateBlockTailing.java
index 48c09ed..1462314 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestAddBlockTailing.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestUpdateBlockTailing.java
@@ -22,9 +22,13 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
+import java.util.EnumSet;
+import java.util.concurrent.ThreadLocalRandom;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeys;
+import org.apache.hadoop.fs.CreateFlag;
+import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DFSTestUtil;
@@ -43,17 +47,18 @@ import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
 import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo;
 import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
 import org.junit.AfterClass;
+import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
 
 /**
- * Tests the race condition that IBR and add block may result
+ * Tests the race condition in which an IBR and a block update may result
  * in inconsistent block genstamp.
  */
-public class TestAddBlockTailing {
+public class TestUpdateBlockTailing {
   private static final int BLOCK_SIZE = 8192;
-  private static final String TEST_DIR = "/TestAddBlockTailing";
+  private static final String TEST_DIR = "/TestUpdateBlockTailing";
 
   private static MiniQJMHACluster qjmhaCluster;
   private static MiniDFSCluster dfsCluster;
@@ -87,6 +92,12 @@ public class TestAddBlockTailing {
     }
   }
 
+  @Before
+  public void reset() throws Exception {
+    dfsCluster.transitionToStandby(1);
+    dfsCluster.transitionToActive(0);
+  }
+
   @Test
   public void testStandbyAddBlockIBRRace() throws Exception {
     String testFile = TEST_DIR +"/testStandbyAddBlockIBRRace";
@@ -161,4 +172,103 @@ public class TestAddBlockTailing {
 
     rpc1.delete(testFile, false);
   }
+
+  @Test
+  public void testStandbyAppendBlock() throws Exception {
+    final String testFile = TEST_DIR +"/testStandbyAppendBlock";
+    final long fileLen = 1 << 16;
+    // Create a file
+    DFSTestUtil.createFile(dfs, new Path(testFile), fileLen, (short)1, 0);
+    // NN1 tails OP_SET_GENSTAMP_V2 and OP_ADD_BLOCK
+    fsn0.getEditLog().logSync();
+    fsn1.getEditLogTailer().doTailEdits();
+    assertEquals("Global Generation stamps on NN0 and "
+            + "NN1 should be equal",
+        NameNodeAdapter.getGenerationStamp(fsn0),
+        NameNodeAdapter.getGenerationStamp(fsn1));
+
+    // Append block without newBlock flag
+    try (FSDataOutputStream out = dfs.append(new Path(testFile))) {
+      final byte[] data = new byte[1 << 16];
+      ThreadLocalRandom.current().nextBytes(data);
+      out.write(data);
+    }
+
+    // NN1 tails OP_APPEND, OP_SET_GENSTAMP_V2, and OP_UPDATE_BLOCKS
+    fsn0.getEditLog().logSync();
+    fsn1.getEditLogTailer().doTailEdits();
+    assertEquals("Global Generation stamps on NN0 and "
+            + "NN1 should be equal",
+        NameNodeAdapter.getGenerationStamp(fsn0),
+        NameNodeAdapter.getGenerationStamp(fsn1));
+
+    // Remove the testFile
+    final ClientProtocol rpc0 = dfsCluster.getNameNode(0).getRpcServer();
+    rpc0.delete(testFile, false);
+  }
+
+  @Test
+  public void testStandbyAppendNewBlock() throws Exception {
+    final String testFile = TEST_DIR +"/testStandbyAppendNewBlock";
+    final long fileLen = 1 << 16;
+    // Create a file
+    DFSTestUtil.createFile(dfs, new Path(testFile), fileLen, (short)1, 0);
+    // NN1 tails OP_SET_GENSTAMP_V2 and OP_ADD_BLOCK
+    fsn0.getEditLog().logSync();
+    fsn1.getEditLogTailer().doTailEdits();
+    assertEquals("Global Generation stamps on NN0 and "
+            + "NN1 should be equal",
+        NameNodeAdapter.getGenerationStamp(fsn0),
+        NameNodeAdapter.getGenerationStamp(fsn1));
+
+    // Append block with newBlock flag
+    try (FSDataOutputStream out = dfs.append(new Path(testFile),
+        EnumSet.of(CreateFlag.APPEND, CreateFlag.NEW_BLOCK), 4096, null)) {
+      final byte[] data = new byte[1 << 16];
+      ThreadLocalRandom.current().nextBytes(data);
+      out.write(data);
+    }
+
+    // NN1 tails OP_APPEND, OP_SET_GENSTAMP_V2, and OP_ADD_BLOCK
+    fsn0.getEditLog().logSync();
+    fsn1.getEditLogTailer().doTailEdits();
+    assertEquals("Global Generation stamps on NN0 and "
+            + "NN1 should be equal",
+        NameNodeAdapter.getGenerationStamp(fsn0),
+        NameNodeAdapter.getGenerationStamp(fsn1));
+
+    // Remove the testFile
+    final ClientProtocol rpc0 = dfsCluster.getNameNode(0).getRpcServer();
+    rpc0.delete(testFile, false);
+  }
+
+  @Test
+  public void testStandbyTruncateBlock() throws Exception {
+    final String testFile = TEST_DIR +"/testStandbyTruncateBlock";
+    final long fileLen = 1 << 16;
+    // Create a file
+    DFSTestUtil.createFile(dfs, new Path(testFile), fileLen, (short)1, 0);
+    // NN1 tails OP_SET_GENSTAMP_V2 and OP_ADD_BLOCK
+    fsn0.getEditLog().logSync();
+    fsn1.getEditLogTailer().doTailEdits();
+    assertEquals("Global Generation stamps on NN0 and "
+            + "NN1 should be equal",
+        NameNodeAdapter.getGenerationStamp(fsn0),
+        NameNodeAdapter.getGenerationStamp(fsn1));
+
+    // Truncate block
+    dfs.truncate(new Path(testFile), fileLen/2);
+
+    // NN1 tails OP_SET_GENSTAMP_V2 and OP_TRUNCATE
+    fsn0.getEditLog().logSync();
+    fsn1.getEditLogTailer().doTailEdits();
+    assertEquals("Global Generation stamps on NN0 and "
+            + "NN1 should be equal",
+        NameNodeAdapter.getGenerationStamp(fsn0),
+        NameNodeAdapter.getGenerationStamp(fsn1));
+
+    // Remove the testFile
+    final ClientProtocol rpc0 = dfsCluster.getNameNode(0).getRpcServer();
+    rpc0.delete(testFile, false);
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org