You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by aa...@apache.org on 2020/06/28 08:24:10 UTC
[hadoop] branch branch-2.10 updated: HDFS-15421. IBR leak causes standby NN to be stuck in safe mode.
This is an automated email from the ASF dual-hosted git repository.
aajisaka pushed a commit to branch branch-2.10
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-2.10 by this push:
new fb694e8 HDFS-15421. IBR leak causes standby NN to be stuck in safe mode.
fb694e8 is described below
commit fb694e87a65d5f9613449827590463ce16f91a7f
Author: Akira Ajisaka <aa...@apache.org>
AuthorDate: Sun Jun 28 16:02:47 2020 +0900
HDFS-15421. IBR leak causes standby NN to be stuck in safe mode.
(cherry picked from commit c71ce7ac3370e220995bad0ae8b59d962c8d30a7)
---
.../hdfs/server/namenode/FSDirTruncateOp.java | 6 +-
.../hdfs/server/namenode/FSEditLogLoader.java | 8 +-
...ockTailing.java => TestUpdateBlockTailing.java} | 116 ++++++++++++++++++++-
3 files changed, 124 insertions(+), 6 deletions(-)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirTruncateOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirTruncateOp.java
index 9d3b3d9..3ec980b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirTruncateOp.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirTruncateOp.java
@@ -251,7 +251,11 @@ final class FSDirTruncateOp {
uc.setTruncateBlock(new BlockInfoContiguous(oldBlock,
oldBlock.getReplication()));
uc.getTruncateBlock().setNumBytes(oldBlock.getNumBytes() - lastBlockDelta);
- uc.getTruncateBlock().setGenerationStamp(newBlock.getGenerationStamp());
+ final long newGenerationStamp = newBlock.getGenerationStamp();
+ uc.getTruncateBlock().setGenerationStamp(newGenerationStamp);
+ // Update global generation stamp in Standby NameNode
+ blockManager.getBlockIdManager().setGenerationStampIfGreater(
+ newGenerationStamp);
truncatedBlockUC = oldBlock;
NameNode.stateChangeLog.debug("BLOCK* prepareFileForTruncate: " +
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
index 54f2c5c..a849503 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
@@ -1062,8 +1062,12 @@ public class FSEditLogLoader {
oldBlock.setNumBytes(newBlock.getNumBytes());
boolean changeMade =
oldBlock.getGenerationStamp() != newBlock.getGenerationStamp();
- oldBlock.setGenerationStamp(newBlock.getGenerationStamp());
-
+ final long newGenerationStamp = newBlock.getGenerationStamp();
+ oldBlock.setGenerationStamp(newGenerationStamp);
+ // Update global generation stamp in Standby NameNode
+ fsNamesys.getBlockManager().getBlockIdManager().
+ setGenerationStampIfGreater(newGenerationStamp);
+
if (!oldBlock.isComplete() &&
(!isLastBlock || op.shouldCompleteLastBlock())) {
changeMade = true;
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestAddBlockTailing.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestUpdateBlockTailing.java
similarity index 61%
rename from hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestAddBlockTailing.java
rename to hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestUpdateBlockTailing.java
index 6c0a04e..70cfe43 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestAddBlockTailing.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestUpdateBlockTailing.java
@@ -22,9 +22,13 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
+import java.util.EnumSet;
+import java.util.concurrent.ThreadLocalRandom;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys;
+import org.apache.hadoop.fs.CreateFlag;
+import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DFSTestUtil;
@@ -43,17 +47,18 @@ import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo;
import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
import org.junit.AfterClass;
+import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
/**
- * Tests the race condition that IBR and add block may result
+ * Tests the race condition that IBR and update block may result
* in inconsistent block genstamp.
*/
-public class TestAddBlockTailing {
+public class TestUpdateBlockTailing {
private static final int BLOCK_SIZE = 8192;
- private static final String TEST_DIR = "/TestAddBlockTailing";
+ private static final String TEST_DIR = "/TestUpdateBlockTailing";
private static MiniQJMHACluster qjmhaCluster;
private static MiniDFSCluster dfsCluster;
@@ -87,6 +92,12 @@ public class TestAddBlockTailing {
}
}
+ @Before
+ public void reset() throws Exception {
+ dfsCluster.transitionToStandby(1);
+ dfsCluster.transitionToActive(0);
+ }
+
@Test
public void testStandbyAddBlockIBRRace() throws Exception {
String testFile = TEST_DIR +"/testStandbyAddBlockIBRRace";
@@ -161,4 +172,103 @@ public class TestAddBlockTailing {
rpc1.delete(testFile, false);
}
+
+ @Test
+ public void testStandbyAppendBlock() throws Exception {
+ final String testFile = TEST_DIR +"/testStandbyAppendBlock";
+ final long fileLen = 1 << 16;
+ // Create a file
+ DFSTestUtil.createFile(dfs, new Path(testFile), fileLen, (short)1, 0);
+ // NN1 tails OP_SET_GENSTAMP_V2 and OP_ADD_BLOCK
+ fsn0.getEditLog().logSync();
+ fsn1.getEditLogTailer().doTailEdits();
+ assertEquals("Global Generation stamps on NN0 and "
+ + "NN1 should be equal",
+ NameNodeAdapter.getGenerationStamp(fsn0),
+ NameNodeAdapter.getGenerationStamp(fsn1));
+
+ // Append block without newBlock flag
+ try (FSDataOutputStream out = dfs.append(new Path(testFile))) {
+ final byte[] data = new byte[1 << 16];
+ ThreadLocalRandom.current().nextBytes(data);
+ out.write(data);
+ }
+
+ // NN1 tails OP_APPEND, OP_SET_GENSTAMP_V2, and OP_UPDATE_BLOCKS
+ fsn0.getEditLog().logSync();
+ fsn1.getEditLogTailer().doTailEdits();
+ assertEquals("Global Generation stamps on NN0 and "
+ + "NN1 should be equal",
+ NameNodeAdapter.getGenerationStamp(fsn0),
+ NameNodeAdapter.getGenerationStamp(fsn1));
+
+ // Remove the testFile
+ final ClientProtocol rpc0 = dfsCluster.getNameNode(0).getRpcServer();
+ rpc0.delete(testFile, false);
+ }
+
+ @Test
+ public void testStandbyAppendNewBlock() throws Exception {
+ final String testFile = TEST_DIR +"/testStandbyAppendNewBlock";
+ final long fileLen = 1 << 16;
+ // Create a file
+ DFSTestUtil.createFile(dfs, new Path(testFile), fileLen, (short)1, 0);
+ // NN1 tails OP_SET_GENSTAMP_V2 and OP_ADD_BLOCK
+ fsn0.getEditLog().logSync();
+ fsn1.getEditLogTailer().doTailEdits();
+ assertEquals("Global Generation stamps on NN0 and "
+ + "NN1 should be equal",
+ NameNodeAdapter.getGenerationStamp(fsn0),
+ NameNodeAdapter.getGenerationStamp(fsn1));
+
+ // Append block with newBlock flag
+ try (FSDataOutputStream out = dfs.append(new Path(testFile),
+ EnumSet.of(CreateFlag.APPEND, CreateFlag.NEW_BLOCK), 4096, null)) {
+ final byte[] data = new byte[1 << 16];
+ ThreadLocalRandom.current().nextBytes(data);
+ out.write(data);
+ }
+
+ // NN1 tails OP_APPEND, OP_SET_GENSTAMP_V2, and OP_ADD_BLOCK
+ fsn0.getEditLog().logSync();
+ fsn1.getEditLogTailer().doTailEdits();
+ assertEquals("Global Generation stamps on NN0 and "
+ + "NN1 should be equal",
+ NameNodeAdapter.getGenerationStamp(fsn0),
+ NameNodeAdapter.getGenerationStamp(fsn1));
+
+ // Remove the testFile
+ final ClientProtocol rpc0 = dfsCluster.getNameNode(0).getRpcServer();
+ rpc0.delete(testFile, false);
+ }
+
+ @Test
+ public void testStandbyTruncateBlock() throws Exception {
+ final String testFile = TEST_DIR +"/testStandbyTruncateBlock";
+ final long fileLen = 1 << 16;
+ // Create a file
+ DFSTestUtil.createFile(dfs, new Path(testFile), fileLen, (short)1, 0);
+ // NN1 tails OP_SET_GENSTAMP_V2 and OP_ADD_BLOCK
+ fsn0.getEditLog().logSync();
+ fsn1.getEditLogTailer().doTailEdits();
+ assertEquals("Global Generation stamps on NN0 and "
+ + "NN1 should be equal",
+ NameNodeAdapter.getGenerationStamp(fsn0),
+ NameNodeAdapter.getGenerationStamp(fsn1));
+
+ // Truncate block
+ dfs.truncate(new Path(testFile), fileLen/2);
+
+ // NN1 tails OP_SET_GENSTAMP_V2 and OP_TRUNCATE
+ fsn0.getEditLog().logSync();
+ fsn1.getEditLogTailer().doTailEdits();
+ assertEquals("Global Generation stamps on NN0 and "
+ + "NN1 should be equal",
+ NameNodeAdapter.getGenerationStamp(fsn0),
+ NameNodeAdapter.getGenerationStamp(fsn1));
+
+ // Remove the testFile
+ final ClientProtocol rpc0 = dfsCluster.getNameNode(0).getRpcServer();
+ rpc0.delete(testFile, false);
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org