Posted to common-commits@hadoop.apache.org by ta...@apache.org on 2022/04/14 02:52:34 UTC
[hadoop] branch branch-3.2 updated: HDFS-16479. EC: NameNode should not send a reconstruction work when the source datanodes are insufficient (#4138)
This is an automated email from the ASF dual-hosted git repository.
tasanuma pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.2 by this push:
new b8c6ba60463 HDFS-16479. EC: NameNode should not send a reconstruction work when the source datanodes are insufficient (#4138)
b8c6ba60463 is described below
commit b8c6ba60463cf835ec20f99244a827a3d3056165
Author: Takanobu Asanuma <ta...@apache.org>
AuthorDate: Thu Apr 14 11:23:38 2022 +0900
HDFS-16479. EC: NameNode should not send a reconstruction work when the source datanodes are insufficient (#4138)
(cherry picked from commit 2efab92959ca0a68c52ed6a9c721704e57afbcc7)
---
.../hdfs/server/blockmanagement/BlockManager.java | 10 +++
.../server/blockmanagement/TestBlockManager.java | 96 ++++++++++++++++++++++
2 files changed, 106 insertions(+)
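The heart of the change is a guard in BlockManager#scheduleReconstruction: for a striped (erasure-coded) block, reconstruction is skipped when fewer source datanodes are available than the number of data blocks the group actually uses, since decoding an RS(k, m) group needs at least that many inputs. A minimal standalone sketch of the check follows, using simplified stand-in names rather than the real HDFS types:

// Illustrative sketch only; the names below are simplified stand-ins for
// BlockInfoStriped/DatanodeDescriptor, not the actual HDFS API.
final class EcReconstructionGuard {
  /**
   * Returns true when reconstruction work may be scheduled, i.e. when the
   * available source nodes can supply enough internal blocks to decode the
   * group. For RS(k, m), decoding needs at least realDataBlockNum sources,
   * where realDataBlockNum <= k for groups shorter than a full stripe.
   */
  static boolean hasEnoughSources(int realDataBlockNum, int availableSrcNodes) {
    return availableSrcNodes >= realDataBlockNum;
  }

  public static void main(String[] args) {
    // Full RS-3-2 stripe: 3 data blocks needed, only 2 non-busy sources -> skip.
    System.out.println(hasEnoughSources(3, 2)); // false
    // Partial block group using 2 data cells: 2 sources suffice -> schedule.
    System.out.println(hasEnoughSources(2, 2)); // true
  }
}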
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index 25a47cd433e..9546be16d75 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -2060,6 +2060,16 @@ public class BlockManager implements BlockStatsMXBean {
return null;
}
+ // skip if there are not enough source datanodes to reconstruct the EC block
+ if (block.isStriped()) {
+ BlockInfoStriped stripedBlock = (BlockInfoStriped) block;
+ if (stripedBlock.getRealDataBlockNum() > srcNodes.length) {
+ LOG.debug("Block {} cannot be reconstructed due to a shortage of source datanodes", block);
+ NameNode.getNameNodeMetrics().incNumTimesReReplicationNotScheduled();
+ return null;
+ }
+ }
+
// liveReplicaNodes can include READ_ONLY_SHARED replicas which are
// not included in the numReplicas.liveReplicas() count
assert liveReplicaNodes.size() >= numReplicas.liveReplicas();
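The guard leans on BlockInfoStriped#getRealDataBlockNum, which accounts for block groups shorter than a full stripe: a group holding fewer bytes than numDataUnits * cellSize occupies fewer data blocks, so fewer sources suffice. A plausible derivation is sketched below; the formula is an assumption for illustration, not copied from the HDFS source:

// Assumed derivation, for illustration only: the real data block count is
// the number of cells the block group spans, capped at the policy's data
// units. Not copied from the HDFS source.
final class RealDataBlockNumSketch {
  static int realDataBlockNum(long numBytes, int cellSize, int numDataUnits) {
    if (numBytes <= 0) {
      return numDataUnits; // degenerate case; the real code may differ here
    }
    long cellsCovered = (numBytes + cellSize - 1) / cellSize; // ceiling division
    return (int) Math.min(numDataUnits, cellsCovered);
  }

  public static void main(String[] args) {
    int cellSize = 1024 * 1024; // assumed 1 MB cell size for the example
    // Full RS-3-2 stripe (first test below): 3 cells -> 3 real data blocks.
    System.out.println(realDataBlockNum(3L * cellSize, cellSize, 3)); // 3
    // Partial group of 2 cells (second test below): -> 2 real data blocks.
    System.out.println(realDataBlockNum(2L * cellSize, cellSize, 3)); // 2
  }
}

Under this reading, the second test's block group of (numDataUnits - 1) * cellSize bytes has a real data block count of 2 under RS-3-2, which is why two non-busy sources are still enough in its middle step.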
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
index d42d2d98e72..65e6c356620 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
@@ -842,6 +842,102 @@ public class TestBlockManager {
0, numReplicas.redundantInternalBlocks());
}
+ @Test
+ public void testSkipReconstructionWithManyBusyNodes() {
+ long blockId = -9223372036854775776L; // real ec block id
+ // RS-3-2 EC policy
+ ErasureCodingPolicy ecPolicy =
+ SystemErasureCodingPolicies.getPolicies().get(1);
+
+ // create an EC block group: 3 data blocks + 2 parity blocks
+ Block aBlockGroup = new Block(blockId, ecPolicy.getCellSize() * ecPolicy.getNumDataUnits(), 0);
+ BlockInfoStriped aBlockInfoStriped = new BlockInfoStriped(aBlockGroup, ecPolicy);
+
+ // create 4 storageInfos, which means 1 internal block is missing
+ DatanodeStorageInfo ds1 = DFSTestUtil.createDatanodeStorageInfo(
+ "storage1", "1.1.1.1", "rack1", "host1");
+ DatanodeStorageInfo ds2 = DFSTestUtil.createDatanodeStorageInfo(
+ "storage2", "2.2.2.2", "rack2", "host2");
+ DatanodeStorageInfo ds3 = DFSTestUtil.createDatanodeStorageInfo(
+ "storage3", "3.3.3.3", "rack3", "host3");
+ DatanodeStorageInfo ds4 = DFSTestUtil.createDatanodeStorageInfo(
+ "storage4", "4.4.4.4", "rack4", "host4");
+
+ // link block with storage
+ aBlockInfoStriped.addStorage(ds1, aBlockGroup);
+ aBlockInfoStriped.addStorage(ds2, new Block(blockId + 1, 0, 0));
+ aBlockInfoStriped.addStorage(ds3, new Block(blockId + 2, 0, 0));
+ aBlockInfoStriped.addStorage(ds4, new Block(blockId + 3, 0, 0));
+
+ addEcBlockToBM(blockId, ecPolicy);
+ aBlockInfoStriped.setBlockCollectionId(mockINodeId);
+
+ // reconstruction should be scheduled
+ BlockReconstructionWork work = bm.scheduleReconstruction(aBlockInfoStriped, 3);
+ assertNotNull(work);
+
+ // simulate 2 nodes reaching maxReplicationStreams
+ for (int i = 0; i < bm.maxReplicationStreams; i++) {
+ ds3.getDatanodeDescriptor().incrementPendingReplicationWithoutTargets();
+ ds4.getDatanodeDescriptor().incrementPendingReplicationWithoutTargets();
+ }
+
+ // reconstruction should be skipped since the number of non-busy nodes is not enough
+ work = bm.scheduleReconstruction(aBlockInfoStriped, 3);
+ assertNull(work);
+ }
+
+ @Test
+ public void testSkipReconstructionWithManyBusyNodes2() {
+ long blockId = -9223372036854775776L; // real ec block id
+ // RS-3-2 EC policy
+ ErasureCodingPolicy ecPolicy =
+ SystemErasureCodingPolicies.getPolicies().get(1);
+
+ // create an EC block group: 2 data blocks + 2 parity blocks
+ Block aBlockGroup = new Block(blockId,
+ ecPolicy.getCellSize() * (ecPolicy.getNumDataUnits() - 1), 0);
+ BlockInfoStriped aBlockInfoStriped = new BlockInfoStriped(aBlockGroup, ecPolicy);
+
+ // create 3 storageInfos, which means 1 internal block is missing
+ DatanodeStorageInfo ds1 = DFSTestUtil.createDatanodeStorageInfo(
+ "storage1", "1.1.1.1", "rack1", "host1");
+ DatanodeStorageInfo ds2 = DFSTestUtil.createDatanodeStorageInfo(
+ "storage2", "2.2.2.2", "rack2", "host2");
+ DatanodeStorageInfo ds3 = DFSTestUtil.createDatanodeStorageInfo(
+ "storage3", "3.3.3.3", "rack3", "host3");
+
+ // link block with storage
+ aBlockInfoStriped.addStorage(ds1, aBlockGroup);
+ aBlockInfoStriped.addStorage(ds2, new Block(blockId + 1, 0, 0));
+ aBlockInfoStriped.addStorage(ds3, new Block(blockId + 2, 0, 0));
+
+ addEcBlockToBM(blockId, ecPolicy);
+ aBlockInfoStriped.setBlockCollectionId(mockINodeId);
+
+ // reconstruction should be scheduled
+ BlockReconstructionWork work = bm.scheduleReconstruction(aBlockInfoStriped, 3);
+ assertNotNull(work);
+
+ // simulate 1 node reaching maxReplicationStreams
+ for (int i = 0; i < bm.maxReplicationStreams; i++) {
+ ds2.getDatanodeDescriptor().incrementPendingReplicationWithoutTargets();
+ }
+
+ // reconstruction should still be scheduled since the 2 remaining source nodes are enough for the 2 real data blocks
+ work = bm.scheduleReconstruction(aBlockInfoStriped, 3);
+ assertNotNull(work);
+
+ // simulate 1 more node reaching maxReplicationStreams
+ for (int i = 0; i < bm.maxReplicationStreams; i++) {
+ ds3.getDatanodeDescriptor().incrementPendingReplicationWithoutTargets();
+ }
+
+ // reconstruction should be skipped since the number of non-busy nodes is not enough
+ work = bm.scheduleReconstruction(aBlockInfoStriped, 3);
+ assertNull(work);
+ }
+
@Test
public void testFavorDecomUntilHardLimit() throws Exception {
bm.maxReplicationStreams = 0;
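Both tests make nodes busy by calling incrementPendingReplicationWithoutTargets in a loop until each node reaches bm.maxReplicationStreams; source selection then drops such nodes from srcNodes, which is what trips the new guard. A rough sketch of that gating with hypothetical names follows (the real logic in BlockManager#chooseSourceDatanodes weighs more factors, e.g. decommissioning state):

// Hypothetical simplification: a datanode already at its outbound
// reconstruction limit is skipped as a source. Names are illustrative.
final class BusySourceSketch {
  static boolean isBusy(int pendingReconstructions, int maxReplicationStreams) {
    return pendingReconstructions >= maxReplicationStreams;
  }

  public static void main(String[] args) {
    int maxStreams = 2;
    // After the test's loop runs maxReplicationStreams times, the node is busy.
    System.out.println(isBusy(2, maxStreams)); // true -> excluded from srcNodes
    System.out.println(isBusy(0, maxStreams)); // false -> usable as a source
  }
}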