You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by so...@apache.org on 2022/03/16 16:02:40 UTC

[ozone] branch HDDS-3816-ec updated: HDDS-6445. EC: Fix allocateBlock failure due to inaccurate excludedNodes check. (#3190)

This is an automated email from the ASF dual-hosted git repository.

sodonnell pushed a commit to branch HDDS-3816-ec
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/HDDS-3816-ec by this push:
     new 03ff3f9  HDDS-6445. EC: Fix allocateBlock failure due to inaccurate excludedNodes check. (#3190)
03ff3f9 is described below

commit 03ff3f959262f32aa334f84dfca6a6493550fb74
Author: Gui Hecheng <ma...@tencent.com>
AuthorDate: Wed Mar 16 23:57:54 2022 +0800

    HDDS-6445. EC: Fix allocateBlock failure due to inaccurate excludedNodes check. (#3190)
---
 .../SCMContainerPlacementRackScatter.java          | 13 +++++++----
 .../TestSCMContainerPlacementRackScatter.java      | 25 ++++++++++++++++++++++
 2 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackScatter.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackScatter.java
index 8503662..1902d51 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackScatter.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackScatter.java
@@ -24,7 +24,6 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.scm.ContainerPlacementStatus;
 import org.apache.hadoop.hdds.scm.SCMCommonPlacementPolicy;
 import org.apache.hadoop.hdds.scm.exceptions.SCMException;
-import org.apache.hadoop.hdds.scm.net.NetConstants;
 import org.apache.hadoop.hdds.scm.net.NetworkTopology;
 import org.apache.hadoop.hdds.scm.net.Node;
 import org.apache.hadoop.hdds.scm.node.NodeManager;
@@ -99,11 +98,17 @@ public final class SCMContainerPlacementRackScatter
       throws SCMException {
     Preconditions.checkArgument(nodesRequired > 0);
     metrics.incrDatanodeRequestCount(nodesRequired);
-    int datanodeCount = networkTopology.getNumOfLeafNode(NetConstants.ROOT);
     int excludedNodesCount = excludedNodes == null ? 0 : excludedNodes.size();
-    if (datanodeCount < nodesRequired + excludedNodesCount) {
+    List<Node> availableNodes = networkTopology.getNodes(
+        networkTopology.getMaxLevel());
+    int totalNodesCount = availableNodes.size();
+    if (excludedNodes != null) {
+      availableNodes.removeAll(excludedNodes);
+    }
+    if (availableNodes.size() < nodesRequired) {
       throw new SCMException("No enough datanodes to choose. " +
-          "TotalNode = " + datanodeCount +
+          "TotalNode = " + totalNodesCount +
+          " AvailableNode = " + availableNodes.size() +
           " RequiredNode = " + nodesRequired +
           " ExcludedNode = " + excludedNodesCount, null);
     }
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java
index 24eee13..1b45a75 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java
@@ -52,6 +52,8 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED;
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY;
 import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_RATIS_VOLUME_FREE_SPACE_MIN;
 import static org.apache.hadoop.hdds.scm.net.NetConstants.LEAF_SCHEMA;
 import static org.apache.hadoop.hdds.scm.net.NetConstants.RACK_SCHEMA;
@@ -492,6 +494,29 @@ public class TestSCMContainerPlacementRackScatter {
     assertEquals(0, stat.misReplicationCount());
   }
 
+  @Test
+  public void testExcludedNodesOverlapsOutOfServiceNodes() throws SCMException {
+    assumeTrue(datanodeCount == 6);
+
+    // DN 5 is out of service
+    dnInfos.get(5).setNodeStatus(new NodeStatus(DECOMMISSIONED, HEALTHY));
+
+    // SCM should have detected this and removed DN 5 from the cluster
+    cluster.remove(datanodes.get(5));
+
+    // We still have 5 DNs in service, so picking 5 should be possible
+    int nodeNum = 5;
+    List<DatanodeDetails> excludedNodes = new ArrayList<>();
+    // DN 5 is out of service, but the client has also already put it
+    // in its exclude list, so the excluded nodes overlap the
+    // out-of-service nodes.
+    excludedNodes.add(datanodes.get(5));
+
+    List<DatanodeDetails> datanodeDetails = policy.chooseDatanodes(
+        excludedNodes, null, nodeNum, 0, 5);
+    Assert.assertEquals(nodeNum, datanodeDetails.size());
+  }
+
   private int getRackSize(List<DatanodeDetails>... datanodeDetails) {
     Set<Node> racks = new HashSet<>();
     for (List<DatanodeDetails> list : datanodeDetails) {

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org