You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by so...@apache.org on 2021/04/28 13:41:27 UTC

[ozone] branch master updated: HDDS-5153. Decommissioning a dead node should complete immediately (#2190)

This is an automated email from the ASF dual-hosted git repository.

sodonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new a920f25  HDDS-5153. Decommissioning a dead node should complete immediately (#2190)
a920f25 is described below

commit a920f25de422181d839bd7166c353edc17f30cf2
Author: Stephen O'Donnell <st...@gmail.com>
AuthorDate: Wed Apr 28 14:41:05 2021 +0100

    HDDS-5153. Decommissioning a dead node should complete immediately (#2190)
---
 .../hdds/scm/node/NodeDecommissionManager.java     | 31 ++++++++++++++------
 .../hdds/scm/node/TestNodeDecommissionManager.java | 33 ++++++++++++++++++++++
 2 files changed, 56 insertions(+), 8 deletions(-)

diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java
index 33c9697..8462ac7 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java
@@ -20,6 +20,7 @@ import com.google.common.annotations.VisibleForTesting;
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState;
 import org.apache.hadoop.hdds.scm.DatanodeAdminError;
 import org.apache.hadoop.hdds.scm.ScmConfigKeys;
@@ -268,11 +269,18 @@ public class NodeDecommissionManager {
       throws NodeNotFoundException, InvalidNodeStateException {
     NodeStatus nodeStatus = getNodeStatus(dn);
     NodeOperationalState opState = nodeStatus.getOperationalState();
+    HddsProtos.NodeState health = nodeStatus.getHealth();
     if (opState == NodeOperationalState.IN_SERVICE) {
-      LOG.info("Starting Decommission for node {}", dn);
-      nodeManager.setNodeOperationalState(
-          dn, NodeOperationalState.DECOMMISSIONING);
-      monitor.startMonitoring(dn);
+      if (health != HddsProtos.NodeState.DEAD) {
+        LOG.info("Starting Decommission for node {}", dn);
+        nodeManager.setNodeOperationalState(
+            dn, NodeOperationalState.DECOMMISSIONING);
+        monitor.startMonitoring(dn);
+      } else {
+        LOG.info("{} is dead. Moving to decommissioned immediately", dn);
+        nodeManager.setNodeOperationalState(
+            dn, NodeOperationalState.DECOMMISSIONED);
+      }
     } else if (nodeStatus.isDecommission()) {
       LOG.info("Start Decommission called on node {} in state {}. Nothing to "+
           "do.", dn, opState);
@@ -354,11 +362,18 @@ public class NodeDecommissionManager {
       maintenanceEnd =
           (System.currentTimeMillis() / 1000L) + (endInHours * 60L * 60L);
     }
+    HddsProtos.NodeState health = nodeStatus.getHealth();
     if (opState == NodeOperationalState.IN_SERVICE) {
-      nodeManager.setNodeOperationalState(
-          dn, NodeOperationalState.ENTERING_MAINTENANCE, maintenanceEnd);
-      monitor.startMonitoring(dn);
-      LOG.info("Starting Maintenance for node {}", dn);
+      if (health != HddsProtos.NodeState.DEAD) {
+        nodeManager.setNodeOperationalState(
+            dn, NodeOperationalState.ENTERING_MAINTENANCE, maintenanceEnd);
+        monitor.startMonitoring(dn);
+        LOG.info("Starting Maintenance for node {}", dn);
+      }  else {
+        LOG.info("{} is dead. Moving to maintenance immediately", dn);
+        nodeManager.setNodeOperationalState(
+            dn, NodeOperationalState.IN_MAINTENANCE);
+      }
     } else if (nodeStatus.isMaintenance()) {
       LOG.info("Starting Maintenance called on node {} with state {}. "+
           "Nothing to do.", dn, opState);
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeDecommissionManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeDecommissionManager.java
index 665c3f7..5b84051 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeDecommissionManager.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeDecommissionManager.java
@@ -24,6 +24,7 @@ import org.apache.hadoop.hdds.protocol.MockDatanodeDetails;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.scm.TestUtils;
 import org.apache.hadoop.hdds.scm.DatanodeAdminError;
+import org.apache.hadoop.hdds.scm.container.SimpleMockNodeManager;
 import org.apache.hadoop.hdds.scm.ha.SCMContext;
 import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
 import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
@@ -177,6 +178,22 @@ public class TestNodeDecommissionManager {
   }
 
   @Test
+  public void testDeadNodeDecommissionsImmediately()
+      throws NodeNotFoundException, InvalidNodeStateException {
+    List<DatanodeDetails> dns = generateDatanodes();
+    DatanodeDetails dn = dns.get(1);
+
+    SimpleMockNodeManager mockNM = new SimpleMockNodeManager();
+    mockNM.register(dn, NodeStatus.inServiceDead());
+    NodeDecommissionManager decomMgr = new NodeDecommissionManager(conf, mockNM,
+        null, SCMContext.emptyContext(), new EventQueue(), null);
+
+    decomMgr.startDecommission(dns.get(1));
+    assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONED,
+        mockNM.getNodeStatus(dns.get(1)).getOperationalState());
+  }
+
+  @Test
   public void testNodesCanBePutIntoMaintenanceAndRecommissioned()
       throws InvalidHostStringException, NodeNotFoundException {
     List<DatanodeDetails> dns = generateDatanodes();
@@ -220,6 +237,22 @@ public class TestNodeDecommissionManager {
   }
 
   @Test
+  public void testDeadNodeGoesToMaintenanceImmediately()
+      throws NodeNotFoundException, InvalidNodeStateException {
+    List<DatanodeDetails> dns = generateDatanodes();
+    DatanodeDetails dn = dns.get(1);
+
+    SimpleMockNodeManager mockNM = new SimpleMockNodeManager();
+    mockNM.register(dn, NodeStatus.inServiceDead());
+    NodeDecommissionManager decomMgr = new NodeDecommissionManager(conf, mockNM,
+        null, SCMContext.emptyContext(), new EventQueue(), null);
+
+    decomMgr.startMaintenance(dns.get(1), 0);
+    assertEquals(HddsProtos.NodeOperationalState.IN_MAINTENANCE,
+        mockNM.getNodeStatus(dns.get(1)).getOperationalState());
+  }
+
+  @Test
   public void testNodesCannotTransitionFromDecomToMaint() throws Exception {
     List<DatanodeDetails> dns = generateDatanodes();
 

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org