You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by so...@apache.org on 2021/04/28 13:41:27 UTC
[ozone] branch master updated: HDDS-5153. Decommissioning a dead node should complete immediately (#2190)
This is an automated email from the ASF dual-hosted git repository.
sodonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new a920f25 HDDS-5153. Decommissioning a dead node should complete immediately (#2190)
a920f25 is described below
commit a920f25de422181d839bd7166c353edc17f30cf2
Author: Stephen O'Donnell <st...@gmail.com>
AuthorDate: Wed Apr 28 14:41:05 2021 +0100
HDDS-5153. Decommissioning a dead node should complete immediately (#2190)
---
.../hdds/scm/node/NodeDecommissionManager.java | 31 ++++++++++++++------
.../hdds/scm/node/TestNodeDecommissionManager.java | 33 ++++++++++++++++++++++
2 files changed, 56 insertions(+), 8 deletions(-)
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java
index 33c9697..8462ac7 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java
@@ -20,6 +20,7 @@ import com.google.common.annotations.VisibleForTesting;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState;
import org.apache.hadoop.hdds.scm.DatanodeAdminError;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
@@ -268,11 +269,18 @@ public class NodeDecommissionManager {
throws NodeNotFoundException, InvalidNodeStateException {
NodeStatus nodeStatus = getNodeStatus(dn);
NodeOperationalState opState = nodeStatus.getOperationalState();
+ HddsProtos.NodeState health = nodeStatus.getHealth();
if (opState == NodeOperationalState.IN_SERVICE) {
- LOG.info("Starting Decommission for node {}", dn);
- nodeManager.setNodeOperationalState(
- dn, NodeOperationalState.DECOMMISSIONING);
- monitor.startMonitoring(dn);
+ if (health != HddsProtos.NodeState.DEAD) {
+ LOG.info("Starting Decommission for node {}", dn);
+ nodeManager.setNodeOperationalState(
+ dn, NodeOperationalState.DECOMMISSIONING);
+ monitor.startMonitoring(dn);
+ } else {
+ LOG.info("{} is dead. Moving to decommissioned immediately", dn);
+ nodeManager.setNodeOperationalState(
+ dn, NodeOperationalState.DECOMMISSIONED);
+ }
} else if (nodeStatus.isDecommission()) {
LOG.info("Start Decommission called on node {} in state {}. Nothing to "+
"do.", dn, opState);
@@ -354,11 +362,18 @@ public class NodeDecommissionManager {
maintenanceEnd =
(System.currentTimeMillis() / 1000L) + (endInHours * 60L * 60L);
}
+ HddsProtos.NodeState health = nodeStatus.getHealth();
if (opState == NodeOperationalState.IN_SERVICE) {
- nodeManager.setNodeOperationalState(
- dn, NodeOperationalState.ENTERING_MAINTENANCE, maintenanceEnd);
- monitor.startMonitoring(dn);
- LOG.info("Starting Maintenance for node {}", dn);
+ if (health != HddsProtos.NodeState.DEAD) {
+ nodeManager.setNodeOperationalState(
+ dn, NodeOperationalState.ENTERING_MAINTENANCE, maintenanceEnd);
+ monitor.startMonitoring(dn);
+ LOG.info("Starting Maintenance for node {}", dn);
+ } else {
+ LOG.info("{} is dead. Moving to maintenance immediately", dn);
+ nodeManager.setNodeOperationalState(
+ dn, NodeOperationalState.IN_MAINTENANCE);
+ }
} else if (nodeStatus.isMaintenance()) {
LOG.info("Starting Maintenance called on node {} with state {}. "+
"Nothing to do.", dn, opState);
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeDecommissionManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeDecommissionManager.java
index 665c3f7..5b84051 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeDecommissionManager.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeDecommissionManager.java
@@ -24,6 +24,7 @@ import org.apache.hadoop.hdds.protocol.MockDatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.TestUtils;
import org.apache.hadoop.hdds.scm.DatanodeAdminError;
+import org.apache.hadoop.hdds.scm.container.SimpleMockNodeManager;
import org.apache.hadoop.hdds.scm.ha.SCMContext;
import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
@@ -177,6 +178,22 @@ public class TestNodeDecommissionManager {
}
@Test
+ public void testDeadNodeDecommissionsImmediately()
+ throws NodeNotFoundException, InvalidNodeStateException {
+ List<DatanodeDetails> dns = generateDatanodes();
+ DatanodeDetails dn = dns.get(1);
+
+ SimpleMockNodeManager mockNM = new SimpleMockNodeManager();
+ mockNM.register(dn, NodeStatus.inServiceDead());
+ NodeDecommissionManager decomMgr = new NodeDecommissionManager(conf, mockNM,
+ null, SCMContext.emptyContext(), new EventQueue(), null);
+
+ decomMgr.startDecommission(dns.get(1));
+ assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONED,
+ mockNM.getNodeStatus(dns.get(1)).getOperationalState());
+ }
+
+ @Test
public void testNodesCanBePutIntoMaintenanceAndRecommissioned()
throws InvalidHostStringException, NodeNotFoundException {
List<DatanodeDetails> dns = generateDatanodes();
@@ -220,6 +237,22 @@ public class TestNodeDecommissionManager {
}
@Test
+ public void testDeadNodeGoesToMaintenanceImmediately()
+ throws NodeNotFoundException, InvalidNodeStateException {
+ List<DatanodeDetails> dns = generateDatanodes();
+ DatanodeDetails dn = dns.get(1);
+
+ SimpleMockNodeManager mockNM = new SimpleMockNodeManager();
+ mockNM.register(dn, NodeStatus.inServiceDead());
+ NodeDecommissionManager decomMgr = new NodeDecommissionManager(conf, mockNM,
+ null, SCMContext.emptyContext(), new EventQueue(), null);
+
+ decomMgr.startMaintenance(dns.get(1), 0);
+ assertEquals(HddsProtos.NodeOperationalState.IN_MAINTENANCE,
+ mockNM.getNodeStatus(dns.get(1)).getOperationalState());
+ }
+
+ @Test
public void testNodesCannotTransitionFromDecomToMaint() throws Exception {
List<DatanodeDetails> dns = generateDatanodes();
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org