You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by si...@apache.org on 2021/02/19 13:33:21 UTC
[ozone] branch master updated: HDDS-4845. Update NodeStatus OperationalState for Datanodes in Recon (#1939)
This is an automated email from the ASF dual-hosted git repository.
siyao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 96e27a5 HDDS-4845. Update NodeStatus OperationalState for Datanodes in Recon (#1939)
96e27a5 is described below
commit 96e27a52ff199f77b0553ec051ce86ad3bad002a
Author: Siyao Meng <50...@users.noreply.github.com>
AuthorDate: Fri Feb 19 05:33:01 2021 -0800
HDDS-4845. Update NodeStatus OperationalState for Datanodes in Recon (#1939)
---
.../hadoop/hdds/scm/node/SCMNodeManager.java | 18 ++++++++----
.../hadoop/ozone/recon/TestReconAsPassiveScm.java | 2 ++
.../hadoop/ozone/recon/scm/ReconNodeManager.java | 20 +++++++++----
.../ozone/recon/scm/TestReconNodeManager.java | 33 ++++++++++++++++++++--
4 files changed, 61 insertions(+), 12 deletions(-)
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
index fcd72cf..c03990f 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
@@ -155,6 +155,10 @@ public class SCMNodeManager implements NodeManager {
}
}
+ protected NodeStateManager getNodeStateManager() {
+ return nodeStateManager;
+ }
+
/**
* Returns all datanode that are in the given state. This function works by
* taking a snapshot of the current collection and then returning the list
@@ -399,6 +403,12 @@ public class SCMNodeManager implements NodeManager {
return commandQueue.getCommand(datanodeDetails.getUuid());
}
+ boolean opStateDiffers(DatanodeDetails dnDetails, NodeStatus nodeStatus) {
+ return nodeStatus.getOperationalState() != dnDetails.getPersistedOpState()
+ || nodeStatus.getOpStateExpiryEpochSeconds()
+ != dnDetails.getPersistedOpStateExpiryEpochSec();
+ }
+
/**
* If the operational state or expiry reported in the datanode heartbeat do
* not match those store in SCM, queue a command to update the state persisted
@@ -410,12 +420,10 @@ public class SCMNodeManager implements NodeManager {
* @param reportedDn The DatanodeDetails taken from the node heartbeat.
* @throws NodeNotFoundException
*/
- private void updateDatanodeOpState(DatanodeDetails reportedDn)
+ protected void updateDatanodeOpState(DatanodeDetails reportedDn)
throws NodeNotFoundException {
NodeStatus scmStatus = getNodeStatus(reportedDn);
- if (scmStatus.getOperationalState() != reportedDn.getPersistedOpState()
- || scmStatus.getOpStateExpiryEpochSeconds()
- != reportedDn.getPersistedOpStateExpiryEpochSec()) {
+ if (opStateDiffers(reportedDn, scmStatus)) {
LOG.info("Scheduling a command to update the operationalState " +
"persisted on {} as the reported value does not " +
"match the value stored in SCM ({}, {})",
@@ -802,7 +810,7 @@ public class SCMNodeManager implements NodeManager {
*/
@Override
public void onMessage(CommandForDatanode commandForDatanode,
- EventPublisher ignored) {
+ EventPublisher ignored) {
addDatanodeCommand(commandForDatanode.getDatanodeId(),
commandForDatanode.getCommand());
}
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAsPassiveScm.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAsPassiveScm.java
index 6e3dfe3..b79eb2b 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAsPassiveScm.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAsPassiveScm.java
@@ -53,6 +53,7 @@ import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.junit.rules.Timeout;
+import org.slf4j.event.Level;
/**
* Recon's passive SCM integration tests.
@@ -139,6 +140,7 @@ public class TestReconAsPassiveScm {
GenericTestUtils.LogCapturer logCapturer =
GenericTestUtils.LogCapturer.captureLogs(ReconNodeManager.LOG);
+ GenericTestUtils.setLogLevel(ReconNodeManager.LOG, Level.DEBUG);
reconScm.getEventQueue().fireEvent(CLOSE_CONTAINER,
containerInfo.containerID());
GenericTestUtils.waitFor(() -> logCapturer.getOutput()
diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java
index 1979550..218f717 100644
--- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java
+++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java
@@ -30,6 +30,7 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type;
import org.apache.hadoop.hdds.scm.net.NetworkTopology;
import org.apache.hadoop.hdds.scm.node.SCMNodeManager;
+import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
import org.apache.hadoop.hdds.scm.server.SCMStorageConfig;
import org.apache.hadoop.hdds.server.events.EventPublisher;
import org.apache.hadoop.hdds.utils.db.Table;
@@ -111,13 +112,12 @@ public class ReconNodeManager extends SCMNodeManager {
@Override
public void onMessage(CommandForDatanode commandForDatanode,
EventPublisher ignored) {
- if (ALLOWED_COMMANDS.contains(
- commandForDatanode.getCommand().getType())) {
+ final Type cmdType = commandForDatanode.getCommand().getType();
+ if (ALLOWED_COMMANDS.contains(cmdType)) {
super.onMessage(commandForDatanode, ignored);
} else {
- LOG.info("Ignoring unsupported command {} for Datanode {}.",
- commandForDatanode.getCommand().getType(),
- commandForDatanode.getDatanodeId());
+ LOG.debug("Ignoring unsupported command {} for Datanode {}.",
+ cmdType, commandForDatanode.getDatanodeId());
}
}
@@ -137,4 +137,14 @@ public class ReconNodeManager extends SCMNodeManager {
.filter(c -> ALLOWED_COMMANDS.contains(c.getType()))
.collect(toList());
}
+
+ @Override
+ protected void updateDatanodeOpState(DatanodeDetails reportedDn)
+ throws NodeNotFoundException {
+ super.updateDatanodeOpState(reportedDn);
+ // Update NodeOperationalState in NodeStatus to keep it consistent for Recon
+ super.getNodeStateManager().setNodeOperationalState(reportedDn,
+ reportedDn.getPersistedOpState(),
+ reportedDn.getPersistedOpStateExpiryEpochSec());
+ }
}
diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconNodeManager.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconNodeManager.java
index ee7f8ca..0cacd7a 100644
--- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconNodeManager.java
+++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconNodeManager.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto;
import org.apache.hadoop.hdds.scm.net.NetworkTopology;
import org.apache.hadoop.hdds.scm.net.NetworkTopologyImpl;
+import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
import org.apache.hadoop.hdds.server.events.EventQueue;
import org.apache.hadoop.hdds.utils.db.DBStore;
import org.apache.hadoop.hdds.utils.db.DBStoreBuilder;
@@ -74,7 +75,7 @@ public class TestReconNodeManager {
}
@Test
- public void testReconNodeDB() throws IOException {
+ public void testReconNodeDB() throws IOException, NodeNotFoundException {
ReconStorageConfig scmStorageConfig = new ReconStorageConfig(conf);
EventQueue eventQueue = new EventQueue();
NetworkTopology clusterMap = new NetworkTopologyImpl(conf);
@@ -109,6 +110,18 @@ public class TestReconNodeManager {
reconNodeManager.addDatanodeCommand(datanodeDetails.getUuid(),
new ReregisterCommand());
+ // OperationalState sanity check
+ final DatanodeDetails dnDetails =
+ reconNodeManager.getNodeByUuid(datanodeDetails.getUuidString());
+ assertEquals(HddsProtos.NodeOperationalState.IN_SERVICE,
+ dnDetails.getPersistedOpState());
+ assertEquals(dnDetails.getPersistedOpState(),
+ reconNodeManager.getNodeStatus(dnDetails)
+ .getOperationalState());
+ assertEquals(dnDetails.getPersistedOpStateExpiryEpochSec(),
+ reconNodeManager.getNodeStatus(dnDetails)
+ .getOpStateExpiryEpochSeconds());
+
// Upon processing the heartbeat, the illegal command should be filtered out
List<SCMCommand> returnedCmds =
reconNodeManager.processHeartbeat(datanodeDetails);
@@ -116,6 +129,22 @@ public class TestReconNodeManager {
assertEquals(SCMCommandProto.Type.reregisterCommand,
returnedCmds.get(0).getType());
+ // Now feed a DECOMMISSIONED heartbeat of the same DN
+ datanodeDetails.setPersistedOpState(
+ HddsProtos.NodeOperationalState.DECOMMISSIONED);
+ datanodeDetails.setPersistedOpStateExpiryEpochSec(12345L);
+ reconNodeManager.processHeartbeat(datanodeDetails);
+ // Check both persistedOpState and NodeStatus#operationalState
+ assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONED,
+ dnDetails.getPersistedOpState());
+ assertEquals(dnDetails.getPersistedOpState(),
+ reconNodeManager.getNodeStatus(dnDetails)
+ .getOperationalState());
+ assertEquals(12345L, dnDetails.getPersistedOpStateExpiryEpochSec());
+ assertEquals(dnDetails.getPersistedOpStateExpiryEpochSec(),
+ reconNodeManager.getNodeStatus(dnDetails)
+ .getOpStateExpiryEpochSeconds());
+
// Close the DB, and recreate the instance of Recon Node Manager.
eventQueue.close();
reconNodeManager.close();
@@ -127,4 +156,4 @@ public class TestReconNodeManager {
assertNotNull(
reconNodeManager.getNodeByUuid(datanodeDetails.getUuidString()));
}
-}
\ No newline at end of file
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org