You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by si...@apache.org on 2021/02/19 13:33:21 UTC

[ozone] branch master updated: HDDS-4845. Update NodeStatus OperationalState for Datanodes in Recon (#1939)

This is an automated email from the ASF dual-hosted git repository.

siyao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 96e27a5  HDDS-4845. Update NodeStatus OperationalState for Datanodes in Recon (#1939)
96e27a5 is described below

commit 96e27a52ff199f77b0553ec051ce86ad3bad002a
Author: Siyao Meng <50...@users.noreply.github.com>
AuthorDate: Fri Feb 19 05:33:01 2021 -0800

    HDDS-4845. Update NodeStatus OperationalState for Datanodes in Recon (#1939)
---
 .../hadoop/hdds/scm/node/SCMNodeManager.java       | 18 ++++++++----
 .../hadoop/ozone/recon/TestReconAsPassiveScm.java  |  2 ++
 .../hadoop/ozone/recon/scm/ReconNodeManager.java   | 20 +++++++++----
 .../ozone/recon/scm/TestReconNodeManager.java      | 33 ++++++++++++++++++++--
 4 files changed, 61 insertions(+), 12 deletions(-)

diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
index fcd72cf..c03990f 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
@@ -155,6 +155,10 @@ public class SCMNodeManager implements NodeManager {
     }
   }
 
+  protected NodeStateManager getNodeStateManager() {
+    return nodeStateManager;
+  }
+
   /**
    * Returns all datanode that are in the given state. This function works by
    * taking a snapshot of the current collection and then returning the list
@@ -399,6 +403,12 @@ public class SCMNodeManager implements NodeManager {
     return commandQueue.getCommand(datanodeDetails.getUuid());
   }
 
+  boolean opStateDiffers(DatanodeDetails dnDetails, NodeStatus nodeStatus) {
+    return nodeStatus.getOperationalState() != dnDetails.getPersistedOpState()
+        || nodeStatus.getOpStateExpiryEpochSeconds()
+        != dnDetails.getPersistedOpStateExpiryEpochSec();
+  }
+
   /**
    * If the operational state or expiry reported in the datanode heartbeat do
    * not match those store in SCM, queue a command to update the state persisted
@@ -410,12 +420,10 @@ public class SCMNodeManager implements NodeManager {
    * @param reportedDn The DatanodeDetails taken from the node heartbeat.
    * @throws NodeNotFoundException
    */
-  private void updateDatanodeOpState(DatanodeDetails reportedDn)
+  protected void updateDatanodeOpState(DatanodeDetails reportedDn)
       throws NodeNotFoundException {
     NodeStatus scmStatus = getNodeStatus(reportedDn);
-    if (scmStatus.getOperationalState() != reportedDn.getPersistedOpState()
-        || scmStatus.getOpStateExpiryEpochSeconds()
-        != reportedDn.getPersistedOpStateExpiryEpochSec()) {
+    if (opStateDiffers(reportedDn, scmStatus)) {
       LOG.info("Scheduling a command to update the operationalState " +
           "persisted on {} as the reported value does not " +
           "match the value stored in SCM ({}, {})",
@@ -802,7 +810,7 @@ public class SCMNodeManager implements NodeManager {
    */
   @Override
   public void onMessage(CommandForDatanode commandForDatanode,
-      EventPublisher ignored) {
+                        EventPublisher ignored) {
     addDatanodeCommand(commandForDatanode.getDatanodeId(),
         commandForDatanode.getCommand());
   }
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAsPassiveScm.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAsPassiveScm.java
index 6e3dfe3..b79eb2b 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAsPassiveScm.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAsPassiveScm.java
@@ -53,6 +53,7 @@ import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TemporaryFolder;
 import org.junit.rules.Timeout;
+import org.slf4j.event.Level;
 
 /**
  * Recon's passive SCM integration tests.
@@ -139,6 +140,7 @@ public class TestReconAsPassiveScm {
 
     GenericTestUtils.LogCapturer logCapturer =
         GenericTestUtils.LogCapturer.captureLogs(ReconNodeManager.LOG);
+    GenericTestUtils.setLogLevel(ReconNodeManager.LOG, Level.DEBUG);
     reconScm.getEventQueue().fireEvent(CLOSE_CONTAINER,
         containerInfo.containerID());
     GenericTestUtils.waitFor(() -> logCapturer.getOutput()
diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java
index 1979550..218f717 100644
--- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java
+++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java
@@ -30,6 +30,7 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type;
 import org.apache.hadoop.hdds.scm.net.NetworkTopology;
 import org.apache.hadoop.hdds.scm.node.SCMNodeManager;
+import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
 import org.apache.hadoop.hdds.scm.server.SCMStorageConfig;
 import org.apache.hadoop.hdds.server.events.EventPublisher;
 import org.apache.hadoop.hdds.utils.db.Table;
@@ -111,13 +112,12 @@ public class ReconNodeManager extends SCMNodeManager {
   @Override
   public void onMessage(CommandForDatanode commandForDatanode,
                         EventPublisher ignored) {
-    if (ALLOWED_COMMANDS.contains(
-        commandForDatanode.getCommand().getType())) {
+    final Type cmdType = commandForDatanode.getCommand().getType();
+    if (ALLOWED_COMMANDS.contains(cmdType)) {
       super.onMessage(commandForDatanode, ignored);
     } else {
-      LOG.info("Ignoring unsupported command {} for Datanode {}.",
-          commandForDatanode.getCommand().getType(),
-          commandForDatanode.getDatanodeId());
+      LOG.debug("Ignoring unsupported command {} for Datanode {}.",
+          cmdType, commandForDatanode.getDatanodeId());
     }
   }
 
@@ -137,4 +137,14 @@ public class ReconNodeManager extends SCMNodeManager {
         .filter(c -> ALLOWED_COMMANDS.contains(c.getType()))
         .collect(toList());
   }
+
+  @Override
+  protected void updateDatanodeOpState(DatanodeDetails reportedDn)
+      throws NodeNotFoundException {
+    super.updateDatanodeOpState(reportedDn);
+    // Update NodeOperationalState in NodeStatus to keep it consistent for Recon
+    super.getNodeStateManager().setNodeOperationalState(reportedDn,
+        reportedDn.getPersistedOpState(),
+        reportedDn.getPersistedOpStateExpiryEpochSec());
+  }
 }
diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconNodeManager.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconNodeManager.java
index ee7f8ca..0cacd7a 100644
--- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconNodeManager.java
+++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconNodeManager.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto;
 import org.apache.hadoop.hdds.scm.net.NetworkTopology;
 import org.apache.hadoop.hdds.scm.net.NetworkTopologyImpl;
+import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
 import org.apache.hadoop.hdds.server.events.EventQueue;
 import org.apache.hadoop.hdds.utils.db.DBStore;
 import org.apache.hadoop.hdds.utils.db.DBStoreBuilder;
@@ -74,7 +75,7 @@ public class TestReconNodeManager {
   }
 
   @Test
-  public void testReconNodeDB() throws IOException {
+  public void testReconNodeDB() throws IOException, NodeNotFoundException {
     ReconStorageConfig scmStorageConfig = new ReconStorageConfig(conf);
     EventQueue eventQueue = new EventQueue();
     NetworkTopology clusterMap = new NetworkTopologyImpl(conf);
@@ -109,6 +110,18 @@ public class TestReconNodeManager {
     reconNodeManager.addDatanodeCommand(datanodeDetails.getUuid(),
         new ReregisterCommand());
 
+    // OperationalState sanity check
+    final DatanodeDetails dnDetails =
+        reconNodeManager.getNodeByUuid(datanodeDetails.getUuidString());
+    assertEquals(HddsProtos.NodeOperationalState.IN_SERVICE,
+        dnDetails.getPersistedOpState());
+    assertEquals(dnDetails.getPersistedOpState(),
+        reconNodeManager.getNodeStatus(dnDetails)
+            .getOperationalState());
+    assertEquals(dnDetails.getPersistedOpStateExpiryEpochSec(),
+        reconNodeManager.getNodeStatus(dnDetails)
+            .getOpStateExpiryEpochSeconds());
+
     // Upon processing the heartbeat, the illegal command should be filtered out
     List<SCMCommand> returnedCmds =
         reconNodeManager.processHeartbeat(datanodeDetails);
@@ -116,6 +129,22 @@ public class TestReconNodeManager {
     assertEquals(SCMCommandProto.Type.reregisterCommand,
         returnedCmds.get(0).getType());
 
+    // Now feed a DECOMMISSIONED heartbeat of the same DN
+    datanodeDetails.setPersistedOpState(
+        HddsProtos.NodeOperationalState.DECOMMISSIONED);
+    datanodeDetails.setPersistedOpStateExpiryEpochSec(12345L);
+    reconNodeManager.processHeartbeat(datanodeDetails);
+    // Check both persistedOpState and NodeStatus#operationalState
+    assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONED,
+        dnDetails.getPersistedOpState());
+    assertEquals(dnDetails.getPersistedOpState(),
+        reconNodeManager.getNodeStatus(dnDetails)
+            .getOperationalState());
+    assertEquals(12345L, dnDetails.getPersistedOpStateExpiryEpochSec());
+    assertEquals(dnDetails.getPersistedOpStateExpiryEpochSec(),
+        reconNodeManager.getNodeStatus(dnDetails)
+            .getOpStateExpiryEpochSeconds());
+
     // Close the DB, and recreate the instance of Recon Node Manager.
     eventQueue.close();
     reconNodeManager.close();
@@ -127,4 +156,4 @@ public class TestReconNodeManager {
     assertNotNull(
         reconNodeManager.getNodeByUuid(datanodeDetails.getUuidString()));
   }
-}
\ No newline at end of file
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org