You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by av...@apache.org on 2021/07/23 03:13:12 UTC

[ozone] branch master updated: HDDS-5418. Let Recon send reregisterCommand to Datanodes if DatanodeDetails changed (#2392)

This is an automated email from the ASF dual-hosted git repository.

avijayan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 2759a9f  HDDS-5418. Let Recon send reregisterCommand to Datanodes if DatanodeDetails changed (#2392)
2759a9f is described below

commit 2759a9f4594a6ea3d967ab4232fb3dcac15d12c5
Author: Symious <yi...@foxmail.com>
AuthorDate: Fri Jul 23 11:12:54 2021 +0800

    HDDS-5418. Let Recon send reregisterCommand to Datanodes if DatanodeDetails changed (#2392)
---
 .../hadoop/hdds/protocol/DatanodeDetails.java      |   9 ++
 .../hadoop/ozone/recon/api/NodeEndpoint.java       |  10 +-
 .../hadoop/ozone/recon/scm/ReconNodeManager.java   | 106 ++++++++++++++++++++-
 .../src/views/datanodes/datanodes.tsx              |   8 +-
 .../ozone/recon/scm/TestReconNodeManager.java      |  38 ++++++++
 5 files changed, 161 insertions(+), 10 deletions(-)

diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java
index 7faa741..aef3c29 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hdds.protocol;
 import java.util.ArrayList;
 import java.util.EnumSet;
 import java.util.List;
+import java.util.Objects;
 import java.util.Set;
 import java.util.UUID;
 
@@ -509,6 +510,14 @@ public class DatanodeDetails extends NodeImpl implements
     return uuid.hashCode();
   }
 
+  // Skips the OpStates, which may change at runtime.
+  public int getSignature() {
+    return Objects
+        .hash(uuid, uuidString, ipAddress, hostName, ports,
+            certSerialId, version, setupTime, revision, buildDate,
+            initialVersion, currentVersion);
+  }
+
   /**
    * Returns DatanodeDetails.Builder instance.
    *
diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/NodeEndpoint.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/NodeEndpoint.java
index 6438842..4e6ee3a 100644
--- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/NodeEndpoint.java
+++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/NodeEndpoint.java
@@ -139,7 +139,7 @@ public class NodeEndpoint {
       }
 
       DatanodeInfo dnInfo = (DatanodeInfo) datanode;
-      datanodes.add(builder.withHostname(hostname)
+      datanodes.add(builder.withHostname(nodeManager.getHostName(datanode))
           .withDatanodeStorageReport(storageReport)
           .withLastHeartbeat(nodeManager.getLastHeartbeat(datanode))
           .withState(nodeState)
@@ -147,10 +147,10 @@ public class NodeEndpoint {
           .withPipelines(pipelines)
           .withLeaderCount(leaderCount.get())
           .withUUid(datanode.getUuidString())
-          .withVersion(datanode.getVersion())
-          .withSetupTime(datanode.getSetupTime())
-          .withRevision(datanode.getRevision())
-          .withBuildDate(datanode.getBuildDate())
+          .withVersion(nodeManager.getVersion(datanode))
+          .withSetupTime(nodeManager.getSetupTime(datanode))
+          .withRevision(nodeManager.getRevision(datanode))
+          .withBuildDate(nodeManager.getBuildDate(datanode))
           .withLayoutVersion(
               dnInfo.getLastKnownLayoutVersion().getMetadataLayoutVersion())
           .build());
diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java
index 4e1cd7e..0f6d795 100644
--- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java
+++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.ozone.recon.scm;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -28,6 +29,8 @@ import java.util.UUID;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.LayoutVersionProto;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.NodeReportProto;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReportsProto;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMVersionRequestProto;
@@ -39,10 +42,13 @@ import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
 import org.apache.hadoop.hdds.scm.server.SCMStorageConfig;
 import org.apache.hadoop.hdds.server.events.EventPublisher;
 import org.apache.hadoop.hdds.upgrade.HDDSLayoutVersionManager;
+import org.apache.hadoop.hdds.utils.HddsServerUtil;
 import org.apache.hadoop.hdds.utils.db.Table;
 import org.apache.hadoop.hdds.utils.db.TableIterator;
 import org.apache.hadoop.ozone.protocol.VersionResponse;
 import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode;
+import org.apache.hadoop.ozone.protocol.commands.RegisteredCommand;
+import org.apache.hadoop.ozone.protocol.commands.ReregisterCommand;
 import org.apache.hadoop.ozone.protocol.commands.SCMCommand;
 import org.apache.hadoop.util.Time;
 
@@ -71,6 +77,13 @@ public class ReconNodeManager extends SCMNodeManager {
    * and their last heartbeat time.
    */
   private Map<UUID, Long> datanodeHeartbeatMap = new HashMap<>();
+  private Map<UUID, DatanodeDetails> inMemDatanodeDetails = new HashMap<>();
+
+  private long reconDatanodeOutdatedTime;
+  private static int reconStaleDatanodeMultiplier = 3;
+
+  private static final DatanodeDetails EMPTY_DATANODE_DETAILS =
+      DatanodeDetails.newBuilder().setUuid(UUID.randomUUID()).build();
 
   public ReconNodeManager(OzoneConfiguration conf,
                           SCMStorageConfig scmStorageConfig,
@@ -80,6 +93,8 @@ public class ReconNodeManager extends SCMNodeManager {
                           HDDSLayoutVersionManager scmLayoutVersionManager) {
     super(conf, scmStorageConfig, eventPublisher, networkTopology,
         SCMContext.emptyContext(), scmLayoutVersionManager);
+    this.reconDatanodeOutdatedTime = reconStaleDatanodeMultiplier *
+        HddsServerUtil.getScmHeartbeatInterval(conf);
     this.nodeDB = nodeDB;
     loadExistingNodes();
   }
@@ -132,6 +147,61 @@ public class ReconNodeManager extends SCMNodeManager {
     return datanodeHeartbeatMap.getOrDefault(datanodeDetails.getUuid(), 0L);
   }
 
+  /**
+   * Returns the hostname of the given node.
+   *
+   * @param datanodeDetails DatanodeDetails
+   * @return hostname
+   */
+  public String getHostName(DatanodeDetails datanodeDetails) {
+    return inMemDatanodeDetails.getOrDefault(datanodeDetails.getUuid(),
+        EMPTY_DATANODE_DETAILS).getHostName();
+  }
+
+  /**
+   * Returns the version of the given node.
+   *
+   * @param datanodeDetails DatanodeDetails
+   * @return version
+   */
+  public String getVersion(DatanodeDetails datanodeDetails) {
+    return inMemDatanodeDetails.getOrDefault(datanodeDetails.getUuid(),
+        EMPTY_DATANODE_DETAILS).getVersion();
+  }
+
+  /**
+   * Returns the setupTime of the given node.
+   *
+   * @param datanodeDetails DatanodeDetails
+   * @return setupTime
+   */
+  public long getSetupTime(DatanodeDetails datanodeDetails) {
+    return inMemDatanodeDetails.getOrDefault(datanodeDetails.getUuid(),
+        EMPTY_DATANODE_DETAILS).getSetupTime();
+  }
+
+  /**
+   * Returns the revision of the given node.
+   *
+   * @param datanodeDetails DatanodeDetails
+   * @return revision
+   */
+  public String getRevision(DatanodeDetails datanodeDetails) {
+    return inMemDatanodeDetails.getOrDefault(datanodeDetails.getUuid(),
+        EMPTY_DATANODE_DETAILS).getRevision();
+  }
+
+  /**
+   * Returns the build date of the given node.
+   *
+   * @param datanodeDetails DatanodeDetails
+   * @return buildDate
+   */
+  public String getBuildDate(DatanodeDetails datanodeDetails) {
+    return inMemDatanodeDetails.getOrDefault(datanodeDetails.getUuid(),
+        EMPTY_DATANODE_DETAILS).getBuildDate();
+  }
+
   @Override
   public void onMessage(CommandForDatanode commandForDatanode,
                         EventPublisher ignored) {
@@ -155,9 +225,18 @@ public class ReconNodeManager extends SCMNodeManager {
   @Override
   public List<SCMCommand> processHeartbeat(DatanodeDetails datanodeDetails,
                                            LayoutVersionProto layoutInfo) {
+    List<SCMCommand> cmds = new ArrayList<>();
+    long currentTime = Time.now();
+    if (needUpdate(datanodeDetails, currentTime)) {
+      cmds.add(new ReregisterCommand());
+      LOG.info("Sending ReregisterCommand() for " +
+          datanodeDetails.getHostName());
+      datanodeHeartbeatMap.put(datanodeDetails.getUuid(), Time.now());
+      return cmds;
+    }
     // Update heartbeat map with current time
     datanodeHeartbeatMap.put(datanodeDetails.getUuid(), Time.now());
-    List<SCMCommand> cmds = super.processHeartbeat(datanodeDetails, layoutInfo);
+    cmds.addAll(super.processHeartbeat(datanodeDetails, layoutInfo));
     return cmds.stream()
         .filter(c -> ALLOWED_COMMANDS.contains(c.getType()))
         .collect(toList());
@@ -173,6 +252,25 @@ public class ReconNodeManager extends SCMNodeManager {
         reportedDn.getPersistedOpStateExpiryEpochSec());
   }
 
+  @Override
+  public RegisteredCommand register(
+      DatanodeDetails datanodeDetails, NodeReportProto nodeReport,
+      PipelineReportsProto pipelineReportsProto,
+      LayoutVersionProto layoutInfo) {
+    inMemDatanodeDetails.put(datanodeDetails.getUuid(), datanodeDetails);
+    if (isNodeRegistered(datanodeDetails)) {
+      try {
+        nodeDB.put(datanodeDetails.getUuid(), datanodeDetails);
+        LOG.info("Updating nodeDB for " + datanodeDetails.getHostName());
+      } catch (IOException e) {
+        LOG.error("Can not update node {} to Node DB.",
+            datanodeDetails.getUuid());
+      }
+    }
+    return super.register(datanodeDetails, nodeReport, pipelineReportsProto,
+        layoutInfo);
+  }
+
   public void updateNodeOperationalStateFromScm(HddsProtos.Node scmNode,
                                                 DatanodeDetails dnDetails)
       throws NodeNotFoundException {
@@ -190,4 +288,10 @@ public class ReconNodeManager extends SCMNodeManager {
       scmDnd.setPersistedOpState(nodeOperationalStateFromScm);
     }
   }
+
+  private boolean needUpdate(DatanodeDetails datanodeDetails,
+      long currentTime) {
+    return currentTime - getLastHeartbeat(datanodeDetails) >=
+        reconDatanodeOutdatedTime;
+  }
 }
diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/views/datanodes/datanodes.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/views/datanodes/datanodes.tsx
index 962eefc..62093d6 100644
--- a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/views/datanodes/datanodes.tsx
+++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/views/datanodes/datanodes.tsx
@@ -240,7 +240,7 @@ const COLUMNS = [
     title: 'Version',
     dataIndex: 'version',
     key: 'version',
-    isVisible: false,
+    isVisible: true,
     isSearchable: true,
     sorter: (a: IDatanode, b: IDatanode) => a.version.localeCompare(b.version),
     defaultSortOrder: 'ascend' as const
@@ -249,7 +249,7 @@ const COLUMNS = [
     title: 'SetupTime',
     dataIndex: 'setupTime',
     key: 'setupTime',
-    isVisible: false,
+    isVisible: true,
     sorter: (a: IDatanode, b: IDatanode) => a.setupTime - b.setupTime,
     render: (uptime: number) => {
       return uptime > 0 ? moment(uptime).format('ll LTS') : 'NA';
@@ -259,7 +259,7 @@ const COLUMNS = [
     title: 'Revision',
     dataIndex: 'revision',
     key: 'revision',
-    isVisible: false,
+    isVisible: true,
     isSearchable: true,
     sorter: (a: IDatanode, b: IDatanode) => a.revision.localeCompare(b.revision),
     defaultSortOrder: 'ascend' as const
@@ -268,7 +268,7 @@ const COLUMNS = [
     title: 'BuildDate',
     dataIndex: 'buildDate',
     key: 'buildDate',
-    isVisible: false,
+    isVisible: true,
     isSearchable: true,
     sorter: (a: IDatanode, b: IDatanode) => a.buildDate.localeCompare(b.buildDate),
     defaultSortOrder: 'ascend' as const
diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconNodeManager.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconNodeManager.java
index 1c4ef2f..4687aa0 100644
--- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconNodeManager.java
+++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconNodeManager.java
@@ -204,4 +204,42 @@ public class TestReconNodeManager {
     assertEquals(1, nodes.size());
     assertEquals(datanodeDetails.getUuid(), nodes.get(0).getUuid());
   }
+
+  @Test
+  public void testDatanodeUpdate() throws IOException {
+    ReconStorageConfig scmStorageConfig = new ReconStorageConfig(conf);
+    EventQueue eventQueue = new EventQueue();
+    NetworkTopology clusterMap = new NetworkTopologyImpl(conf);
+    Table<UUID, DatanodeDetails> nodeTable =
+        ReconSCMDBDefinition.NODES.getTable(store);
+    ReconNodeManager reconNodeManager = new ReconNodeManager(conf,
+        scmStorageConfig, eventQueue, clusterMap, nodeTable, versionManager);
+    ReconNewNodeHandler reconNewNodeHandler =
+        new ReconNewNodeHandler(reconNodeManager);
+    assertTrue(reconNodeManager.getAllNodes().isEmpty());
+
+    DatanodeDetails datanodeDetails = randomDatanodeDetails();
+    datanodeDetails.setHostName("hostname1");
+    String uuidString = datanodeDetails.getUuidString();
+
+    // Register "hostname1" datanode.
+    reconNodeManager.register(datanodeDetails, null, null);
+    reconNewNodeHandler.onMessage(reconNodeManager.getNodeByUuid(uuidString),
+        null);
+
+    assertEquals(1, reconNodeManager.getAllNodes().size());
+    assertNotNull(reconNodeManager.getNodeByUuid(uuidString));
+    assertEquals("hostname1",
+        reconNodeManager.getNodeByUuid(uuidString).getHostName());
+
+    datanodeDetails.setHostName("hostname2");
+    // Upon processing the heartbeat, a reregister command should be returned
+    List<SCMCommand> returnedCmds =
+        reconNodeManager.processHeartbeat(datanodeDetails,
+            defaultLayoutVersionProto());
+    assertEquals(1, returnedCmds.size());
+    assertEquals(SCMCommandProto.Type.reregisterCommand,
+        returnedCmds.get(0).getType());
+
+  }
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org