You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by jb...@apache.org on 2021/02/15 19:40:44 UTC
[hadoop] branch branch-3.2 updated: HDFS-15821. Add metrics for
in-service datanodes (#2690). Contributed by Zehao Chen.
This is an automated email from the ASF dual-hosted git repository.
jbrennan pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.2 by this push:
new 49480b8 HDFS-15821. Add metrics for in-service datanodes (#2690). Contributed by Zehao Chen.
49480b8 is described below
commit 49480b87e0c197396d0eb04f62bf9ec50bbe3597
Author: Zehao Chen <ze...@gmail.com>
AuthorDate: Mon Feb 15 11:14:32 2021 -0600
HDFS-15821. Add metrics for in-service datanodes (#2690). Contributed by Zehao Chen.
(cherry picked from commit 07a4220cd27c69b86b837e8da320bad0031f7895)
---
.../federation/metrics/NamenodeBeanMetrics.java | 5 ++
.../hadoop/hdfs/server/namenode/FSNamesystem.java | 13 +++
.../server/namenode/metrics/FSNamesystemMBean.java | 6 ++
.../hdfs/server/namenode/TestNameNodeMXBean.java | 97 ++++++++++++++++++++++
4 files changed, 121 insertions(+)
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java
index 0ca5f73..2c6bea9 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java
@@ -590,6 +590,11 @@ public class NamenodeBeanMetrics
}
@Override
+ public int getNumInServiceLiveDataNodes() {
+ return 0;
+ }
+
+ @Override
public int getVolumeFailuresTotal() {
return 0;
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 5abbc1b..7d7b9ab 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -5189,6 +5189,19 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
}
@Override // FSNamesystemMBean
+ @Metric({"NumInServiceLiveDataNodes",
+ "Number of live datanodes which are currently in service"})
+ public int getNumInServiceLiveDataNodes() {
+ final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
+ getBlockManager().getDatanodeManager().fetchDatanodes(live, null, true);
+ int liveInService = live.size();
+ for (DatanodeDescriptor node : live) {
+ liveInService -= node.isInMaintenance() ? 1 : 0;
+ }
+ return liveInService;
+ }
+
+ @Override // FSNamesystemMBean
@Metric({"VolumeFailuresTotal",
"Total number of volume failures across all Datanodes"})
public int getVolumeFailuresTotal() {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java
index c25bafd..5b1b213 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java
@@ -151,6 +151,12 @@ public interface FSNamesystemMBean {
public int getNumDecomDeadDataNodes();
/**
+ * @return Number of in-service live data nodes, where
+ * NumInServiceLiveDataNodes =
+ * NumLiveDataNodes - NumDecomLiveDataNodes - NumInMaintenanceLiveDataNodes
+ */
+ int getNumInServiceLiveDataNodes();
+
+ /**
* Number of failed data volumes across all live data nodes.
* @return number of failed data volumes across all live data nodes
*/
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java
index bfb3c49..6e04917 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java
@@ -430,6 +430,103 @@ public class TestNameNodeMXBean {
}
}
+ @Test(timeout = 120000)
+ public void testInServiceNodes() throws Exception {
+ Configuration conf = new Configuration();
+ conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
+ conf.setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY,
+ 30);
+ conf.setClass(DFSConfigKeys.DFS_NAMENODE_HOSTS_PROVIDER_CLASSNAME_KEY,
+ CombinedHostFileManager.class, HostConfigManager.class);
+ MiniDFSCluster cluster = null;
+ HostsFileWriter hostsFileWriter = new HostsFileWriter();
+ hostsFileWriter.initialize(conf, "temp/TestInServiceNodes");
+
+ try {
+ cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
+ cluster.waitActive();
+
+ final FSNamesystem fsn = cluster.getNameNode().namesystem;
+ final MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
+ final ObjectName mxbeanName = new ObjectName(
+ "Hadoop:service=NameNode,name=FSNamesystem");
+
+ List<String> hosts = new ArrayList<>();
+ for (DataNode dn : cluster.getDataNodes()) {
+ hosts.add(dn.getDisplayName());
+ }
+ hostsFileWriter.initIncludeHosts(hosts.toArray(
+ new String[hosts.size()]));
+ fsn.getBlockManager().getDatanodeManager().refreshNodes(conf);
+
+ GenericTestUtils.waitFor(new Supplier<Boolean>() {
+ @Override
+ public Boolean get() {
+ try {
+ int numLiveDataNodes = (int) mbs.getAttribute(mxbeanName,
+ "NumLiveDataNodes");
+ return numLiveDataNodes == 3;
+ } catch (Exception e) {
+ return false;
+ }
+ }
+ }, 1000, 60000);
+
+ // Verify nodes
+ int numDecomLiveDataNodes = (int) mbs.getAttribute(mxbeanName,
+ "NumDecomLiveDataNodes");
+ int numInMaintenanceLiveDataNodes = (int) mbs.getAttribute(mxbeanName,
+ "NumInMaintenanceLiveDataNodes");
+ int numInServiceLiveDataNodes = (int) mbs.getAttribute(mxbeanName,
+ "NumInServiceLiveDataNodes");
+ assertEquals(0, numDecomLiveDataNodes);
+ assertEquals(0, numInMaintenanceLiveDataNodes);
+ assertEquals(3, numInServiceLiveDataNodes);
+
+ // Add 2 nodes to out-of-service list
+ ArrayList<String> decomNodes = new ArrayList<>();
+ decomNodes.add(cluster.getDataNodes().get(0).getDisplayName());
+
+ Map<String, Long> maintenanceNodes = new HashMap<>();
+ final int expirationInMs = 30 * 1000;
+ maintenanceNodes.put(cluster.getDataNodes().get(1).getDisplayName(),
+ Time.now() + expirationInMs);
+
+ hostsFileWriter.initOutOfServiceHosts(decomNodes, maintenanceNodes);
+ fsn.getBlockManager().getDatanodeManager().refreshNodes(conf);
+
+ // Wait for the DatanodeAdminManager to complete check
+ GenericTestUtils.waitFor(new Supplier<Boolean>() {
+ @Override
+ public Boolean get() {
+ try {
+ int numLiveDataNodes = (int) mbs.getAttribute(mxbeanName,
+ "NumLiveDataNodes");
+ int numDecomLiveDataNodes = (int) mbs.getAttribute(mxbeanName,
+ "NumDecomLiveDataNodes");
+ int numInMaintenanceLiveDataNodes = (int) mbs.getAttribute(
+ mxbeanName, "NumInMaintenanceLiveDataNodes");
+ return numLiveDataNodes == 3 &&
+ numDecomLiveDataNodes == 1 &&
+ numInMaintenanceLiveDataNodes == 1;
+ } catch (Exception e) {
+ return false;
+ }
+ }
+ }, 1000, 60000);
+
+ // Verify nodes
+ numInServiceLiveDataNodes = (int) mbs.getAttribute(mxbeanName,
+ "NumInServiceLiveDataNodes");
+ assertEquals(1, numInServiceLiveDataNodes);
+ } finally {
+ if (cluster != null) {
+ cluster.shutdown();
+ }
+ hostsFileWriter.cleanup();
+ }
+ }
+
@Test (timeout = 120000)
public void testMaintenanceNodes() throws Exception {
LOG.info("Starting testMaintenanceNodes");
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org