You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by bh...@apache.org on 2019/03/14 19:42:07 UTC
[hadoop] branch trunk updated: HDDS-917. Expose NodeManagerMXBean
as a MetricsSource. Contributed by Siddharth Wagle.
This is an automated email from the ASF dual-hosted git repository.
bharat pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new 091a664 HDDS-917. Expose NodeManagerMXBean as a MetricsSource. Contributed by Siddharth Wagle.
091a664 is described below
commit 091a664977a3b97cd6057129da9d093a73d63a68
Author: Bharat Viswanadham <bh...@apache.org>
AuthorDate: Thu Mar 14 12:30:06 2019 -0700
HDDS-917. Expose NodeManagerMXBean as a MetricsSource. Contributed by Siddharth Wagle.
---
.../hadoop/hdds/scm/node/SCMNodeManager.java | 4 +-
.../hadoop/hdds/scm/node/SCMNodeMetrics.java | 76 ++++++++++++++++++++--
.../hadoop/ozone/scm/node/TestSCMNodeMetrics.java | 40 ++++++++++++
3 files changed, 114 insertions(+), 6 deletions(-)
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
index e457b13..4464ed1 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
@@ -102,7 +102,6 @@ public class SCMNodeManager implements NodeManager {
public SCMNodeManager(OzoneConfiguration conf, String clusterID,
StorageContainerManager scmManager, EventPublisher eventPublisher)
throws IOException {
- this.metrics = SCMNodeMetrics.create();
this.nodeStateManager = new NodeStateManager(conf, eventPublisher);
this.clusterID = clusterID;
this.version = VersionInfo.getLatestVersion();
@@ -110,6 +109,7 @@ public class SCMNodeManager implements NodeManager {
this.scmManager = scmManager;
LOG.info("Entering startup chill mode.");
registerMXBean();
+ this.metrics = SCMNodeMetrics.create(this);
}
private void registerMXBean() {
@@ -118,7 +118,7 @@ public class SCMNodeManager implements NodeManager {
}
private void unregisterMXBean() {
- if(this.nmInfoBean != null) {
+ if (this.nmInfoBean != null) {
MBeans.unregister(this.nmInfoBean);
this.nmInfoBean = null;
}
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeMetrics.java
index 30b1079..1596523 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeMetrics.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeMetrics.java
@@ -18,11 +18,24 @@
package org.apache.hadoop.hdds.scm.node;
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DEAD;
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DECOMMISSIONED;
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DECOMMISSIONING;
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY;
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.STALE;
+
+import java.util.Map;
+
import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.metrics2.MetricsCollector;
+import org.apache.hadoop.metrics2.MetricsInfo;
+import org.apache.hadoop.metrics2.MetricsSource;
import org.apache.hadoop.metrics2.MetricsSystem;
import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.hadoop.metrics2.lib.Interns;
+import org.apache.hadoop.metrics2.lib.MetricsRegistry;
import org.apache.hadoop.metrics2.lib.MutableCounterLong;
/**
@@ -30,7 +43,7 @@ import org.apache.hadoop.metrics2.lib.MutableCounterLong;
*/
@InterfaceAudience.Private
@Metrics(about = "SCM NodeManager Metrics", context = "ozone")
-public final class SCMNodeMetrics {
+public final class SCMNodeMetrics implements MetricsSource {
private static final String SOURCE_NAME =
SCMNodeMetrics.class.getSimpleName();
@@ -40,18 +53,26 @@ public final class SCMNodeMetrics {
private @Metric MutableCounterLong numNodeReportProcessed;
private @Metric MutableCounterLong numNodeReportProcessingFailed;
+ private final MetricsRegistry registry;
+ private final NodeManagerMXBean managerMXBean;
+ private final MetricsInfo recordInfo = Interns.info("SCMNodeManager",
+ "SCM NodeManager metrics");
+
/** Private constructor. */
- private SCMNodeMetrics() { }
+ private SCMNodeMetrics(NodeManagerMXBean managerMXBean) {
+ this.managerMXBean = managerMXBean;
+ this.registry = new MetricsRegistry(recordInfo);
+ }
/**
* Create and returns SCMNodeMetrics instance.
*
* @return SCMNodeMetrics
*/
- public static SCMNodeMetrics create() {
+ public static SCMNodeMetrics create(NodeManagerMXBean managerMXBean) {
MetricsSystem ms = DefaultMetricsSystem.instance();
return ms.register(SOURCE_NAME, "SCM NodeManager Metrics",
- new SCMNodeMetrics());
+ new SCMNodeMetrics(managerMXBean));
}
/**
@@ -90,4 +111,51 @@ public final class SCMNodeMetrics {
numNodeReportProcessingFailed.incr();
}
+ /**
+ * Get aggregated counter and gauge metrics.
+ */
+ @Override
+ @SuppressWarnings("SuspiciousMethodCalls")
+ public void getMetrics(MetricsCollector collector, boolean all) {
+ Map<String, Integer> nodeCount = managerMXBean.getNodeCount();
+ Map<String, Long> nodeInfo = managerMXBean.getNodeInfo();
+
+ registry.snapshot(
+ collector.addRecord(registry.info()) // Add annotated ones first
+ .addGauge(Interns.info(
+ "HealthyNodes",
+ "Number of healthy datanodes"),
+ nodeCount.get(HEALTHY.toString()))
+ .addGauge(Interns.info("StaleNodes",
+ "Number of stale datanodes"),
+ nodeCount.get(STALE.toString()))
+ .addGauge(Interns.info("DeadNodes",
+ "Number of dead datanodes"),
+ nodeCount.get(DEAD.toString()))
+ .addGauge(Interns.info("DecommissioningNodes",
+ "Number of decommissioning datanodes"),
+ nodeCount.get(DECOMMISSIONING.toString()))
+ .addGauge(Interns.info("DecommissionedNodes",
+ "Number of decommissioned datanodes"),
+ nodeCount.get(DECOMMISSIONED.toString()))
+ .addGauge(Interns.info("DiskCapacity",
+ "Total disk capacity"),
+ nodeInfo.get("DISKCapacity"))
+ .addGauge(Interns.info("DiskUsed",
+ "Total disk capacity used"),
+ nodeInfo.get("DISKUsed"))
+ .addGauge(Interns.info("DiskRemaining",
+ "Total disk capacity remaining"),
+ nodeInfo.get("DISKRemaining"))
+ .addGauge(Interns.info("SSDCapacity",
+ "Total ssd capacity"),
+ nodeInfo.get("SSDCapacity"))
+ .addGauge(Interns.info("SSDUsed",
+ "Total ssd capacity used"),
+ nodeInfo.get("SSDUsed"))
+ .addGauge(Interns.info("SSDRemaining",
+ "Total ssd capacity remaining"),
+ nodeInfo.get("SSDRemaining")),
+ all);
+ }
}
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestSCMNodeMetrics.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestSCMNodeMetrics.java
index c18ae5f..d19be93 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestSCMNodeMetrics.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestSCMNodeMetrics.java
@@ -35,6 +35,7 @@ import org.junit.Before;
import org.junit.Test;
import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
+import static org.apache.hadoop.test.MetricsAsserts.assertGauge;
import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
@@ -128,6 +129,45 @@ public class TestSCMNodeMetrics {
getMetrics(SCMNodeMetrics.class.getSimpleName()));
}
+ /**
+ * Verify that datanode aggregated state and capacity metrics are reported.
+ */
+ @Test
+ public void testNodeCountAndInfoMetricsReported() throws Exception {
+ HddsDatanodeService datanode = cluster.getHddsDatanodes().get(0);
+ StorageReportProto storageReport = TestUtils.createStorageReport(
+ datanode.getDatanodeDetails().getUuid(), "/tmp", 100, 10, 90, null);
+ NodeReportProto nodeReport = NodeReportProto.newBuilder()
+ .addStorageReport(storageReport).build();
+ datanode.getDatanodeStateMachine().getContext().addReport(nodeReport);
+ datanode.getDatanodeStateMachine().triggerHeartbeat();
+ // Give some time so that SCM receives and processes the heartbeat.
+ Thread.sleep(300L);
+
+ assertGauge("HealthyNodes", 1,
+ getMetrics(SCMNodeMetrics.class.getSimpleName()));
+ assertGauge("StaleNodes", 0,
+ getMetrics(SCMNodeMetrics.class.getSimpleName()));
+ assertGauge("DeadNodes", 0,
+ getMetrics(SCMNodeMetrics.class.getSimpleName()));
+ assertGauge("DecommissioningNodes", 0,
+ getMetrics(SCMNodeMetrics.class.getSimpleName()));
+ assertGauge("DecommissionedNodes", 0,
+ getMetrics(SCMNodeMetrics.class.getSimpleName()));
+ assertGauge("DiskCapacity", 100L,
+ getMetrics(SCMNodeMetrics.class.getSimpleName()));
+ assertGauge("DiskUsed", 10L,
+ getMetrics(SCMNodeMetrics.class.getSimpleName()));
+ assertGauge("DiskRemaining", 90L,
+ getMetrics(SCMNodeMetrics.class.getSimpleName()));
+ assertGauge("SSDCapacity", 0L,
+ getMetrics(SCMNodeMetrics.class.getSimpleName()));
+ assertGauge("SSDUsed", 0L,
+ getMetrics(SCMNodeMetrics.class.getSimpleName()));
+ assertGauge("SSDRemaining", 0L,
+ getMetrics(SCMNodeMetrics.class.getSimpleName()));
+ }
+
@After
public void teardown() {
cluster.shutdown();
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org