Posted to commits@ozone.apache.org by di...@apache.org on 2020/04/23 23:27:10 UTC
[hadoop-ozone] branch master updated: HDDS-3480. TestSCMNodeMetrics is flaky (#860)
This is an automated email from the ASF dual-hosted git repository.
dineshc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hadoop-ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 0e3f807 HDDS-3480. TestSCMNodeMetrics is flaky (#860)
0e3f807 is described below
commit 0e3f807d1f09e0399ea5509444afed8025c4951f
Author: Elek, Márton <el...@users.noreply.github.com>
AuthorDate: Fri Apr 24 01:27:01 2020 +0200
HDDS-3480. TestSCMNodeMetrics is flaky (#860)
---
.../hadoop/hdds/scm/node/SCMNodeMetrics.java | 17 +-
.../hadoop/ozone/scm/node/TestSCMNodeMetrics.java | 204 +++++++++++++++++++++
.../hadoop/ozone/scm/node/TestSCMNodeMetrics.java | 177 ------------------
3 files changed, 212 insertions(+), 186 deletions(-)
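
[Editor's note] The flakiness came from the old integration test: it booted a MiniOzoneCluster, triggered a datanode heartbeat, then slept 100 ms and hoped SCM had processed it before asserting the counter. The replacement is a plain unit test that drives SCMNodeManager directly, so every assertion follows a synchronous call. A condensed sketch of the new flow (all identifiers and setup are taken from the test file added below, not new API):

    // Register a datanode once, then exercise the node manager synchronously.
    SCMNodeManager nodeManager = new SCMNodeManager(new OzoneConfiguration(),
        new SCMStorageConfig(NodeType.DATANODE, new File("/tmp"), "storage"),
        new EventQueue(), Mockito.mock(NetworkTopology.class));
    nodeManager.register(registeredDatanode, createNodeReport(),
        PipelineReportsProto.newBuilder().build());

    long before = getLongCounter("NumHBProcessed",
        getMetrics(SCMNodeMetrics.SOURCE_NAME));
    nodeManager.processHeartbeat(registeredDatanode);  // no cluster, no Thread.sleep
    assertEquals(before + 1, getLongCounter("NumHBProcessed",
        getMetrics(SCMNodeMetrics.SOURCE_NAME)));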
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeMetrics.java
index dbb4242..c515f2f 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeMetrics.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeMetrics.java
@@ -18,12 +18,6 @@
package org.apache.hadoop.hdds.scm.node;
-import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DEAD;
-import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DECOMMISSIONED;
-import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DECOMMISSIONING;
-import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY;
-import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.STALE;
-
import java.util.Map;
import org.apache.hadoop.hdds.annotation.InterfaceAudience;
@@ -39,6 +33,12 @@ import org.apache.hadoop.metrics2.lib.MetricsRegistry;
import org.apache.hadoop.metrics2.lib.MutableCounterLong;
import org.apache.hadoop.ozone.OzoneConsts;
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DEAD;
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DECOMMISSIONED;
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DECOMMISSIONING;
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY;
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.STALE;
+
/**
* This class maintains Node related metrics.
*/
@@ -46,7 +46,7 @@ import org.apache.hadoop.ozone.OzoneConsts;
@Metrics(about = "SCM NodeManager Metrics", context = OzoneConsts.OZONE)
public final class SCMNodeMetrics implements MetricsSource {
- private static final String SOURCE_NAME =
+ public static final String SOURCE_NAME =
SCMNodeMetrics.class.getSimpleName();
private @Metric MutableCounterLong numHBProcessed;
@@ -113,14 +113,13 @@ public final class SCMNodeMetrics implements MetricsSource {
}
/**
- * Get aggregated counter and gauage metrics.
+ * Get aggregated counter and gauge metrics.
*/
@Override
@SuppressWarnings("SuspiciousMethodCalls")
public void getMetrics(MetricsCollector collector, boolean all) {
Map<String, Integer> nodeCount = managerMXBean.getNodeCount();
Map<String, Long> nodeInfo = managerMXBean.getNodeInfo();
-
registry.snapshot(
collector.addRecord(registry.info()) // Add annotated ones first
.addGauge(Interns.info(
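
[Editor's note] The visibility change above (SOURCE_NAME going from private to public) is what lets the relocated unit test resolve the metrics source by its registered name instead of re-deriving it from the class name. The lookup, exactly as used in the new test below:

    MetricsRecordBuilder metrics = getMetrics(SCMNodeMetrics.SOURCE_NAME);
    assertGauge("HealthyNodes", 1, metrics);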
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/scm/node/TestSCMNodeMetrics.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/scm/node/TestSCMNodeMetrics.java
new file mode 100644
index 0000000..cebedf2
--- /dev/null
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/scm/node/TestSCMNodeMetrics.java
@@ -0,0 +1,204 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.scm.node;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.UUID;
+
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.MockDatanodeDetails;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeType;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.NodeReportProto;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReportsProto;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageReportProto;
+import org.apache.hadoop.hdds.scm.TestUtils;
+import org.apache.hadoop.hdds.scm.net.NetworkTopology;
+import org.apache.hadoop.hdds.scm.node.SCMNodeManager;
+import org.apache.hadoop.hdds.scm.node.SCMNodeMetrics;
+import org.apache.hadoop.hdds.scm.server.SCMStorageConfig;
+import org.apache.hadoop.hdds.server.events.EventQueue;
+import org.apache.hadoop.metrics2.MetricsRecordBuilder;
+
+import static org.apache.hadoop.test.MetricsAsserts.assertGauge;
+import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
+import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import static org.junit.Assert.assertEquals;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+/**
+ * Test cases to verify the metrics exposed by SCMNodeManager.
+ */
+public class TestSCMNodeMetrics {
+
+ private static SCMNodeManager nodeManager;
+
+ private static DatanodeDetails registeredDatanode;
+
+ @BeforeClass
+ public static void setup() throws Exception {
+
+ OzoneConfiguration source = new OzoneConfiguration();
+ EventQueue publisher = new EventQueue();
+ SCMStorageConfig config =
+ new SCMStorageConfig(NodeType.DATANODE, new File("/tmp"), "storage");
+ nodeManager = new SCMNodeManager(source, config, publisher,
+ Mockito.mock(NetworkTopology.class));
+
+ registeredDatanode = DatanodeDetails.newBuilder()
+ .setHostName("localhost")
+ .setIpAddress("127.0.0.1")
+ .setUuid(UUID.randomUUID().toString())
+ .build();
+
+ nodeManager.register(registeredDatanode, createNodeReport(),
+ PipelineReportsProto.newBuilder().build());
+
+ }
+
+ @AfterClass
+ public static void teardown() throws IOException {
+ nodeManager.close();
+ }
+
+ /**
+ * Verifies heartbeat processing count.
+ *
+ * @throws InterruptedException
+ */
+ @Test
+ public void testHBProcessing() throws InterruptedException {
+ long hbProcessed = getCounter("NumHBProcessed");
+
+ NodeReportProto nodeReport = createNodeReport();
+
+ nodeManager.processHeartbeat(registeredDatanode);
+
+ assertEquals("NumHBProcessed", hbProcessed + 1,
+ getCounter("NumHBProcessed"));
+ }
+
+ /**
+ * Verifies heartbeat processing failure count.
+ */
+ @Test
+ public void testHBProcessingFailure() {
+
+ long hbProcessedFailed = getCounter("NumHBProcessingFailed");
+
+ nodeManager.processHeartbeat(MockDatanodeDetails
+ .randomDatanodeDetails());
+
+ assertEquals("NumHBProcessingFailed", hbProcessedFailed + 1,
+ getCounter("NumHBProcessingFailed"));
+ }
+
+ /**
+ * Verifies node report processing count.
+ *
+ * @throws InterruptedException
+ */
+ @Test
+ public void testNodeReportProcessing() throws InterruptedException {
+
+ long nrProcessed = getCounter("NumNodeReportProcessed");
+
+ StorageReportProto storageReport =
+ TestUtils.createStorageReport(registeredDatanode.getUuid(), "/tmp", 100,
+ 10, 90,
+ null);
+ NodeReportProto nodeReport = NodeReportProto.newBuilder()
+ .addStorageReport(storageReport).build();
+
+ nodeManager.processNodeReport(registeredDatanode, nodeReport);
+ Assert.assertEquals("NumNodeReportProcessed", nrProcessed + 1,
+ getCounter("NumNodeReportProcessed"));
+ }
+
+ /**
+ * Verifies node report processing failure count.
+ */
+ @Test
+ public void testNodeReportProcessingFailure() {
+
+ long nrProcessed = getCounter("NumNodeReportProcessingFailed");
+ DatanodeDetails randomDatanode =
+ MockDatanodeDetails.randomDatanodeDetails();
+
+ StorageReportProto storageReport = TestUtils.createStorageReport(
+ randomDatanode.getUuid(), "/tmp", 100, 10, 90, null);
+
+ NodeReportProto nodeReport = NodeReportProto.newBuilder()
+ .addStorageReport(storageReport).build();
+
+ nodeManager.processNodeReport(randomDatanode, nodeReport);
+ assertEquals("NumNodeReportProcessingFailed", nrProcessed + 1,
+ getCounter("NumNodeReportProcessingFailed"));
+ }
+
+ /**
+ * Verify that datanode aggregated state and capacity metrics are
+ * reported.
+ */
+ @Test
+ public void testNodeCountAndInfoMetricsReported() throws Exception {
+
+ StorageReportProto storageReport = TestUtils.createStorageReport(
+ registeredDatanode.getUuid(), "/tmp", 100, 10, 90, null);
+ NodeReportProto nodeReport = NodeReportProto.newBuilder()
+ .addStorageReport(storageReport).build();
+
+ nodeManager.processNodeReport(registeredDatanode, nodeReport);
+
+ MetricsRecordBuilder metricsSource = getMetrics(SCMNodeMetrics.SOURCE_NAME);
+
+ assertGauge("HealthyNodes", 1, metricsSource);
+ assertGauge("StaleNodes", 0, metricsSource);
+ assertGauge("DeadNodes", 0, metricsSource);
+ assertGauge("DecommissioningNodes", 0, metricsSource);
+ assertGauge("DecommissionedNodes", 0, metricsSource);
+ assertGauge("DiskCapacity", 100L, metricsSource);
+ assertGauge("DiskUsed", 10L, metricsSource);
+ assertGauge("DiskRemaining", 90L, metricsSource);
+ assertGauge("SSDCapacity", 0L, metricsSource);
+ assertGauge("SSDUsed", 0L, metricsSource);
+ assertGauge("SSDRemaining", 0L, metricsSource);
+
+ }
+
+ private long getCounter(String metricName) {
+ return getLongCounter(metricName, getMetrics(SCMNodeMetrics.SOURCE_NAME));
+ }
+
+ private static NodeReportProto createNodeReport() {
+ return NodeReportProto.newBuilder()
+ .addStorageReport(
+ StorageReportProto.newBuilder()
+ .setCapacity(1)
+ .setStorageUuid(UUID.randomUUID().toString())
+ .setStorageLocation("/tmp")
+ .build())
+ .build();
+ }
+}
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestSCMNodeMetrics.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestSCMNodeMetrics.java
deleted file mode 100644
index b36c323..0000000
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestSCMNodeMetrics.java
+++ /dev/null
@@ -1,177 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.ozone.scm.node;
-
-import org.apache.hadoop.hdds.HddsConfigKeys;
-import org.apache.hadoop.hdds.conf.OzoneConfiguration;
-import org.apache.hadoop.hdds.protocol.DatanodeDetails;
-import org.apache.hadoop.hdds.protocol.MockDatanodeDetails;
-import org.apache.hadoop.hdds.protocol.proto
- .StorageContainerDatanodeProtocolProtos.NodeReportProto;
-import org.apache.hadoop.hdds.protocol.proto
- .StorageContainerDatanodeProtocolProtos.StorageReportProto;
-import org.apache.hadoop.hdds.scm.TestUtils;
-import org.apache.hadoop.hdds.scm.node.SCMNodeMetrics;
-import org.apache.hadoop.metrics2.MetricsRecordBuilder;
-import org.apache.hadoop.ozone.HddsDatanodeService;
-import org.apache.hadoop.ozone.MiniOzoneCluster;
-
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
-
-import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
-import static org.apache.hadoop.test.MetricsAsserts.assertGauge;
-import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
-import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
-
-/**
- * Test cases to verify the metrics exposed by SCMNodeManager.
- */
-public class TestSCMNodeMetrics {
-
- private MiniOzoneCluster cluster;
-
- @Before
- public void setup() throws Exception {
- OzoneConfiguration conf = new OzoneConfiguration();
- conf.setBoolean(HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_CREATION, false);
- cluster = MiniOzoneCluster.newBuilder(conf).setNumDatanodes(1).build();
- cluster.waitForClusterToBeReady();
- }
-
- /**
- * Verifies heartbeat processing count.
- *
- * @throws InterruptedException
- */
- @Test
- public void testHBProcessing() throws InterruptedException {
- MetricsRecordBuilder metrics = getMetrics(
- SCMNodeMetrics.class.getSimpleName());
- long hbProcessed = getLongCounter("NumHBProcessed", metrics);
- cluster.getHddsDatanodes().get(0)
- .getDatanodeStateMachine().triggerHeartbeat();
- // Give some time so that SCM receives and processes the heartbeat.
- Thread.sleep(100L);
- assertCounter("NumHBProcessed", hbProcessed + 1,
- getMetrics(SCMNodeMetrics.class.getSimpleName()));
- }
-
- /**
- * Verifies heartbeat processing failure count.
- */
- @Test
- public void testHBProcessingFailure() {
- MetricsRecordBuilder metrics = getMetrics(
- SCMNodeMetrics.class.getSimpleName());
- long hbProcessedFailed = getLongCounter("NumHBProcessingFailed", metrics);
- cluster.getStorageContainerManager().getScmNodeManager()
- .processHeartbeat(MockDatanodeDetails.randomDatanodeDetails());
- assertCounter("NumHBProcessingFailed", hbProcessedFailed + 1,
- getMetrics(SCMNodeMetrics.class.getSimpleName()));
- }
-
- /**
- * Verifies node report processing count.
- *
- * @throws InterruptedException
- */
- @Test
- public void testNodeReportProcessing() throws InterruptedException {
- MetricsRecordBuilder metrics = getMetrics(
- SCMNodeMetrics.class.getSimpleName());
- long nrProcessed = getLongCounter("NumNodeReportProcessed", metrics);
- HddsDatanodeService datanode = cluster.getHddsDatanodes().get(0);
- StorageReportProto storageReport = TestUtils.createStorageReport(
- datanode.getDatanodeDetails().getUuid(), "/tmp", 100, 10, 90, null);
- NodeReportProto nodeReport = NodeReportProto.newBuilder()
- .addStorageReport(storageReport).build();
- datanode.getDatanodeStateMachine().getContext().addReport(nodeReport);
- cluster.getStorageContainerManager().getScmNodeManager()
- .processNodeReport(datanode.getDatanodeDetails(), nodeReport);
-
- assertCounter("NumNodeReportProcessed", nrProcessed + 1,
- getMetrics(SCMNodeMetrics.class.getSimpleName()));
- }
-
- /**
- * Verifies node report processing failure count.
- */
- @Test
- public void testNodeReportProcessingFailure() {
- MetricsRecordBuilder metrics = getMetrics(
- SCMNodeMetrics.class.getSimpleName());
- long nrProcessed = getLongCounter("NumNodeReportProcessingFailed",
- metrics);
- DatanodeDetails datanode = MockDatanodeDetails.randomDatanodeDetails();
- StorageReportProto storageReport = TestUtils.createStorageReport(
- datanode.getUuid(), "/tmp", 100, 10, 90, null);
- NodeReportProto nodeReport = NodeReportProto.newBuilder()
- .addStorageReport(storageReport).build();
-
- cluster.getStorageContainerManager().getScmNodeManager()
- .processNodeReport(datanode, nodeReport);
- assertCounter("NumNodeReportProcessingFailed", nrProcessed + 1,
- getMetrics(SCMNodeMetrics.class.getSimpleName()));
- }
-
- /**
- * Verify that datanode aggregated state and capacity metrics are reported.
- */
- @Test
- public void testNodeCountAndInfoMetricsReported() throws Exception {
- HddsDatanodeService datanode = cluster.getHddsDatanodes().get(0);
- StorageReportProto storageReport = TestUtils.createStorageReport(
- datanode.getDatanodeDetails().getUuid(), "/tmp", 100, 10, 90, null);
- NodeReportProto nodeReport = NodeReportProto.newBuilder()
- .addStorageReport(storageReport).build();
- datanode.getDatanodeStateMachine().getContext().addReport(nodeReport);
- cluster.getStorageContainerManager().getScmNodeManager()
- .processNodeReport(datanode.getDatanodeDetails(), nodeReport);
-
- assertGauge("HealthyNodes", 1,
- getMetrics(SCMNodeMetrics.class.getSimpleName()));
- assertGauge("StaleNodes", 0,
- getMetrics(SCMNodeMetrics.class.getSimpleName()));
- assertGauge("DeadNodes", 0,
- getMetrics(SCMNodeMetrics.class.getSimpleName()));
- assertGauge("DecommissioningNodes", 0,
- getMetrics(SCMNodeMetrics.class.getSimpleName()));
- assertGauge("DecommissionedNodes", 0,
- getMetrics(SCMNodeMetrics.class.getSimpleName()));
- assertGauge("DiskCapacity", 100L,
- getMetrics(SCMNodeMetrics.class.getSimpleName()));
- assertGauge("DiskUsed", 10L,
- getMetrics(SCMNodeMetrics.class.getSimpleName()));
- assertGauge("DiskRemaining", 90L,
- getMetrics(SCMNodeMetrics.class.getSimpleName()));
- assertGauge("SSDCapacity", 0L,
- getMetrics(SCMNodeMetrics.class.getSimpleName()));
- assertGauge("SSDUsed", 0L,
- getMetrics(SCMNodeMetrics.class.getSimpleName()));
- assertGauge("SSDRemaining", 0L,
- getMetrics(SCMNodeMetrics.class.getSimpleName()));
- }
-
- @After
- public void teardown() {
- cluster.shutdown();
- }
-}
---------------------------------------------------------------------
To unsubscribe, e-mail: ozone-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: ozone-commits-help@hadoop.apache.org