Posted to commits@ozone.apache.org by di...@apache.org on 2020/04/23 23:27:10 UTC

[hadoop-ozone] branch master updated: HDDS-3480. TestSCMNodeMetrics is flaky (#860)

This is an automated email from the ASF dual-hosted git repository.

dineshc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hadoop-ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 0e3f807  HDDS-3480. TestSCMNodeMetrics is flaky (#860)
0e3f807 is described below

commit 0e3f807d1f09e0399ea5509444afed8025c4951f
Author: Elek, Márton <el...@users.noreply.github.com>
AuthorDate: Fri Apr 24 01:27:01 2020 +0200

    HDDS-3480. TestSCMNodeMetrics is flaky (#860)
---
 .../hadoop/hdds/scm/node/SCMNodeMetrics.java       |  17 +-
 .../hadoop/ozone/scm/node/TestSCMNodeMetrics.java  | 204 +++++++++++++++++++++
 .../hadoop/ozone/scm/node/TestSCMNodeMetrics.java  | 177 ------------------
 3 files changed, 212 insertions(+), 186 deletions(-)
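
In short: the old test (removed below) was timing dependent. It started a
MiniOzoneCluster, triggered a real datanode heartbeat, slept a fixed 100 ms,
and only then asserted the counter, so the assertion raced against SCM's
asynchronous heartbeat processing. The replacement (added below) is a plain
unit test that constructs an SCMNodeManager and calls it directly on the
test thread, so every metric update completes before the assertion runs.
A condensed sketch of the new pattern, assembled from the patch below for
illustration (it is not itself part of the commit):

    // Metrics are read through the Hadoop test helpers in
    // org.apache.hadoop.test.MetricsAsserts. The metrics source is
    // looked up by name; this commit makes SOURCE_NAME public so the
    // test can reference the constant.
    long before = getLongCounter("NumHBProcessed",
        getMetrics(SCMNodeMetrics.SOURCE_NAME));

    // Direct, synchronous call into the node manager: no cluster,
    // no RPC, no Thread.sleep().
    nodeManager.processHeartbeat(registeredDatanode);

    assertEquals(before + 1, getLongCounter("NumHBProcessed",
        getMetrics(SCMNodeMetrics.SOURCE_NAME)));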

diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeMetrics.java
index dbb4242..c515f2f 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeMetrics.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeMetrics.java
@@ -18,12 +18,6 @@
 
 package org.apache.hadoop.hdds.scm.node;
 
-import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DEAD;
-import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DECOMMISSIONED;
-import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DECOMMISSIONING;
-import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY;
-import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.STALE;
-
 import java.util.Map;
 
 import org.apache.hadoop.hdds.annotation.InterfaceAudience;
@@ -39,6 +33,12 @@ import org.apache.hadoop.metrics2.lib.MetricsRegistry;
 import org.apache.hadoop.metrics2.lib.MutableCounterLong;
 import org.apache.hadoop.ozone.OzoneConsts;
 
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DEAD;
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DECOMMISSIONED;
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DECOMMISSIONING;
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY;
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.STALE;
+
 /**
  * This class maintains Node related metrics.
  */
@@ -46,7 +46,7 @@ import org.apache.hadoop.ozone.OzoneConsts;
 @Metrics(about = "SCM NodeManager Metrics", context = OzoneConsts.OZONE)
 public final class SCMNodeMetrics implements MetricsSource {
 
-  private static final String SOURCE_NAME =
+  public static final String SOURCE_NAME =
       SCMNodeMetrics.class.getSimpleName();
 
   private @Metric MutableCounterLong numHBProcessed;
@@ -113,14 +113,13 @@ public final class SCMNodeMetrics implements MetricsSource {
   }
 
   /**
-   * Get aggregated counter and gauage metrics.
+   * Get aggregated counter and gauge metrics.
    */
   @Override
   @SuppressWarnings("SuspiciousMethodCalls")
   public void getMetrics(MetricsCollector collector, boolean all) {
     Map<String, Integer> nodeCount = managerMXBean.getNodeCount();
     Map<String, Long> nodeInfo = managerMXBean.getNodeInfo();
-
     registry.snapshot(
         collector.addRecord(registry.info()) // Add annotated ones first
             .addGauge(Interns.info(
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/scm/node/TestSCMNodeMetrics.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/scm/node/TestSCMNodeMetrics.java
new file mode 100644
index 0000000..cebedf2
--- /dev/null
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/scm/node/TestSCMNodeMetrics.java
@@ -0,0 +1,204 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.scm.node;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.UUID;
+
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.MockDatanodeDetails;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeType;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.NodeReportProto;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReportsProto;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageReportProto;
+import org.apache.hadoop.hdds.scm.TestUtils;
+import org.apache.hadoop.hdds.scm.net.NetworkTopology;
+import org.apache.hadoop.hdds.scm.node.SCMNodeManager;
+import org.apache.hadoop.hdds.scm.node.SCMNodeMetrics;
+import org.apache.hadoop.hdds.scm.server.SCMStorageConfig;
+import org.apache.hadoop.hdds.server.events.EventQueue;
+import org.apache.hadoop.metrics2.MetricsRecordBuilder;
+
+import static org.apache.hadoop.test.MetricsAsserts.assertGauge;
+import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
+import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import static org.junit.Assert.assertEquals;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+/**
+ * Test cases to verify the metrics exposed by SCMNodeManager.
+ */
+public class TestSCMNodeMetrics {
+
+  private static SCMNodeManager nodeManager;
+
+  private static DatanodeDetails registeredDatanode;
+
+  @BeforeClass
+  public static void setup() throws Exception {
+
+    OzoneConfiguration source = new OzoneConfiguration();
+    EventQueue publisher = new EventQueue();
+    SCMStorageConfig config =
+        new SCMStorageConfig(NodeType.DATANODE, new File("/tmp"), "storage");
+    nodeManager = new SCMNodeManager(source, config, publisher,
+        Mockito.mock(NetworkTopology.class));
+
+    registeredDatanode = DatanodeDetails.newBuilder()
+        .setHostName("localhost")
+        .setIpAddress("127.0.0.1")
+        .setUuid(UUID.randomUUID().toString())
+        .build();
+
+    nodeManager.register(registeredDatanode, createNodeReport(),
+        PipelineReportsProto.newBuilder().build());
+
+  }
+
+  @AfterClass
+  public static void teardown() throws IOException {
+    nodeManager.close();
+  }
+
+  /**
+   * Verifies heartbeat processing count.
+   *
+   * @throws InterruptedException
+   */
+  @Test
+  public void testHBProcessing() throws InterruptedException {
+    long hbProcessed = getCounter("NumHBProcessed");
+
+    NodeReportProto nodeReport = createNodeReport();
+
+    nodeManager.processHeartbeat(registeredDatanode);
+
+    assertEquals("NumHBProcessed", hbProcessed + 1,
+        getCounter("NumHBProcessed"));
+  }
+
+  /**
+   * Verifies heartbeat processing failure count.
+   */
+  @Test
+  public void testHBProcessingFailure() {
+
+    long hbProcessedFailed = getCounter("NumHBProcessingFailed");
+
+    nodeManager.processHeartbeat(MockDatanodeDetails
+        .randomDatanodeDetails());
+
+    assertEquals("NumHBProcessingFailed", hbProcessedFailed + 1,
+        getCounter("NumHBProcessingFailed"));
+  }
+
+  /**
+   * Verifies node report processing count.
+   *
+   * @throws InterruptedException
+   */
+  @Test
+  public void testNodeReportProcessing() throws InterruptedException {
+
+    long nrProcessed = getCounter("NumNodeReportProcessed");
+
+    StorageReportProto storageReport =
+        TestUtils.createStorageReport(registeredDatanode.getUuid(), "/tmp", 100,
+            10, 90,
+            null);
+    NodeReportProto nodeReport = NodeReportProto.newBuilder()
+        .addStorageReport(storageReport).build();
+
+    nodeManager.processNodeReport(registeredDatanode, nodeReport);
+    Assert.assertEquals("NumNodeReportProcessed", nrProcessed + 1,
+        getCounter("NumNodeReportProcessed"));
+  }
+
+  /**
+   * Verifies node report processing failure count.
+   */
+  @Test
+  public void testNodeReportProcessingFailure() {
+
+    long nrProcessed = getCounter("NumNodeReportProcessingFailed");
+    DatanodeDetails randomDatanode =
+        MockDatanodeDetails.randomDatanodeDetails();
+
+    StorageReportProto storageReport = TestUtils.createStorageReport(
+        randomDatanode.getUuid(), "/tmp", 100, 10, 90, null);
+
+    NodeReportProto nodeReport = NodeReportProto.newBuilder()
+        .addStorageReport(storageReport).build();
+
+    nodeManager.processNodeReport(randomDatanode, nodeReport);
+    assertEquals("NumNodeReportProcessingFailed", nrProcessed + 1,
+        getCounter("NumNodeReportProcessingFailed"));
+  }
+
+  /**
+   * Verify that datanode aggregated state and capacity metrics are
+   * reported.
+   */
+  @Test
+  public void testNodeCountAndInfoMetricsReported() throws Exception {
+
+    StorageReportProto storageReport = TestUtils.createStorageReport(
+        registeredDatanode.getUuid(), "/tmp", 100, 10, 90, null);
+    NodeReportProto nodeReport = NodeReportProto.newBuilder()
+        .addStorageReport(storageReport).build();
+
+    nodeManager.processNodeReport(registeredDatanode, nodeReport);
+
+    MetricsRecordBuilder metricsSource = getMetrics(SCMNodeMetrics.SOURCE_NAME);
+
+    assertGauge("HealthyNodes", 1, metricsSource);
+    assertGauge("StaleNodes", 0, metricsSource);
+    assertGauge("DeadNodes", 0, metricsSource);
+    assertGauge("DecommissioningNodes", 0, metricsSource);
+    assertGauge("DecommissionedNodes", 0, metricsSource);
+    assertGauge("DiskCapacity", 100L, metricsSource);
+    assertGauge("DiskUsed", 10L, metricsSource);
+    assertGauge("DiskRemaining", 90L, metricsSource);
+    assertGauge("SSDCapacity", 0L, metricsSource);
+    assertGauge("SSDUsed", 0L, metricsSource);
+    assertGauge("SSDRemaining", 0L, metricsSource);
+
+  }
+
+  private long getCounter(String metricName) {
+    return getLongCounter(metricName, getMetrics(SCMNodeMetrics.SOURCE_NAME));
+  }
+
+  private static NodeReportProto createNodeReport() {
+    return NodeReportProto.newBuilder()
+        .addStorageReport(
+            StorageReportProto.newBuilder()
+                .setCapacity(1)
+                .setStorageUuid(UUID.randomUUID().toString())
+                .setStorageLocation("/tmp")
+                .build())
+        .build();
+  }
+}
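
The fixture design above is what removes the flakiness: one SCMNodeManager
is built per class (see setup/teardown under @BeforeClass/@AfterClass)
against an EventQueue and a mocked NetworkTopology, with a single datanode
registered up front. Each counter test then reads the current value, makes
one synchronous nodeManager call, and asserts the value advanced by exactly
one; there is no cluster startup and no sleep anywhere. To run just this
test, the standard surefire selector should work (illustrative command, not
part of the commit):

    mvn -pl hadoop-hdds/server-scm test -Dtest=TestSCMNodeMetrics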
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestSCMNodeMetrics.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestSCMNodeMetrics.java
deleted file mode 100644
index b36c323..0000000
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestSCMNodeMetrics.java
+++ /dev/null
@@ -1,177 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.ozone.scm.node;
-
-import org.apache.hadoop.hdds.HddsConfigKeys;
-import org.apache.hadoop.hdds.conf.OzoneConfiguration;
-import org.apache.hadoop.hdds.protocol.DatanodeDetails;
-import org.apache.hadoop.hdds.protocol.MockDatanodeDetails;
-import org.apache.hadoop.hdds.protocol.proto
-    .StorageContainerDatanodeProtocolProtos.NodeReportProto;
-import org.apache.hadoop.hdds.protocol.proto
-    .StorageContainerDatanodeProtocolProtos.StorageReportProto;
-import org.apache.hadoop.hdds.scm.TestUtils;
-import org.apache.hadoop.hdds.scm.node.SCMNodeMetrics;
-import org.apache.hadoop.metrics2.MetricsRecordBuilder;
-import org.apache.hadoop.ozone.HddsDatanodeService;
-import org.apache.hadoop.ozone.MiniOzoneCluster;
-
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
-
-import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
-import static org.apache.hadoop.test.MetricsAsserts.assertGauge;
-import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
-import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
-
-/**
- * Test cases to verify the metrics exposed by SCMNodeManager.
- */
-public class TestSCMNodeMetrics {
-
-  private MiniOzoneCluster cluster;
-
-  @Before
-  public void setup() throws Exception {
-    OzoneConfiguration conf = new OzoneConfiguration();
-    conf.setBoolean(HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_CREATION, false);
-    cluster = MiniOzoneCluster.newBuilder(conf).setNumDatanodes(1).build();
-    cluster.waitForClusterToBeReady();
-  }
-
-  /**
-   * Verifies heartbeat processing count.
-   *
-   * @throws InterruptedException
-   */
-  @Test
-  public void testHBProcessing() throws InterruptedException {
-    MetricsRecordBuilder metrics = getMetrics(
-        SCMNodeMetrics.class.getSimpleName());
-    long hbProcessed = getLongCounter("NumHBProcessed", metrics);
-    cluster.getHddsDatanodes().get(0)
-        .getDatanodeStateMachine().triggerHeartbeat();
-    // Give some time so that SCM receives and processes the heartbeat.
-    Thread.sleep(100L);
-    assertCounter("NumHBProcessed", hbProcessed + 1,
-        getMetrics(SCMNodeMetrics.class.getSimpleName()));
-  }
-
-  /**
-   * Verifies heartbeat processing failure count.
-   */
-  @Test
-  public void testHBProcessingFailure() {
-    MetricsRecordBuilder metrics = getMetrics(
-        SCMNodeMetrics.class.getSimpleName());
-    long hbProcessedFailed = getLongCounter("NumHBProcessingFailed", metrics);
-    cluster.getStorageContainerManager().getScmNodeManager()
-        .processHeartbeat(MockDatanodeDetails.randomDatanodeDetails());
-    assertCounter("NumHBProcessingFailed", hbProcessedFailed + 1,
-        getMetrics(SCMNodeMetrics.class.getSimpleName()));
-  }
-
-  /**
-   * Verifies node report processing count.
-   *
-   * @throws InterruptedException
-   */
-  @Test
-  public void testNodeReportProcessing() throws InterruptedException {
-    MetricsRecordBuilder metrics = getMetrics(
-        SCMNodeMetrics.class.getSimpleName());
-    long nrProcessed = getLongCounter("NumNodeReportProcessed", metrics);
-    HddsDatanodeService datanode = cluster.getHddsDatanodes().get(0);
-    StorageReportProto storageReport = TestUtils.createStorageReport(
-        datanode.getDatanodeDetails().getUuid(), "/tmp", 100, 10, 90, null);
-    NodeReportProto nodeReport = NodeReportProto.newBuilder()
-        .addStorageReport(storageReport).build();
-    datanode.getDatanodeStateMachine().getContext().addReport(nodeReport);
-    cluster.getStorageContainerManager().getScmNodeManager()
-        .processNodeReport(datanode.getDatanodeDetails(), nodeReport);
-
-    assertCounter("NumNodeReportProcessed", nrProcessed + 1,
-        getMetrics(SCMNodeMetrics.class.getSimpleName()));
-  }
-
-  /**
-   * Verifies node report processing failure count.
-   */
-  @Test
-  public void testNodeReportProcessingFailure() {
-    MetricsRecordBuilder metrics = getMetrics(
-        SCMNodeMetrics.class.getSimpleName());
-    long nrProcessed = getLongCounter("NumNodeReportProcessingFailed",
-        metrics);
-    DatanodeDetails datanode = MockDatanodeDetails.randomDatanodeDetails();
-    StorageReportProto storageReport = TestUtils.createStorageReport(
-        datanode.getUuid(), "/tmp", 100, 10, 90, null);
-    NodeReportProto nodeReport = NodeReportProto.newBuilder()
-        .addStorageReport(storageReport).build();
-
-    cluster.getStorageContainerManager().getScmNodeManager()
-        .processNodeReport(datanode, nodeReport);
-    assertCounter("NumNodeReportProcessingFailed", nrProcessed + 1,
-        getMetrics(SCMNodeMetrics.class.getSimpleName()));
-  }
-
-  /**
-   * Verify that datanode aggregated state and capacity metrics are reported.
-   */
-  @Test
-  public void testNodeCountAndInfoMetricsReported() throws Exception {
-    HddsDatanodeService datanode = cluster.getHddsDatanodes().get(0);
-    StorageReportProto storageReport = TestUtils.createStorageReport(
-        datanode.getDatanodeDetails().getUuid(), "/tmp", 100, 10, 90, null);
-    NodeReportProto nodeReport = NodeReportProto.newBuilder()
-        .addStorageReport(storageReport).build();
-    datanode.getDatanodeStateMachine().getContext().addReport(nodeReport);
-    cluster.getStorageContainerManager().getScmNodeManager()
-        .processNodeReport(datanode.getDatanodeDetails(), nodeReport);
-
-    assertGauge("HealthyNodes", 1,
-        getMetrics(SCMNodeMetrics.class.getSimpleName()));
-    assertGauge("StaleNodes", 0,
-        getMetrics(SCMNodeMetrics.class.getSimpleName()));
-    assertGauge("DeadNodes", 0,
-        getMetrics(SCMNodeMetrics.class.getSimpleName()));
-    assertGauge("DecommissioningNodes", 0,
-        getMetrics(SCMNodeMetrics.class.getSimpleName()));
-    assertGauge("DecommissionedNodes", 0,
-        getMetrics(SCMNodeMetrics.class.getSimpleName()));
-    assertGauge("DiskCapacity", 100L,
-        getMetrics(SCMNodeMetrics.class.getSimpleName()));
-    assertGauge("DiskUsed", 10L,
-        getMetrics(SCMNodeMetrics.class.getSimpleName()));
-    assertGauge("DiskRemaining", 90L,
-        getMetrics(SCMNodeMetrics.class.getSimpleName()));
-    assertGauge("SSDCapacity", 0L,
-        getMetrics(SCMNodeMetrics.class.getSimpleName()));
-    assertGauge("SSDUsed", 0L,
-        getMetrics(SCMNodeMetrics.class.getSimpleName()));
-    assertGauge("SSDRemaining", 0L,
-        getMetrics(SCMNodeMetrics.class.getSimpleName()));
-  }
-
-  @After
-  public void teardown() {
-    cluster.shutdown();
-  }
-}
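
For reference, the source of the flakiness is visible in the removed
testHBProcessing() above: triggerHeartbeat() only initiates the heartbeat,
and the fixed Thread.sleep(100L) merely hopes that SCM has received and
processed it before the counter is asserted. On a slow or heavily loaded CI
host the assertion can run first, which is exactly the intermittent failure
that the direct, synchronous SCMNodeManager calls in the new test eliminate.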


---------------------------------------------------------------------
To unsubscribe, e-mail: ozone-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: ozone-commits-help@hadoop.apache.org