You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ae...@apache.org on 2019/09/27 04:48:46 UTC

[hadoop] branch trunk updated: HDD-2193. Adding container related metrics in SCM.

This is an automated email from the ASF dual-hosted git repository.

aengineer pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/trunk by this push:
     new b6ef8cc  HDD-2193. Adding container related metrics in SCM.
b6ef8cc is described below

commit b6ef8cc1205d93a3c19d5f052b593758503d689c
Author: Bharat Viswanadham <bh...@apache.org>
AuthorDate: Thu Sep 26 15:22:32 2019 -0700

    HDD-2193. Adding container related metrics in SCM.
    
    Signed-off-by: Anu Engineer <ae...@apache.org>
---
 .../hdds/scm/container/SCMContainerManager.java    |  31 +++++-
 .../metrics/SCMContainerManagerMetrics.java        |  90 +++++++++++++++++
 .../hdds/scm/container/metrics/package-info.java   |  22 ++++
 .../metrics/TestSCMContainerManagerMetrics.java    | 112 +++++++++++++++++++++
 4 files changed, 252 insertions(+), 3 deletions(-)

diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/SCMContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/SCMContainerManager.java
index 3c44c4e..8f82b57 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/SCMContainerManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/SCMContainerManager.java
@@ -23,6 +23,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ContainerInfoProto;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState;
 import org.apache.hadoop.hdds.scm.ScmConfigKeys;
+import org.apache.hadoop.hdds.scm.container.metrics.SCMContainerManagerMetrics;
 import org.apache.hadoop.hdds.scm.exceptions.SCMException;
 import org.apache.hadoop.hdds.scm.node.NodeManager;
 import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
@@ -72,6 +73,8 @@ public class SCMContainerManager implements ContainerManager {
   private final ContainerStateManager containerStateManager;
   private final int numContainerPerOwnerInPipeline;
 
+  private final SCMContainerManagerMetrics scmContainerManagerMetrics;
+
   /**
    * Constructs a mapping class that creates mapping between container names
    * and pipelines.
@@ -109,6 +112,8 @@ public class SCMContainerManager implements ContainerManager {
             ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT_DEFAULT);
 
     loadExistingContainers();
+
+    scmContainerManagerMetrics = SCMContainerManagerMetrics.create();
   }
 
   private void loadExistingContainers() throws IOException {
@@ -204,6 +209,7 @@ public class SCMContainerManager implements ContainerManager {
       int count) {
     lock.lock();
     try {
+      scmContainerManagerMetrics.incNumListContainersOps();
       final long startId = startContainerID == null ?
           0 : startContainerID.getId();
       final List<ContainerID> containersIds =
@@ -241,11 +247,17 @@ public class SCMContainerManager implements ContainerManager {
   public ContainerInfo allocateContainer(final ReplicationType type,
       final ReplicationFactor replicationFactor, final String owner)
       throws IOException {
-    lock.lock();
     try {
-      final ContainerInfo containerInfo =
-          containerStateManager.allocateContainer(pipelineManager, type,
+      lock.lock();
+      ContainerInfo containerInfo = null;
+      try {
+        containerInfo =
+            containerStateManager.allocateContainer(pipelineManager, type,
               replicationFactor, owner);
+      } catch (IOException ex) {
+        scmContainerManagerMetrics.incNumFailureCreateContainers();
+        throw ex;
+      }
       // Add container to DB.
       try {
         addContainerToDB(containerInfo);
@@ -286,7 +298,9 @@ public class SCMContainerManager implements ContainerManager {
         LOG.warn("Unable to remove the container {} from container store," +
                 " it's missing!", containerID);
       }
+      scmContainerManagerMetrics.incNumSuccessfulDeleteContainers();
     } catch (ContainerNotFoundException cnfe) {
+      scmContainerManagerMetrics.incNumFailureDeleteContainers();
       throw new SCMException(
           "Failed to delete container " + containerID + ", reason : " +
               "container doesn't exist.",
@@ -447,9 +461,16 @@ public class SCMContainerManager implements ContainerManager {
           containerInfo.getContainerID());
       containerStore.put(containerIDBytes,
           containerInfo.getProtobuf().toByteArray());
+      // Incrementing here, as allocateBlock to create a container calls
+      // getMatchingContainer() and finally calls this API to add newly
+      // created container to DB.
+      // Even allocateContainer calls this API to add newly allocated
+      // container to DB. So we need to increment metrics here.
+      scmContainerManagerMetrics.incNumSuccessfulCreateContainers();
     } catch (IOException ex) {
       // If adding to containerStore fails, we should remove the container
       // from in-memory map.
+      scmContainerManagerMetrics.incNumFailureCreateContainers();
       LOG.error("Add Container to DB failed for ContainerID #{}",
           containerInfo.getContainerID());
       try {
@@ -546,5 +567,9 @@ public class SCMContainerManager implements ContainerManager {
     if (containerStore != null) {
       containerStore.close();
     }
+
+    if (scmContainerManagerMetrics != null) {
+      this.scmContainerManagerMetrics.unRegister();
+    }
   }
 }
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/metrics/SCMContainerManagerMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/metrics/SCMContainerManagerMetrics.java
new file mode 100644
index 0000000..d2cc9e1
--- /dev/null
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/metrics/SCMContainerManagerMetrics.java
@@ -0,0 +1,90 @@
+package org.apache.hadoop.hdds.scm.container.metrics;
+
+
+import org.apache.hadoop.metrics2.MetricsSystem;
+import org.apache.hadoop.metrics2.annotation.Metric;
+import org.apache.hadoop.metrics2.annotation.Metrics;
+import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.hadoop.metrics2.lib.MutableCounterLong;
+
+/**
+ * Operation counters (create/delete/list) for SCM's ContainerManager.
+ */
+@Metrics(about = "SCM ContainerManager metrics", context = "ozone")
+public final class SCMContainerManagerMetrics {
+
+  private static final String SOURCE_NAME =
+      SCMContainerManagerMetrics.class.getSimpleName();
+
+  // In-memory counters: they start from zero again after every restart.
+  // They count successful/failed create and delete container operations
+  // in SCM and listContainer calls. NOTE(review): metric names presumably
+  // follow the field names, so renaming a field renames its metric.
+  private @Metric MutableCounterLong numSuccessfulCreateContainers;
+  private @Metric MutableCounterLong numFailureCreateContainers;
+  private @Metric MutableCounterLong numSuccessfulDeleteContainers;
+  private @Metric MutableCounterLong numFailureDeleteContainers;
+  private @Metric MutableCounterLong numListContainerOps;
+
+  private SCMContainerManagerMetrics() {
+  }
+
+  /**
+   * Registers a new instance under SOURCE_NAME in the default metrics system.
+   * @return the SCMContainerManagerMetrics instance returned by register()
+   */
+  public static SCMContainerManagerMetrics create() {
+    MetricsSystem ms = DefaultMetricsSystem.instance();
+    return ms.register(SOURCE_NAME, "SCM ContainerManager Metrics",
+        new SCMContainerManagerMetrics());
+  }
+
+  /**
+   * Unregisters the SOURCE_NAME source from the default metrics system.
+   */
+  public void unRegister() {
+    MetricsSystem ms = DefaultMetricsSystem.instance();
+    ms.unregisterSource(SOURCE_NAME);
+  }
+
+  public void incNumSuccessfulCreateContainers() {
+    this.numSuccessfulCreateContainers.incr();
+  }
+
+  public void incNumFailureCreateContainers() {
+    this.numFailureCreateContainers.incr();
+  }
+
+  public void incNumSuccessfulDeleteContainers() {
+    this.numSuccessfulDeleteContainers.incr();
+  }
+
+  public void incNumFailureDeleteContainers() {
+    this.numFailureDeleteContainers.incr();
+  }
+
+  public void incNumListContainersOps() {
+    this.numListContainerOps.incr();
+  }
+
+  public long getNumSuccessfulCreateContainers() {
+    return numSuccessfulCreateContainers.value();
+  }
+
+  public long getNumFailureCreateContainers() {
+    return numFailureCreateContainers.value();
+  }
+
+  public long getNumSuccessfulDeleteContainers() {
+    return numSuccessfulDeleteContainers.value();
+  }
+
+  public long getNumFailureDeleteContainers() {
+    return numFailureDeleteContainers.value();
+  }
+
+  public long getNumListContainersOps() {
+    return numListContainerOps.value();
+  }
+
+}
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/metrics/package-info.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/metrics/package-info.java
new file mode 100644
index 0000000..3198de1
--- /dev/null
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/metrics/package-info.java
@@ -0,0 +1,22 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership.  The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package org.apache.hadoop.hdds.scm.container.metrics;
+
+/*
+ * This package contains StorageContainerManager metric classes.
+ */
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/metrics/TestSCMContainerManagerMetrics.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/metrics/TestSCMContainerManagerMetrics.java
new file mode 100644
index 0000000..349184f
--- /dev/null
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/metrics/TestSCMContainerManagerMetrics.java
@@ -0,0 +1,112 @@
+package org.apache.hadoop.hdds.scm.container.metrics;
+
+import org.apache.commons.lang3.RandomUtils;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.scm.XceiverClientManager;
+import org.apache.hadoop.hdds.scm.container.ContainerID;
+import org.apache.hadoop.hdds.scm.container.ContainerInfo;
+import org.apache.hadoop.hdds.scm.container.ContainerManager;
+import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
+import org.apache.hadoop.metrics2.MetricsRecordBuilder;
+import org.apache.hadoop.ozone.MiniOzoneCluster;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.IOException;
+
+import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
+import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
+import static org.junit.Assert.fail;
+
+/**
+ * Class used to test {@link SCMContainerManagerMetrics}.
+ */
+public class TestSCMContainerManagerMetrics {
+
+  private MiniOzoneCluster cluster;
+  private StorageContainerManager scm;
+  private XceiverClientManager xceiverClientManager;
+  private String containerOwner = "OZONE";
+
+  @Before
+  public void setup() throws Exception {
+    OzoneConfiguration conf = new OzoneConfiguration();
+    cluster = MiniOzoneCluster.newBuilder(conf).setNumDatanodes(1).build();
+    cluster.waitForClusterToBeReady();
+    scm = cluster.getStorageContainerManager();
+    xceiverClientManager = new XceiverClientManager(conf);
+  }
+
+
+  @After
+  public void teardown() {
+    cluster.shutdown();
+  }
+
+  @Test
+  public void testContainerOpsMetrics() throws IOException {
+    MetricsRecordBuilder metrics;
+    ContainerManager containerManager = scm.getContainerManager();
+    metrics = getMetrics(SCMContainerManagerMetrics.class.getSimpleName());
+
+    long numSuccessfulCreateContainers = getLongCounter(
+        "NumSuccessfulCreateContainers", metrics);
+
+    ContainerInfo containerInfo = containerManager.allocateContainer(
+        HddsProtos.ReplicationType.RATIS,
+        HddsProtos.ReplicationFactor.ONE, containerOwner);
+
+    metrics = getMetrics(SCMContainerManagerMetrics.class.getSimpleName());
+    Assert.assertEquals(getLongCounter("NumSuccessfulCreateContainers",
+        metrics), ++numSuccessfulCreateContainers);
+
+    try {
+      containerManager.allocateContainer(
+          HddsProtos.ReplicationType.RATIS,
+          HddsProtos.ReplicationFactor.THREE, containerOwner);
+      fail("testContainerOpsMetrics failed");
+    } catch (IOException ex) {
+      // Here it should fail, so it should have the old metric value.
+      metrics = getMetrics(SCMContainerManagerMetrics.class.getSimpleName());
+      Assert.assertEquals(getLongCounter("NumSuccessfulCreateContainers",
+          metrics), numSuccessfulCreateContainers);
+      Assert.assertEquals(getLongCounter("NumFailureCreateContainers",
+          metrics), 1);
+    }
+
+    metrics = getMetrics(SCMContainerManagerMetrics.class.getSimpleName());
+    long numSuccessfulDeleteContainers = getLongCounter(
+        "NumSuccessfulDeleteContainers", metrics);
+
+    containerManager.deleteContainer(
+        new ContainerID(containerInfo.getContainerID()));
+
+    metrics = getMetrics(SCMContainerManagerMetrics.class.getSimpleName());
+    Assert.assertEquals(getLongCounter("NumSuccessfulDeleteContainers",
+        metrics), numSuccessfulDeleteContainers + 1);
+
+    // Deleting an unknown container must count as a failure, not a success.
+    try {
+      // Give random container to delete.
+      containerManager.deleteContainer(
+          new ContainerID(RandomUtils.nextLong(10000, 20000)));
+      fail("testContainerOpsMetrics failed");
+    } catch (IOException ex) {
+      // The failed delete must leave the success counter where the one
+      // successful delete above put it: the delete baseline plus one.
+      Assert.assertEquals(getLongCounter("NumSuccessfulDeleteContainers",
+          getMetrics(SCMContainerManagerMetrics.class.getSimpleName())),
+          numSuccessfulDeleteContainers + 1);
+      metrics = getMetrics(SCMContainerManagerMetrics.class.getSimpleName());
+      Assert.assertEquals(getLongCounter("NumFailureDeleteContainers",
+          metrics), 1);
+    }
+
+    containerManager.listContainer(
+        new ContainerID(containerInfo.getContainerID()), 1);
+    metrics = getMetrics(SCMContainerManagerMetrics.class.getSimpleName());
+    Assert.assertEquals(getLongCounter("NumListContainerOps",
+        metrics), 1);
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org