You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by lj...@apache.org on 2022/03/03 06:50:03 UTC

[ozone] branch master updated: HDDS-6244. ContainerBalancer metrics don't show updated values in JMX (#3049)

This is an automated email from the ASF dual-hosted git repository.

ljain pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new b83c1f9  HDDS-6244. ContainerBalancer metrics don't show updated values in JMX (#3049)
b83c1f9 is described below

commit b83c1f9587ed1e49e84428366f58187b9a69444e
Author: Siddhant Sangwan <si...@gmail.com>
AuthorDate: Thu Mar 3 12:19:48 2022 +0530

    HDDS-6244. ContainerBalancer metrics don't show updated values in JMX (#3049)
---
 .../scm/container/balancer/ContainerBalancer.java  |  52 +++++---
 .../balancer/ContainerBalancerMetrics.java         | 139 ++++++++++++---------
 .../container/balancer/TestContainerBalancer.java  |  61 ++++-----
 3 files changed, 138 insertions(+), 114 deletions(-)

diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java
index 54fb9b8..bd6d3cc 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java
@@ -217,6 +217,7 @@ public class ContainerBalancer {
       //if no new move option is generated, it means the cluster can
       //not be balanced any more , so just stop
       IterationResult iR = doIteration();
+      metrics.incrementNumIterations(1);
       LOG.info("Result of this iteration of Container Balancer: {}", iR);
       if (iR == IterationResult.CAN_NOT_BALANCE_ANY_MORE) {
         stop();
@@ -290,15 +291,9 @@ public class ContainerBalancer {
         datanodeUsageInfo.getDatanodeDetails()));
 
     this.totalNodesInCluster = datanodeUsageInfos.size();
-    this.clusterCapacity = 0L;
-    this.clusterUsed = 0L;
-    this.clusterRemaining = 0L;
-    this.selectedContainers.clear();
-    this.overUtilizedNodes.clear();
-    this.underUtilizedNodes.clear();
-    this.unBalancedNodes.clear();
-    this.countDatanodesInvolvedPerIteration = 0;
-    this.sizeMovedPerIteration = 0;
+
+    // reset some variables and metrics for this iteration
+    resetState();
 
     clusterAvgUtilisation = calculateAvgUtilization(datanodeUsageInfos);
     if (LOG.isDebugEnabled()) {
@@ -336,11 +331,7 @@ public class ContainerBalancer {
       }
       if (Double.compare(utilization, upperLimit) > 0) {
         overUtilizedNodes.add(datanodeUsageInfo);
-        metrics.incrementDatanodesNumToBalance(1);
-
-        metrics.setMaxDatanodeUtilizedPercentage(Math.max(
-            metrics.getMaxDatanodeUtilizedPercentage(),
-            ratioToPercent(utilization)));
+        metrics.incrementNumDatanodesUnbalanced(1);
 
         // amount of bytes greater than upper limit in this node
         Long overUtilizedBytes = ratioToBytes(
@@ -351,7 +342,7 @@ public class ContainerBalancer {
         totalOverUtilizedBytes += overUtilizedBytes;
       } else if (Double.compare(utilization, lowerLimit) < 0) {
         underUtilizedNodes.add(datanodeUsageInfo);
-        metrics.incrementDatanodesNumToBalance(1);
+        metrics.incrementNumDatanodesUnbalanced(1);
 
         // amount of bytes lesser than lower limit in this node
         Long underUtilizedBytes = ratioToBytes(
@@ -364,7 +355,7 @@ public class ContainerBalancer {
         withinThresholdUtilizedNodes.add(datanodeUsageInfo);
       }
     }
-    metrics.setDataSizeToBalanceGB(
+    metrics.incrementDataSizeUnbalancedGB(
         Math.max(totalOverUtilizedBytes, totalUnderUtilizedBytes) /
             OzoneConsts.GB);
     Collections.reverse(underUtilizedNodes);
@@ -474,7 +465,7 @@ public class ContainerBalancer {
             ContainerInfo container =
                 containerManager.getContainer(moveSelection.getContainerID());
             this.sizeMovedPerIteration += container.getUsedBytes();
-            metrics.incrementMovedContainersNum(1);
+            metrics.incrementNumMovedContainersInLatestIteration(1);
             LOG.info("Move completed for container {} to target {}",
                 container.containerID(),
                 moveSelection.getTargetNode().getUuidString());
@@ -485,7 +476,8 @@ public class ContainerBalancer {
           }
         }
       } catch (InterruptedException e) {
-        LOG.warn("Container move for container {} was interrupted.",
+        LOG.warn("Interrupted while waiting for container move result for " +
+                "container {}.",
             moveSelection.getContainerID(), e);
         Thread.currentThread().interrupt();
       } catch (ExecutionException e) {
@@ -498,7 +490,9 @@ public class ContainerBalancer {
     }
     countDatanodesInvolvedPerIteration =
         sourceToTargetMap.size() + selectedTargets.size();
-    metrics.incrementDataSizeMovedGB(
+    metrics.incrementNumDatanodesInvolvedInLatestIteration(
+        countDatanodesInvolvedPerIteration);
+    metrics.incrementDataSizeMovedGBInLatestIteration(
         sizeMovedPerIteration / OzoneConsts.GB);
     LOG.info("Number of datanodes involved in this iteration: {}. Size moved " +
             "in this iteration: {}B.",
@@ -764,6 +758,26 @@ public class ContainerBalancer {
   }
 
   /**
+   * Resets per-iteration state and per-iteration metrics for a new iteration.
+   */
+  private void resetState() {
+    this.clusterCapacity = 0L;
+    this.clusterUsed = 0L;
+    this.clusterRemaining = 0L;
+    this.selectedContainers.clear();
+    this.overUtilizedNodes.clear();
+    this.underUtilizedNodes.clear();
+    this.unBalancedNodes.clear();
+    this.countDatanodesInvolvedPerIteration = 0;
+    this.sizeMovedPerIteration = 0;
+    metrics.resetDataSizeMovedGBInLatestIteration();
+    metrics.resetNumMovedContainersInLatestIteration();
+    metrics.resetNumDatanodesInvolvedInLatestIteration();
+    metrics.resetDataSizeUnbalancedGB();
+    metrics.resetNumDatanodesUnbalanced();
+  }
+
+  /**
    * Stops ContainerBalancer.
    */
   public void stop() {
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java
index 984787f..0799844 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java
@@ -23,8 +23,7 @@ import org.apache.hadoop.metrics2.MetricsSystem;
 import org.apache.hadoop.metrics2.annotation.Metric;
 import org.apache.hadoop.metrics2.annotation.Metrics;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
-import org.apache.hadoop.metrics2.lib.MutableGaugeInt;
-import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
+import org.apache.hadoop.metrics2.lib.MutableCounterLong;
 
 /**
  * Metrics related to Container Balancer running in SCM.
@@ -37,27 +36,26 @@ public final class ContainerBalancerMetrics {
 
   private final MetricsSystem ms;
 
-  @Metric(about = "The total amount of used space in GigaBytes that needs to " +
-      "be balanced.")
-  private MutableGaugeLong dataSizeToBalanceGB;
+  @Metric(about = "Amount of Gigabytes that Container Balancer moved" +
+      " in the latest iteration.")
+  private MutableCounterLong dataSizeMovedGBInLatestIteration;
 
-  @Metric(about = "The amount of Giga Bytes that have been moved to achieve " +
-      "balance.")
-  private MutableGaugeLong dataSizeMovedGB;
+  @Metric(about = "Number of containers that Container Balancer moved" +
+      " in the latest iteration.")
+  private MutableCounterLong numMovedContainersInLatestIteration;
 
-  @Metric(about = "Number of containers that Container Balancer has moved" +
-      " until now.")
-  private MutableGaugeLong movedContainersNum;
+  @Metric(about = "Number of iterations that Container Balancer has run for.")
+  private MutableCounterLong numIterations;
 
-  @Metric(about = "The total number of datanodes that need to be balanced.")
-  private MutableGaugeLong datanodesNumToBalance;
+  @Metric(about = "Number of datanodes that were involved in balancing in the" +
+      " latest iteration.")
+  private MutableCounterLong numDatanodesInvolvedInLatestIteration;
 
-  @Metric(about = "Number of datanodes that Container Balancer has balanced " +
-      "until now.")
-  private MutableGaugeLong datanodesNumBalanced;
+  @Metric(about = "Amount of data in Gigabytes that is causing unbalance.")
+  private MutableCounterLong dataSizeUnbalancedGB;
 
-  @Metric(about = "Utilisation value of the current maximum utilised datanode.")
-  private MutableGaugeInt maxDatanodeUtilizedPercentage;
+  @Metric(about = "Number of unbalanced datanodes.")
+  private MutableCounterLong numDatanodesUnbalanced;
 
   /**
    * Create and register metrics named {@link ContainerBalancerMetrics#NAME}
@@ -75,82 +73,101 @@ public final class ContainerBalancerMetrics {
     this.ms = ms;
   }
 
-  public long getDataSizeToBalanceGB() {
-    return dataSizeToBalanceGB.value();
+  /**
+   * Gets the amount of data moved by Container Balancer in the latest
+   * iteration.
+   * @return size in GB
+   */
+  public long getDataSizeMovedGBInLatestIteration() {
+    return dataSizeMovedGBInLatestIteration.value();
+  }
+
+  public void incrementDataSizeMovedGBInLatestIteration(long valueToAdd) {
+    this.dataSizeMovedGBInLatestIteration.incr(valueToAdd);
   }
 
-  public void setDataSizeToBalanceGB(long size) {
-    this.dataSizeToBalanceGB.set(size);
+  public void resetDataSizeMovedGBInLatestIteration() {
+    dataSizeMovedGBInLatestIteration.incr(
+        -getDataSizeMovedGBInLatestIteration());
   }
 
-  public long getDataSizeMovedGB() {
-    return dataSizeMovedGB.value();
+  /**
+   * Gets the number of containers moved by Container Balancer in the latest
+   * iteration.
+   * @return number of containers
+   */
+  public long getNumMovedContainersInLatestIteration() {
+    return numMovedContainersInLatestIteration.value();
   }
 
-  public void setDataSizeMovedGB(long dataSizeMovedGB) {
-    this.dataSizeMovedGB.set(dataSizeMovedGB);
+  public void incrementNumMovedContainersInLatestIteration(long valueToAdd) {
+    this.numMovedContainersInLatestIteration.incr(valueToAdd);
   }
 
-  public long incrementDataSizeMovedGB(long valueToAdd) {
-    this.dataSizeMovedGB.incr(valueToAdd);
-    return this.dataSizeMovedGB.value();
+  public void resetNumMovedContainersInLatestIteration() {
+    numMovedContainersInLatestIteration.incr(
+        -getNumMovedContainersInLatestIteration());
   }
 
-  public long getMovedContainersNum() {
-    return movedContainersNum.value();
+  /**
+   * Gets the number of iterations that Container Balancer has run for.
+   * @return number of iterations
+   */
+  public long getNumIterations() {
+    return numIterations.value();
   }
 
-  public void setMovedContainersNum(long movedContainersNum) {
-    this.movedContainersNum.set(movedContainersNum);
+  public void incrementNumIterations(long valueToAdd) {
+    numIterations.incr(valueToAdd);
   }
 
-  public long incrementMovedContainersNum(long valueToAdd) {
-    this.movedContainersNum.incr(valueToAdd);
-    return this.movedContainersNum.value();
+  /**
+   * Gets the number of datanodes that were involved in balancing in the latest
+   * iteration.
+   * @return number of datanodes
+   */
+  public long getNumDatanodesInvolvedInLatestIteration() {
+    return numDatanodesInvolvedInLatestIteration.value();
   }
 
-  public long getDatanodesNumToBalance() {
-    return datanodesNumToBalance.value();
+  public void incrementNumDatanodesInvolvedInLatestIteration(long valueToAdd) {
+    numDatanodesInvolvedInLatestIteration.incr(valueToAdd);
   }
 
-  public void setDatanodesNumToBalance(long datanodesNumToBalance) {
-    this.datanodesNumToBalance.set(datanodesNumToBalance);
+  public void resetNumDatanodesInvolvedInLatestIteration() {
+    numDatanodesInvolvedInLatestIteration.incr(
+        -getNumDatanodesInvolvedInLatestIteration());
   }
 
   /**
-   * Add specified valueToAdd to the number of datanodes that need to be
-   * balanced.
-   *
-   * @param valueToAdd number of datanodes to add
+   * Gets the amount of data in Gigabytes that is causing unbalance.
+   * @return size of data as a long value
    */
-  public void incrementDatanodesNumToBalance(long valueToAdd) {
-    this.datanodesNumToBalance.incr(valueToAdd);
+  public long getDataSizeUnbalancedGB() {
+    return dataSizeUnbalancedGB.value();
   }
 
-  public long getDatanodesNumBalanced() {
-    return datanodesNumBalanced.value();
+  public void incrementDataSizeUnbalancedGB(long valueToAdd) {
+    dataSizeUnbalancedGB.incr(valueToAdd);
   }
 
-  public void setDatanodesNumBalanced(long datanodesNumBalanced) {
-    this.datanodesNumBalanced.set(datanodesNumBalanced);
+  public void resetDataSizeUnbalancedGB() {
+    dataSizeUnbalancedGB.incr(-getDataSizeUnbalancedGB());
   }
 
   /**
-   * Add specified valueToAdd to datanodesNumBalanced.
-   *
-   * @param valueToAdd The value to add.
-   * @return The result after addition.
+   * Gets the number of datanodes that are unbalanced.
+   * @return long value
    */
-  public long incrementDatanodesNumBalanced(long valueToAdd) {
-    datanodesNumBalanced.incr(valueToAdd);
-    return datanodesNumBalanced.value();
+  public long getNumDatanodesUnbalanced() {
+    return numDatanodesUnbalanced.value();
   }
 
-  public int getMaxDatanodeUtilizedPercentage() {
-    return maxDatanodeUtilizedPercentage.value();
+  public void incrementNumDatanodesUnbalanced(long valueToAdd) {
+    numDatanodesUnbalanced.incr(valueToAdd);
   }
 
-  public void setMaxDatanodeUtilizedPercentage(int percentage) {
-    this.maxDatanodeUtilizedPercentage.set(percentage);
+  public void resetNumDatanodesUnbalanced() {
+    numDatanodesUnbalanced.incr(-getNumDatanodesUnbalanced());
   }
 }
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java
index 5debc27..6068c31 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java
@@ -54,6 +54,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.slf4j.event.Level;
 
+import java.time.Duration;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -225,15 +226,12 @@ public class TestContainerBalancer {
     balancerConfiguration.setThreshold(99.99);
     containerBalancer.start(balancerConfiguration);
 
-    // waiting for balance completed.
-    // TODO: this is a temporary implementation for now
-    // modify this after balancer is fully completed
-    try {
-      Thread.sleep(100);
-    } catch (InterruptedException e) { }
+    sleepWhileBalancing(100);
 
     containerBalancer.stop();
+    ContainerBalancerMetrics metrics = containerBalancer.getMetrics();
     Assert.assertEquals(0, containerBalancer.getUnBalancedNodes().size());
+    Assert.assertEquals(0, metrics.getNumDatanodesUnbalanced());
   }
 
   /**
@@ -250,16 +248,15 @@ public class TestContainerBalancer {
     balancerConfiguration.setIterations(1);
     containerBalancer.start(balancerConfiguration);
 
-    // waiting for balance completed.
-    // TODO: this is a temporary implementation for now
-    // modify this after balancer is fully completed
-    try {
-      Thread.sleep(1000);
-    } catch (InterruptedException e) { }
+    sleepWhileBalancing(500);
 
+    int number = percent * numberOfNodes / 100;
+    ContainerBalancerMetrics metrics = containerBalancer.getMetrics();
     Assert.assertFalse(
-        containerBalancer.getCountDatanodesInvolvedPerIteration() >
-            (percent * numberOfNodes / 100));
+        containerBalancer.getCountDatanodesInvolvedPerIteration() > number);
+    Assert.assertTrue(metrics.getNumDatanodesInvolvedInLatestIteration() > 0);
+    Assert.assertFalse(
+        metrics.getNumDatanodesInvolvedInLatestIteration() > number);
     containerBalancer.stop();
   }
 
@@ -316,16 +313,16 @@ public class TestContainerBalancer {
     balancerConfiguration.setIterations(1);
     containerBalancer.start(balancerConfiguration);
 
-    // waiting for balance completed.
-    // TODO: this is a temporary implementation for now
-    // modify this after balancer is fully completed
-    try {
-      Thread.sleep(1000);
-    } catch (InterruptedException e) { }
+    sleepWhileBalancing(500);
 
     // balancer should not have moved more size than the limit
     Assert.assertFalse(containerBalancer.getSizeMovedPerIteration() >
         10 * OzoneConsts.GB);
+
+    long size =
+        containerBalancer.getMetrics().getDataSizeMovedGBInLatestIteration();
+    Assert.assertTrue(size > 0);
+    Assert.assertFalse(size > 10);
     containerBalancer.stop();
   }
 
@@ -511,29 +508,25 @@ public class TestContainerBalancer {
 
   @Test
   public void testMetrics() {
+    conf.set("hdds.datanode.du.refresh.period", "1ms");
+    balancerConfiguration.setBalancingInterval(Duration.ofMillis(2));
     balancerConfiguration.setThreshold(10);
     balancerConfiguration.setIterations(1);
-    balancerConfiguration.setMaxSizeEnteringTarget(10 * OzoneConsts.GB);
-    balancerConfiguration.setMaxSizeToMovePerIteration(100 * OzoneConsts.GB);
+    balancerConfiguration.setMaxSizeEnteringTarget(6 * OzoneConsts.GB);
+    // deliberately set max size per iteration to a low value, 6GB
+    balancerConfiguration.setMaxSizeToMovePerIteration(6 * OzoneConsts.GB);
     balancerConfiguration.setMaxDatanodesPercentageToInvolvePerIteration(100);
 
     containerBalancer.start(balancerConfiguration);
+    sleepWhileBalancing(500);
 
-    // waiting for balance completed.
-    // TODO: this is a temporary implementation for now
-    // modify this after balancer is fully completed
-    try {
-      Thread.sleep(500);
-    } catch (InterruptedException e) { }
-
-    containerBalancer.stop();
     ContainerBalancerMetrics metrics = containerBalancer.getMetrics();
     Assert.assertEquals(determineExpectedUnBalancedNodes(
             balancerConfiguration.getThreshold()).size(),
-        metrics.getDatanodesNumToBalance());
-    Assert.assertEquals(ContainerBalancer.ratioToPercent(
-            nodeUtilizations.get(nodeUtilizations.size() - 1)),
-        metrics.getMaxDatanodeUtilizedPercentage());
+        metrics.getNumDatanodesUnbalanced());
+    Assert.assertTrue(metrics.getDataSizeMovedGBInLatestIteration() <= 6);
+    Assert.assertEquals(1, metrics.getNumIterations());
+    containerBalancer.stop();
   }
 
   /**

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org