You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by lj...@apache.org on 2022/03/03 06:50:03 UTC
[ozone] branch master updated: HDDS-6244. ContainerBalancer metrics don't show updated values in JMX (#3049)
This is an automated email from the ASF dual-hosted git repository.
ljain pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new b83c1f9 HDDS-6244. ContainerBalancer metrics don't show updated values in JMX (#3049)
b83c1f9 is described below
commit b83c1f9587ed1e49e84428366f58187b9a69444e
Author: Siddhant Sangwan <si...@gmail.com>
AuthorDate: Thu Mar 3 12:19:48 2022 +0530
HDDS-6244. ContainerBalancer metrics don't show updated values in JMX (#3049)
---
.../scm/container/balancer/ContainerBalancer.java | 52 +++++---
.../balancer/ContainerBalancerMetrics.java | 139 ++++++++++++---------
.../container/balancer/TestContainerBalancer.java | 61 ++++-----
3 files changed, 138 insertions(+), 114 deletions(-)
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java
index 54fb9b8..bd6d3cc 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java
@@ -217,6 +217,7 @@ public class ContainerBalancer {
//if no new move option is generated, it means the cluster can
//not be balanced any more , so just stop
IterationResult iR = doIteration();
+ metrics.incrementNumIterations(1);
LOG.info("Result of this iteration of Container Balancer: {}", iR);
if (iR == IterationResult.CAN_NOT_BALANCE_ANY_MORE) {
stop();
@@ -290,15 +291,9 @@ public class ContainerBalancer {
datanodeUsageInfo.getDatanodeDetails()));
this.totalNodesInCluster = datanodeUsageInfos.size();
- this.clusterCapacity = 0L;
- this.clusterUsed = 0L;
- this.clusterRemaining = 0L;
- this.selectedContainers.clear();
- this.overUtilizedNodes.clear();
- this.underUtilizedNodes.clear();
- this.unBalancedNodes.clear();
- this.countDatanodesInvolvedPerIteration = 0;
- this.sizeMovedPerIteration = 0;
+
+ // reset some variables and metrics for this iteration
+ resetState();
clusterAvgUtilisation = calculateAvgUtilization(datanodeUsageInfos);
if (LOG.isDebugEnabled()) {
@@ -336,11 +331,7 @@ public class ContainerBalancer {
}
if (Double.compare(utilization, upperLimit) > 0) {
overUtilizedNodes.add(datanodeUsageInfo);
- metrics.incrementDatanodesNumToBalance(1);
-
- metrics.setMaxDatanodeUtilizedPercentage(Math.max(
- metrics.getMaxDatanodeUtilizedPercentage(),
- ratioToPercent(utilization)));
+ metrics.incrementNumDatanodesUnbalanced(1);
// amount of bytes greater than upper limit in this node
Long overUtilizedBytes = ratioToBytes(
@@ -351,7 +342,7 @@ public class ContainerBalancer {
totalOverUtilizedBytes += overUtilizedBytes;
} else if (Double.compare(utilization, lowerLimit) < 0) {
underUtilizedNodes.add(datanodeUsageInfo);
- metrics.incrementDatanodesNumToBalance(1);
+ metrics.incrementNumDatanodesUnbalanced(1);
// amount of bytes lesser than lower limit in this node
Long underUtilizedBytes = ratioToBytes(
@@ -364,7 +355,7 @@ public class ContainerBalancer {
withinThresholdUtilizedNodes.add(datanodeUsageInfo);
}
}
- metrics.setDataSizeToBalanceGB(
+ metrics.incrementDataSizeUnbalancedGB(
Math.max(totalOverUtilizedBytes, totalUnderUtilizedBytes) /
OzoneConsts.GB);
Collections.reverse(underUtilizedNodes);
@@ -474,7 +465,7 @@ public class ContainerBalancer {
ContainerInfo container =
containerManager.getContainer(moveSelection.getContainerID());
this.sizeMovedPerIteration += container.getUsedBytes();
- metrics.incrementMovedContainersNum(1);
+ metrics.incrementNumMovedContainersInLatestIteration(1);
LOG.info("Move completed for container {} to target {}",
container.containerID(),
moveSelection.getTargetNode().getUuidString());
@@ -485,7 +476,8 @@ public class ContainerBalancer {
}
}
} catch (InterruptedException e) {
- LOG.warn("Container move for container {} was interrupted.",
+ LOG.warn("Interrupted while waiting for container move result for " +
+ "container {}.",
moveSelection.getContainerID(), e);
Thread.currentThread().interrupt();
} catch (ExecutionException e) {
@@ -498,7 +490,9 @@ public class ContainerBalancer {
}
countDatanodesInvolvedPerIteration =
sourceToTargetMap.size() + selectedTargets.size();
- metrics.incrementDataSizeMovedGB(
+ metrics.incrementNumDatanodesInvolvedInLatestIteration(
+ countDatanodesInvolvedPerIteration);
+ metrics.incrementDataSizeMovedGBInLatestIteration(
sizeMovedPerIteration / OzoneConsts.GB);
LOG.info("Number of datanodes involved in this iteration: {}. Size moved " +
"in this iteration: {}B.",
@@ -764,6 +758,26 @@ public class ContainerBalancer {
}
/**
+ * Resets some variables and metrics for this iteration.
+ */
+ private void resetState() {
+ this.clusterCapacity = 0L;
+ this.clusterUsed = 0L;
+ this.clusterRemaining = 0L;
+ this.selectedContainers.clear();
+ this.overUtilizedNodes.clear();
+ this.underUtilizedNodes.clear();
+ this.unBalancedNodes.clear();
+ this.countDatanodesInvolvedPerIteration = 0;
+ this.sizeMovedPerIteration = 0;
+ metrics.resetDataSizeMovedGBInLatestIteration();
+ metrics.resetNumMovedContainersInLatestIteration();
+ metrics.resetNumDatanodesInvolvedInLatestIteration();
+ metrics.resetDataSizeUnbalancedGB();
+ metrics.resetNumDatanodesUnbalanced();
+ }
+
+ /**
* Stops ContainerBalancer.
*/
public void stop() {
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java
index 984787f..0799844 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java
@@ -23,8 +23,7 @@ import org.apache.hadoop.metrics2.MetricsSystem;
import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
-import org.apache.hadoop.metrics2.lib.MutableGaugeInt;
-import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
+import org.apache.hadoop.metrics2.lib.MutableCounterLong;
/**
* Metrics related to Container Balancer running in SCM.
@@ -37,27 +36,26 @@ public final class ContainerBalancerMetrics {
private final MetricsSystem ms;
- @Metric(about = "The total amount of used space in GigaBytes that needs to " +
- "be balanced.")
- private MutableGaugeLong dataSizeToBalanceGB;
+ @Metric(about = "Amount of Gigabytes that Container Balancer moved" +
+ " in the latest iteration.")
+ private MutableCounterLong dataSizeMovedGBInLatestIteration;
- @Metric(about = "The amount of Giga Bytes that have been moved to achieve " +
- "balance.")
- private MutableGaugeLong dataSizeMovedGB;
+ @Metric(about = "Number of containers that Container Balancer moved" +
+ " in the latest iteration.")
+ private MutableCounterLong numMovedContainersInLatestIteration;
- @Metric(about = "Number of containers that Container Balancer has moved" +
- " until now.")
- private MutableGaugeLong movedContainersNum;
+ @Metric(about = "Number of iterations that Container Balancer has run for.")
+ private MutableCounterLong numIterations;
- @Metric(about = "The total number of datanodes that need to be balanced.")
- private MutableGaugeLong datanodesNumToBalance;
+ @Metric(about = "Number of datanodes that were involved in balancing in the" +
+ " latest iteration.")
+ private MutableCounterLong numDatanodesInvolvedInLatestIteration;
- @Metric(about = "Number of datanodes that Container Balancer has balanced " +
- "until now.")
- private MutableGaugeLong datanodesNumBalanced;
+ @Metric(about = "Amount of data in Gigabytes that is causing unbalance.")
+ private MutableCounterLong dataSizeUnbalancedGB;
- @Metric(about = "Utilisation value of the current maximum utilised datanode.")
- private MutableGaugeInt maxDatanodeUtilizedPercentage;
+ @Metric(about = "Number of unbalanced datanodes.")
+ private MutableCounterLong numDatanodesUnbalanced;
/**
* Create and register metrics named {@link ContainerBalancerMetrics#NAME}
@@ -75,82 +73,101 @@ public final class ContainerBalancerMetrics {
this.ms = ms;
}
- public long getDataSizeToBalanceGB() {
- return dataSizeToBalanceGB.value();
+ /**
+ * Gets the amount of data moved by Container Balancer in the latest
+ * iteration.
+ * @return size in GB
+ */
+ public long getDataSizeMovedGBInLatestIteration() {
+ return dataSizeMovedGBInLatestIteration.value();
+ }
+
+ public void incrementDataSizeMovedGBInLatestIteration(long valueToAdd) {
+ this.dataSizeMovedGBInLatestIteration.incr(valueToAdd);
}
- public void setDataSizeToBalanceGB(long size) {
- this.dataSizeToBalanceGB.set(size);
+ public void resetDataSizeMovedGBInLatestIteration() {
+ dataSizeMovedGBInLatestIteration.incr(
+ -getDataSizeMovedGBInLatestIteration());
}
- public long getDataSizeMovedGB() {
- return dataSizeMovedGB.value();
+ /**
+ * Gets the number of containers moved by Container Balancer in the latest
+ * iteration.
+ * @return number of containers
+ */
+ public long getNumMovedContainersInLatestIteration() {
+ return numMovedContainersInLatestIteration.value();
}
- public void setDataSizeMovedGB(long dataSizeMovedGB) {
- this.dataSizeMovedGB.set(dataSizeMovedGB);
+ public void incrementNumMovedContainersInLatestIteration(long valueToAdd) {
+ this.numMovedContainersInLatestIteration.incr(valueToAdd);
}
- public long incrementDataSizeMovedGB(long valueToAdd) {
- this.dataSizeMovedGB.incr(valueToAdd);
- return this.dataSizeMovedGB.value();
+ public void resetNumMovedContainersInLatestIteration() {
+ numMovedContainersInLatestIteration.incr(
+ -getNumMovedContainersInLatestIteration());
}
- public long getMovedContainersNum() {
- return movedContainersNum.value();
+ /**
+ * Gets the number of iterations that Container Balancer has run for.
+ * @return number of iterations
+ */
+ public long getNumIterations() {
+ return numIterations.value();
}
- public void setMovedContainersNum(long movedContainersNum) {
- this.movedContainersNum.set(movedContainersNum);
+ public void incrementNumIterations(long valueToAdd) {
+ numIterations.incr(valueToAdd);
}
- public long incrementMovedContainersNum(long valueToAdd) {
- this.movedContainersNum.incr(valueToAdd);
- return this.movedContainersNum.value();
+ /**
+ * Gets number of datanodes that were involved in balancing in the latest
+ * iteration.
+ * @return number of datanodes
+ */
+ public long getNumDatanodesInvolvedInLatestIteration() {
+ return numDatanodesInvolvedInLatestIteration.value();
}
- public long getDatanodesNumToBalance() {
- return datanodesNumToBalance.value();
+ public void incrementNumDatanodesInvolvedInLatestIteration(long valueToAdd) {
+ numDatanodesInvolvedInLatestIteration.incr(valueToAdd);
}
- public void setDatanodesNumToBalance(long datanodesNumToBalance) {
- this.datanodesNumToBalance.set(datanodesNumToBalance);
+ public void resetNumDatanodesInvolvedInLatestIteration() {
+ numDatanodesInvolvedInLatestIteration.incr(
+ -getNumDatanodesInvolvedInLatestIteration());
}
/**
- * Add specified valueToAdd to the number of datanodes that need to be
- * balanced.
- *
- * @param valueToAdd number of datanodes to add
+ * Gets the amount of data in Gigabytes that is causing unbalance.
+ * @return size of data as a long value
*/
- public void incrementDatanodesNumToBalance(long valueToAdd) {
- this.datanodesNumToBalance.incr(valueToAdd);
+ public long getDataSizeUnbalancedGB() {
+ return dataSizeUnbalancedGB.value();
}
- public long getDatanodesNumBalanced() {
- return datanodesNumBalanced.value();
+ public void incrementDataSizeUnbalancedGB(long valueToAdd) {
+ dataSizeUnbalancedGB.incr(valueToAdd);
}
- public void setDatanodesNumBalanced(long datanodesNumBalanced) {
- this.datanodesNumBalanced.set(datanodesNumBalanced);
+ public void resetDataSizeUnbalancedGB() {
+ dataSizeUnbalancedGB.incr(-getDataSizeUnbalancedGB());
}
/**
- * Add specified valueToAdd to datanodesNumBalanced.
- *
- * @param valueToAdd The value to add.
- * @return The result after addition.
+ * Gets the number of datanodes that are unbalanced.
+ * @return long value
*/
- public long incrementDatanodesNumBalanced(long valueToAdd) {
- datanodesNumBalanced.incr(valueToAdd);
- return datanodesNumBalanced.value();
+ public long getNumDatanodesUnbalanced() {
+ return numDatanodesUnbalanced.value();
}
- public int getMaxDatanodeUtilizedPercentage() {
- return maxDatanodeUtilizedPercentage.value();
+ public void incrementNumDatanodesUnbalanced(long valueToAdd) {
+ numDatanodesUnbalanced.incr(valueToAdd);
}
- public void setMaxDatanodeUtilizedPercentage(int percentage) {
- this.maxDatanodeUtilizedPercentage.set(percentage);
+ public void resetNumDatanodesUnbalanced() {
+ numDatanodesUnbalanced.incr(-getNumDatanodesUnbalanced());
}
}
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java
index 5debc27..6068c31 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java
@@ -54,6 +54,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.event.Level;
+import java.time.Duration;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
@@ -225,15 +226,12 @@ public class TestContainerBalancer {
balancerConfiguration.setThreshold(99.99);
containerBalancer.start(balancerConfiguration);
- // waiting for balance completed.
- // TODO: this is a temporary implementation for now
- // modify this after balancer is fully completed
- try {
- Thread.sleep(100);
- } catch (InterruptedException e) { }
+ sleepWhileBalancing(100);
containerBalancer.stop();
+ ContainerBalancerMetrics metrics = containerBalancer.getMetrics();
Assert.assertEquals(0, containerBalancer.getUnBalancedNodes().size());
+ Assert.assertEquals(0, metrics.getNumDatanodesUnbalanced());
}
/**
@@ -250,16 +248,15 @@ public class TestContainerBalancer {
balancerConfiguration.setIterations(1);
containerBalancer.start(balancerConfiguration);
- // waiting for balance completed.
- // TODO: this is a temporary implementation for now
- // modify this after balancer is fully completed
- try {
- Thread.sleep(1000);
- } catch (InterruptedException e) { }
+ sleepWhileBalancing(500);
+ int number = percent * numberOfNodes / 100;
+ ContainerBalancerMetrics metrics = containerBalancer.getMetrics();
Assert.assertFalse(
- containerBalancer.getCountDatanodesInvolvedPerIteration() >
- (percent * numberOfNodes / 100));
+ containerBalancer.getCountDatanodesInvolvedPerIteration() > number);
+ Assert.assertTrue(metrics.getNumDatanodesInvolvedInLatestIteration() > 0);
+ Assert.assertFalse(
+ metrics.getNumDatanodesInvolvedInLatestIteration() > number);
containerBalancer.stop();
}
@@ -316,16 +313,16 @@ public class TestContainerBalancer {
balancerConfiguration.setIterations(1);
containerBalancer.start(balancerConfiguration);
- // waiting for balance completed.
- // TODO: this is a temporary implementation for now
- // modify this after balancer is fully completed
- try {
- Thread.sleep(1000);
- } catch (InterruptedException e) { }
+ sleepWhileBalancing(500);
// balancer should not have moved more size than the limit
Assert.assertFalse(containerBalancer.getSizeMovedPerIteration() >
10 * OzoneConsts.GB);
+
+ long size =
+ containerBalancer.getMetrics().getDataSizeMovedGBInLatestIteration();
+ Assert.assertTrue(size > 0);
+ Assert.assertFalse(size > 10);
containerBalancer.stop();
}
@@ -511,29 +508,25 @@ public class TestContainerBalancer {
@Test
public void testMetrics() {
+ conf.set("hdds.datanode.du.refresh.period", "1ms");
+ balancerConfiguration.setBalancingInterval(Duration.ofMillis(2));
balancerConfiguration.setThreshold(10);
balancerConfiguration.setIterations(1);
- balancerConfiguration.setMaxSizeEnteringTarget(10 * OzoneConsts.GB);
- balancerConfiguration.setMaxSizeToMovePerIteration(100 * OzoneConsts.GB);
+ balancerConfiguration.setMaxSizeEnteringTarget(6 * OzoneConsts.GB);
+ // deliberately set max size per iteration to a low value, 6GB
+ balancerConfiguration.setMaxSizeToMovePerIteration(6 * OzoneConsts.GB);
balancerConfiguration.setMaxDatanodesPercentageToInvolvePerIteration(100);
containerBalancer.start(balancerConfiguration);
+ sleepWhileBalancing(500);
- // waiting for balance completed.
- // TODO: this is a temporary implementation for now
- // modify this after balancer is fully completed
- try {
- Thread.sleep(500);
- } catch (InterruptedException e) { }
-
- containerBalancer.stop();
ContainerBalancerMetrics metrics = containerBalancer.getMetrics();
Assert.assertEquals(determineExpectedUnBalancedNodes(
balancerConfiguration.getThreshold()).size(),
- metrics.getDatanodesNumToBalance());
- Assert.assertEquals(ContainerBalancer.ratioToPercent(
- nodeUtilizations.get(nodeUtilizations.size() - 1)),
- metrics.getMaxDatanodeUtilizedPercentage());
+ metrics.getNumDatanodesUnbalanced());
+ Assert.assertTrue(metrics.getDataSizeMovedGBInLatestIteration() <= 6);
+ Assert.assertEquals(1, metrics.getNumIterations());
+ containerBalancer.stop();
}
/**
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org