You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by ad...@apache.org on 2022/06/30 19:13:44 UTC
[ozone] branch master updated: HDDS-6959. Fix move timeout in latest iteration metric (#3562)
This is an automated email from the ASF dual-hosted git repository.
adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 7838582393 HDDS-6959. Fix move timeout in latest iteration metric (#3562)
7838582393 is described below
commit 7838582393729d694998682df6939d3e35d0c1d4
Author: Ritesh H Shukla <ke...@gmail.com>
AuthorDate: Thu Jun 30 12:13:37 2022 -0700
HDDS-6959. Fix move timeout in latest iteration metric (#3562)
---
.../scm/container/balancer/ContainerBalancer.java | 3 +-
.../balancer/ContainerBalancerMetrics.java | 37 ++++++++++++++++++++++
.../container/replication/ReplicationManager.java | 2 +-
.../container/balancer/TestContainerBalancer.java | 31 ++++++++++++++++++
4 files changed, 71 insertions(+), 2 deletions(-)
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java
index 49dd12b8e3..a77f7a8c05 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java
@@ -635,6 +635,8 @@ public class ContainerBalancer extends StatefulService {
future = replicationManager
.move(containerID, source, moveSelection.getTargetNode())
.whenComplete((result, ex) -> {
+
+ metrics.incrementCurrentIterationContainerMoveMetric(result, 1);
if (ex != null) {
LOG.info("Container move for container {} from source {} to " +
"target {} failed with exceptions {}",
@@ -645,7 +647,6 @@ public class ContainerBalancer extends StatefulService {
if (result == LegacyReplicationManager.MoveResult.COMPLETED) {
metrics.incrementDataSizeMovedGBInLatestIteration(
containerInfo.getUsedBytes() / OzoneConsts.GB);
- metrics.incrementNumContainerMovesCompletedInLatestIteration(1);
if (LOG.isDebugEnabled()) {
LOG.debug(
"Container move completed for container {} to target {}",
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java
index 3a7ce49ab2..b135c1ca2a 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.hdds.scm.container.balancer;
+import org.apache.hadoop.hdds.scm.container.replication.LegacyReplicationManager.MoveResult;
import org.apache.hadoop.metrics2.MetricsSystem;
import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.annotation.Metrics;
@@ -121,6 +122,42 @@ public final class ContainerBalancerMetrics {
this.numContainerMovesCompletedInLatestIteration.incr(valueToAdd);
}
+ public void incrementCurrentIterationContainerMoveMetric(
+ MoveResult result,
+ long valueToAdd) {
+ if (result == null) {
+ return;
+ }
+ switch (result) {
+ case COMPLETED:
+ this.numContainerMovesCompletedInLatestIteration.incr(valueToAdd);
+ break;
+ case REPLICATION_FAIL_TIME_OUT:
+ case DELETION_FAIL_TIME_OUT:
+ this.numContainerMovesTimeoutInLatestIteration.incr(valueToAdd);
+ break;
+ // TODO: Add metrics for other errors that need to be tracked.
+ case FAIL_NOT_RUNNING:
+ case REPLICATION_FAIL_INFLIGHT_REPLICATION:
+ case FAIL_NOT_LEADER:
+ case REPLICATION_FAIL_NOT_EXIST_IN_SOURCE:
+ case REPLICATION_FAIL_EXIST_IN_TARGET:
+ case REPLICATION_FAIL_CONTAINER_NOT_CLOSED:
+ case REPLICATION_FAIL_INFLIGHT_DELETION:
+ case REPLICATION_FAIL_NODE_NOT_IN_SERVICE:
+ case DELETION_FAIL_NODE_NOT_IN_SERVICE:
+ case REPLICATION_FAIL_NODE_UNHEALTHY:
+ case DELETION_FAIL_NODE_UNHEALTHY:
+ case DELETE_FAIL_POLICY:
+ case PLACEMENT_POLICY_NOT_SATISFIED:
+ case UNEXPECTED_REMOVE_SOURCE_AT_INFLIGHT_REPLICATION:
+ case UNEXPECTED_REMOVE_TARGET_AT_INFLIGHT_DELETION:
+ case FAIL_CAN_NOT_RECORD_TO_DB:
+ default:
+ break;
+ }
+ }
+
public void resetNumContainerMovesCompletedInLatestIteration() {
numContainerMovesCompletedInLatestIteration.incr(
-getNumContainerMovesCompletedInLatestIteration());
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java
index 5d4da102d9..fc643af51b 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java
@@ -458,7 +458,7 @@ public class ReplicationManager implements SCMService {
/**
- * following functions will be refactored in a seperate jira.
+ * following functions will be refactored in a separate jira.
*/
public CompletableFuture<LegacyReplicationManager.MoveResult> move(
ContainerID cid, DatanodeDetails src, DatanodeDetails tgt)
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java
index d57226bb30..dd27e8f446 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java
@@ -739,6 +739,37 @@ public class TestContainerBalancer {
}
+ @Test
+ public void checkIterationResultTimeoutFromReplicationManager()
+ throws NodeNotFoundException, IOException,
+ IllegalContainerBalancerStateException,
+ InvalidContainerBalancerConfigurationException {
+ CompletableFuture<MoveResult> future
+ = CompletableFuture.supplyAsync(() ->
+ MoveResult.REPLICATION_FAIL_TIME_OUT);
+ CompletableFuture<MoveResult> future2
+ = CompletableFuture.supplyAsync(() ->
+ MoveResult.DELETION_FAIL_TIME_OUT);
+ Mockito.when(replicationManager.move(Mockito.any(ContainerID.class),
+ Mockito.any(DatanodeDetails.class),
+ Mockito.any(DatanodeDetails.class)))
+ .thenReturn(future, future2);
+
+ balancerConfiguration.setThreshold(10);
+ balancerConfiguration.setIterations(1);
+ balancerConfiguration.setMaxSizeEnteringTarget(10 * OzoneConsts.GB);
+ balancerConfiguration.setMaxSizeToMovePerIteration(100 * OzoneConsts.GB);
+ balancerConfiguration.setMaxDatanodesPercentageToInvolvePerIteration(100);
+ balancerConfiguration.setMoveTimeout(Duration.ofMillis(1000));
+
+ startBalancer(balancerConfiguration);
+ sleepWhileBalancing(2000);
+
+ Assertions.assertTrue(containerBalancer.getMetrics()
+ .getNumContainerMovesTimeoutInLatestIteration() > 0);
+ stopBalancer();
+ }
+
@Test
public void testStartAndImmediateStopForDeadlock()
throws IllegalContainerBalancerStateException, IOException,
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org