You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by so...@apache.org on 2022/11/17 15:51:58 UTC
[ozone] branch master updated: HDDS-7488. EC: ReplicationManager: Move Mis-Replicated into a separate unhealthy state (#3956)
This is an automated email from the ASF dual-hosted git repository.
sodonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 7e38bfe164 HDDS-7488. EC: ReplicationManager: Move Mis-Replicated into a separate unhealthy state (#3956)
7e38bfe164 is described below
commit 7e38bfe1647a55e038aa9e8b174c7a08555b092f
Author: Stephen O'Donnell <st...@gmail.com>
AuthorDate: Thu Nov 17 15:51:52 2022 +0000
HDDS-7488. EC: ReplicationManager: Move Mis-Replicated into a separate unhealthy state (#3956)
---
.../replication/ContainerHealthResult.java | 98 +++--------
.../replication/ECOverReplicationHandler.java | 2 +-
.../replication/ECUnderReplicationHandler.java | 2 +-
.../container/replication/ReplicationManager.java | 27 ++-
.../container/replication/ReplicationQueue.java | 17 ++
.../health/ECReplicationCheckHandler.java | 63 ++++++-
.../health/RatisReplicationCheckHandler.java | 54 +++---
.../hdds/scm/server/StorageContainerManager.java | 1 +
.../replication/TestECOverReplicationHandler.java | 10 +-
.../replication/TestECUnderReplicationHandler.java | 18 +-
.../replication/TestLegacyReplicationManager.java | 25 +--
.../replication/TestReplicationManager.java | 15 +-
.../health/TestECReplicationCheckHandler.java | 188 +++++++++++++++++++--
.../health/TestRatisReplicationCheckHandler.java | 70 ++++----
14 files changed, 405 insertions(+), 185 deletions(-)
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java
index b438d6de4c..5d80268ef4 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java
@@ -34,7 +34,8 @@ public class ContainerHealthResult {
HEALTHY,
UNHEALTHY,
UNDER_REPLICATED,
- OVER_REPLICATED
+ OVER_REPLICATED,
+ MIS_REPLICATED
}
private final ContainerInfo containerInfo;
@@ -106,9 +107,6 @@ public class ContainerHealthResult {
private final int remainingRedundancy;
private final boolean dueToDecommission;
private final boolean sufficientlyReplicatedAfterPending;
- private boolean dueToMisReplication = false;
- private boolean isMisReplicated = false;
- private boolean isMisReplicatedAfterPending = false;
private final boolean unrecoverable;
private int requeueCount = 0;
@@ -122,44 +120,6 @@ public class ContainerHealthResult {
this.unrecoverable = unrecoverable;
}
- /**
- * Pass true to indicate the container is mis-replicated - ie it does not
- * meet the placement policy.
- * @param isMisRep True if the container is mis-replicated, false if not.
- * @return this object to allow calls to be chained
- */
- public UnderReplicatedHealthResult
- setMisReplicated(boolean isMisRep) {
- this.isMisReplicated = isMisRep;
- return this;
- }
-
- /**
- * Pass true to indicate the container is mis-replicated after considering
- * pending replicas scheduled for create or delete.
- * @param isMisRep True if the container is mis-replicated considering
- * pending replicas, or false if not.
- * @return this object to allow calls to be chained
- */
- public UnderReplicatedHealthResult
- setMisReplicatedAfterPending(boolean isMisRep) {
- this.isMisReplicatedAfterPending = isMisRep;
- return this;
- }
-
- /**
- * If the container is ONLY under replicated due to mis-replication, pass
- * true, otherwise pass false.
- * @param dueToMisRep Pass true if the container has enough replicas but
- * does not meet the placement policy.
- * @return
- */
- public UnderReplicatedHealthResult
- setDueToMisReplication(boolean dueToMisRep) {
- this.dueToMisReplication = dueToMisRep;
- return this;
- }
-
/**
* How many more replicas can be lost before the container is
* unreadable. For containers which are under-replicated due to decommission
@@ -224,38 +184,10 @@ public class ContainerHealthResult {
* @return True if the under-replication is corrected by the pending
* replicas. False otherwise.
*/
- public boolean isSufficientlyReplicatedAfterPending() {
+ public boolean isReplicatedOkAfterPending() {
return sufficientlyReplicatedAfterPending;
}
- /**
- * Returns true if the container is mis-replicated, ignoring any pending
- * replicas scheduled to be created.
- * @return True if mis-replicated, ignoring pending
- */
- public boolean isMisReplicated() {
- return isMisReplicated;
- }
-
- /**
- * Returns true if the container is mis-replicated after taking account of
- * pending replicas, which are schedule to be created.
- * @return true is mis-replicated after pending.
- */
- public boolean isMisReplicatedAfterPending() {
- return isMisReplicatedAfterPending;
- }
-
- /**
- * Returns true if the under replication is only due to mis-replication.
- * In other words, the container has enough replicas, but they do not meet
- * the placement policy.
- * @return true if the under-replication is only due to mis-replication
- */
- public boolean isDueToMisReplication() {
- return dueToMisReplication;
- }
-
/**
* Indicates whether a container has enough replicas to be read. For Ratis
* at least one replia must be available. For EC, at least dataNum replicas
@@ -268,6 +200,28 @@ public class ContainerHealthResult {
}
}
+ /**
+ * Class to represent a container health state which is mis-replicated. This
+ * means the container is neither over nor under replicated, but its replicas
+ * don't meet the requirements of the container placement policy. E.g. the
+ * replicas are not spread across enough racks.
+ */
+ public static class MisReplicatedHealthResult
+ extends ContainerHealthResult {
+
+ private final boolean replicatedOkAfterPending;
+
+ public MisReplicatedHealthResult(ContainerInfo containerInfo,
+ boolean replicatedOkAfterPending) {
+ super(containerInfo, HealthState.MIS_REPLICATED);
+ this.replicatedOkAfterPending = replicatedOkAfterPending;
+ }
+
+ public boolean isReplicatedOkAfterPending() {
+ return replicatedOkAfterPending;
+ }
+ }
+
/**
* Class for Over Replicated Container Health Results.
*/
@@ -304,7 +258,7 @@ public class ContainerHealthResult {
* @return True if the over-replication is corrected by the pending
* deletes. False otherwise.
*/
- public boolean isSufficientlyReplicatedAfterPending() {
+ public boolean isReplicatedOkAfterPending() {
return sufficientlyReplicatedAfterPending;
}
}
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECOverReplicationHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECOverReplicationHandler.java
index 4fba8be175..df7e839010 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECOverReplicationHandler.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECOverReplicationHandler.java
@@ -99,7 +99,7 @@ public class ECOverReplicationHandler extends AbstractOverReplicationHandler {
ContainerHealthResult.OverReplicatedHealthResult containerHealthResult =
((ContainerHealthResult.OverReplicatedHealthResult)
currentUnderRepRes);
- if (containerHealthResult.isSufficientlyReplicatedAfterPending()) {
+ if (containerHealthResult.isReplicatedOkAfterPending()) {
LOG.info("The container {} with replicas {} will be corrected " +
"by the pending delete", container.getContainerID(), replicas);
return emptyMap();
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECUnderReplicationHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECUnderReplicationHandler.java
index 67ec093a54..024780db8e 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECUnderReplicationHandler.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECUnderReplicationHandler.java
@@ -145,7 +145,7 @@ public class ECUnderReplicationHandler implements UnhealthyReplicationHandler {
ContainerHealthResult.UnderReplicatedHealthResult containerHealthResult =
((ContainerHealthResult.UnderReplicatedHealthResult)
currentUnderRepRes);
- if (containerHealthResult.isSufficientlyReplicatedAfterPending()) {
+ if (containerHealthResult.isReplicatedOkAfterPending()) {
LOG.info("The container {} with replicas {} is sufficiently replicated",
container.getContainerID(), replicaCount.getReplicas());
return emptyMap();
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java
index 6314755d05..444461c6b3 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java
@@ -167,15 +167,23 @@ public class ReplicationManager implements SCMService {
/**
* Constructs ReplicationManager instance with the given configuration.
*
- * @param conf OzoneConfiguration
- * @param containerManager ContainerManager
- * @param containerPlacement PlacementPolicy
- * @param eventPublisher EventPublisher
+ * @param conf The SCM configuration used by RM.
+ * @param containerManager The containerManager instance
+ * @param ratisContainerPlacement The Ratis container placement policy
+ * @param ecContainerPlacement The EC container placement policy
+ * @param eventPublisher The eventPublisher instance
+ * @param scmContext The SCMContext instance
+ * @param nodeManager The nodeManager instance
+ * @param clock Clock object used to get the current time
+ * @param legacyReplicationManager The legacy ReplicationManager instance
+ * @param replicaPendingOps The pendingOps instance
+ * @throws IOException
*/
@SuppressWarnings("parameternumber")
public ReplicationManager(final ConfigurationSource conf,
final ContainerManager containerManager,
- final PlacementPolicy containerPlacement,
+ final PlacementPolicy ratisContainerPlacement,
+ final PlacementPolicy ecContainerPlacement,
final EventPublisher eventPublisher,
final SCMContext scmContext,
final NodeManager nodeManager,
@@ -197,19 +205,20 @@ public class ReplicationManager implements SCMService {
TimeUnit.MILLISECONDS);
this.containerReplicaPendingOps = replicaPendingOps;
this.legacyReplicationManager = legacyReplicationManager;
- this.ecReplicationCheckHandler = new ECReplicationCheckHandler();
+ this.ecReplicationCheckHandler =
+ new ECReplicationCheckHandler(ecContainerPlacement);
this.ratisReplicationCheckHandler =
- new RatisReplicationCheckHandler(containerPlacement);
+ new RatisReplicationCheckHandler(ratisContainerPlacement);
this.nodeManager = nodeManager;
this.replicationQueue = new ReplicationQueue();
this.maintenanceRedundancy = rmConf.maintenanceRemainingRedundancy;
this.ratisMaintenanceMinReplicas = rmConf.getMaintenanceReplicaMinimum();
ecUnderReplicationHandler = new ECUnderReplicationHandler(
- ecReplicationCheckHandler, containerPlacement, conf, nodeManager);
+ ecReplicationCheckHandler, ecContainerPlacement, conf, nodeManager);
ecOverReplicationHandler =
new ECOverReplicationHandler(ecReplicationCheckHandler,
- containerPlacement, nodeManager);
+ ecContainerPlacement, nodeManager);
underReplicatedProcessor =
new UnderReplicatedProcessor(this, containerReplicaPendingOps,
eventPublisher, rmConf.getUnderReplicatedInterval());
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationQueue.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationQueue.java
index d27c1d9c61..a14d21c76c 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationQueue.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationQueue.java
@@ -32,6 +32,8 @@ public class ReplicationQueue {
underRepQueue;
private final Queue<ContainerHealthResult.OverReplicatedHealthResult>
overRepQueue;
+ private final Queue<ContainerHealthResult.MisReplicatedHealthResult>
+ misRepQueue;
public ReplicationQueue() {
underRepQueue = new PriorityQueue<>(
@@ -40,6 +42,7 @@ public class ReplicationQueue {
.thenComparing(ContainerHealthResult
.UnderReplicatedHealthResult::getRequeueCount));
overRepQueue = new LinkedList<>();
+ misRepQueue = new LinkedList<>();
}
public void enqueue(ContainerHealthResult.UnderReplicatedHealthResult
@@ -52,6 +55,11 @@ public class ReplicationQueue {
overRepQueue.add(overReplicatedHealthResult);
}
+ public void enqueue(ContainerHealthResult.MisReplicatedHealthResult
+ misReplicatedHealthResult) {
+ misRepQueue.add(misReplicatedHealthResult);
+ }
+
public ContainerHealthResult.UnderReplicatedHealthResult
dequeueUnderReplicatedContainer() {
return underRepQueue.poll();
@@ -62,6 +70,11 @@ public class ReplicationQueue {
return overRepQueue.poll();
}
+ public ContainerHealthResult.MisReplicatedHealthResult
+ dequeueMisReplicatedContainer() {
+ return misRepQueue.poll();
+ }
+
public int underReplicatedQueueSize() {
return underRepQueue.size();
}
@@ -70,4 +83,8 @@ public class ReplicationQueue {
return overRepQueue.size();
}
+ public int misReplicatedQueueSize() {
+ return misRepQueue.size();
+ }
+
}
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ECReplicationCheckHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ECReplicationCheckHandler.java
index 277409ed8d..2aba554498 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ECReplicationCheckHandler.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ECReplicationCheckHandler.java
@@ -17,6 +17,9 @@
package org.apache.hadoop.hdds.scm.container.replication.health;
import org.apache.hadoop.hdds.client.ECReplicationConfig;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.scm.ContainerPlacementStatus;
+import org.apache.hadoop.hdds.scm.PlacementPolicy;
import org.apache.hadoop.hdds.scm.container.ContainerID;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.ContainerReplica;
@@ -26,8 +29,11 @@ import org.apache.hadoop.hdds.scm.container.replication.ContainerHealthResult;
import org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaOp;
import org.apache.hadoop.hdds.scm.container.replication.ECContainerReplicaCount;
+import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
import java.util.Set;
+import java.util.stream.Collectors;
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType.EC;
@@ -38,7 +44,10 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType.E
*/
public class ECReplicationCheckHandler extends AbstractCheck {
- public ECReplicationCheckHandler() {
+ private final PlacementPolicy placementPolicy;
+
+ public ECReplicationCheckHandler(PlacementPolicy placementPolicy) {
+ this.placementPolicy = placementPolicy;
}
@Override
@@ -69,9 +78,7 @@ public class ECReplicationCheckHandler extends AbstractCheck {
report.incrementAndSample(
ReplicationManagerReport.HealthState.MISSING, containerID);
}
- // TODO - if it is unrecoverable, should we return false to other
- // handlers can be tried?
- if (!underHealth.isSufficientlyReplicatedAfterPending() &&
+ if (!underHealth.isReplicatedOkAfterPending() &&
!underHealth.isUnrecoverable()) {
request.getReplicationQueue().enqueue(underHealth);
}
@@ -82,10 +89,20 @@ public class ECReplicationCheckHandler extends AbstractCheck {
ReplicationManagerReport.HealthState.OVER_REPLICATED, containerID);
ContainerHealthResult.OverReplicatedHealthResult overHealth
= ((ContainerHealthResult.OverReplicatedHealthResult) health);
- if (!overHealth.isSufficientlyReplicatedAfterPending()) {
+ if (!overHealth.isReplicatedOkAfterPending()) {
request.getReplicationQueue().enqueue(overHealth);
}
return true;
+ } else if (health.getHealthState() ==
+ ContainerHealthResult.HealthState.MIS_REPLICATED) {
+ report.incrementAndSample(
+ ReplicationManagerReport.HealthState.MIS_REPLICATED, containerID);
+ ContainerHealthResult.MisReplicatedHealthResult misRepHealth
+ = ((ContainerHealthResult.MisReplicatedHealthResult) health);
+ if (!misRepHealth.isReplicatedOkAfterPending()) {
+ request.getReplicationQueue().enqueue(misRepHealth);
+ }
+ return true;
}
// Should not get here, but incase it does the container is not healthy,
// but is also not under or over replicated.
@@ -128,7 +145,43 @@ public class ECReplicationCheckHandler extends AbstractCheck {
.OverReplicatedHealthResult(container, overRepIndexes.size(),
!replicaCount.isOverReplicated(true));
}
+ ContainerPlacementStatus placement = getPlacementStatus(replicas,
+ container.getReplicationConfig().getRequiredNodes(),
+ Collections.emptyList());
+ if (!placement.isPolicySatisfied()) {
+ ContainerPlacementStatus placementAfterPending = getPlacementStatus(
+ replicas, container.getReplicationConfig().getRequiredNodes(),
+ request.getPendingOps());
+ return new ContainerHealthResult.MisReplicatedHealthResult(
+ container, placementAfterPending.isPolicySatisfied());
+ }
// No issues detected, so return healthy.
return new ContainerHealthResult.HealthyResult(container);
}
+
+ /**
+ * Given a set of ContainerReplica, transform it to a list of DatanodeDetails
+ * and then check if the list meets the container placement policy.
+ * @param replicas Set of ContainerReplica
+ * @param replicationFactor Expected Replication Factor of the container
+ * @return ContainerPlacementStatus indicating if the policy is met or not
+ */
+ private ContainerPlacementStatus getPlacementStatus(
+ Set<ContainerReplica> replicas, int replicationFactor,
+ List<ContainerReplicaOp> pendingOps) {
+
+ Set<DatanodeDetails> replicaDns = replicas.stream()
+ .map(ContainerReplica::getDatanodeDetails)
+ .collect(Collectors.toSet());
+ for (ContainerReplicaOp op : pendingOps) {
+ if (op.getOpType() == ContainerReplicaOp.PendingOpType.ADD) {
+ replicaDns.add(op.getTarget());
+ }
+ if (op.getOpType() == ContainerReplicaOp.PendingOpType.DELETE) {
+ replicaDns.remove(op.getTarget());
+ }
+ }
+ return placementPolicy.validateContainerPlacement(
+ new ArrayList<>(replicaDns), replicationFactor);
+ }
}
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java
index cb53c59e08..9d33498dda 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java
@@ -82,16 +82,10 @@ public class RatisReplicationCheckHandler extends AbstractCheck {
report.incrementAndSample(ReplicationManagerReport.HealthState.MISSING,
container.containerID());
}
- if (underHealth.isMisReplicated()) {
- report.incrementAndSample(
- ReplicationManagerReport.HealthState.MIS_REPLICATED,
- container.containerID());
- }
// TODO - if it is unrecoverable, should we return false to other
// handlers can be tried?
if (!underHealth.isUnrecoverable() &&
- (underHealth.isMisReplicatedAfterPending() ||
- !underHealth.isSufficientlyReplicatedAfterPending())) {
+ !underHealth.isReplicatedOkAfterPending()) {
request.getReplicationQueue().enqueue(underHealth);
}
return true;
@@ -104,11 +98,23 @@ public class RatisReplicationCheckHandler extends AbstractCheck {
container.containerID());
ContainerHealthResult.OverReplicatedHealthResult overHealth
= ((ContainerHealthResult.OverReplicatedHealthResult) health);
- if (!overHealth.isSufficientlyReplicatedAfterPending()) {
+ if (!overHealth.isReplicatedOkAfterPending()) {
request.getReplicationQueue().enqueue(overHealth);
}
return true;
}
+ if (health.getHealthState() ==
+ ContainerHealthResult.HealthState.MIS_REPLICATED) {
+ report.incrementAndSample(
+ ReplicationManagerReport.HealthState.MIS_REPLICATED,
+ container.containerID());
+ ContainerHealthResult.MisReplicatedHealthResult misRepHealth
+ = ((ContainerHealthResult.MisReplicatedHealthResult) health);
+ if (!misRepHealth.isReplicatedOkAfterPending()) {
+ request.getReplicationQueue().enqueue(misRepHealth);
+ }
+ return true;
+ }
return false;
}
@@ -136,30 +142,15 @@ public class RatisReplicationCheckHandler extends AbstractCheck {
new RatisContainerReplicaCount(container, replicas, pendingAdd,
pendingDelete, requiredNodes, minReplicasForMaintenance);
- ContainerPlacementStatus placementStatus =
- getPlacementStatus(replicas, requiredNodes, Collections.emptyList());
-
- ContainerPlacementStatus placementStatusWithPending = placementStatus;
- if (replicaPendingOps.size() > 0) {
- placementStatusWithPending =
- getPlacementStatus(replicas, requiredNodes, replicaPendingOps);
- }
boolean sufficientlyReplicated
= replicaCount.isSufficientlyReplicated(false);
- boolean isPolicySatisfied = placementStatus.isPolicySatisfied();
- if (!sufficientlyReplicated || !isPolicySatisfied) {
+ if (!sufficientlyReplicated) {
ContainerHealthResult.UnderReplicatedHealthResult result =
new ContainerHealthResult.UnderReplicatedHealthResult(
container, replicaCount.getRemainingRedundancy(),
- isPolicySatisfied
- && replicas.size() - pendingDelete >= requiredNodes,
+ replicas.size() - pendingDelete >= requiredNodes,
replicaCount.isSufficientlyReplicated(true),
replicaCount.isUnrecoverable());
- result.setMisReplicated(!isPolicySatisfied)
- .setMisReplicatedAfterPending(
- !placementStatusWithPending.isPolicySatisfied())
- .setDueToMisReplication(
- !isPolicySatisfied && replicaCount.isSufficientlyReplicated());
return result;
}
@@ -169,6 +160,19 @@ public class RatisReplicationCheckHandler extends AbstractCheck {
return new ContainerHealthResult.OverReplicatedHealthResult(
container, replicaCount.getExcessRedundancy(false), repOkWithPending);
}
+
+ ContainerPlacementStatus placementStatus =
+ getPlacementStatus(replicas, requiredNodes, Collections.emptyList());
+ ContainerPlacementStatus placementStatusWithPending = placementStatus;
+ if (!placementStatus.isPolicySatisfied()) {
+ if (replicaPendingOps.size() > 0) {
+ placementStatusWithPending =
+ getPlacementStatus(replicas, requiredNodes, replicaPendingOps);
+ }
+ return new ContainerHealthResult.MisReplicatedHealthResult(
+ container, placementStatusWithPending.isPolicySatisfied());
+ }
+
// No issues detected, just return healthy.
return new ContainerHealthResult.HealthyResult(container);
}
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
index 3687763984..733a418444 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
@@ -761,6 +761,7 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
conf,
containerManager,
containerPlacementPolicy,
+ ecContainerPlacementPolicy,
eventQueue,
scmContext,
scmNodeManager,
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECOverReplicationHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECOverReplicationHandler.java
index f96790ab35..a1a3a05ed3 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECOverReplicationHandler.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECOverReplicationHandler.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.hdds.scm.PlacementPolicy;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.ContainerReplica;
import org.apache.hadoop.hdds.scm.container.MockNodeManager;
+import org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementStatusDefault;
import org.apache.hadoop.hdds.scm.container.replication.health.ECReplicationCheckHandler;
import org.apache.hadoop.hdds.scm.net.NodeSchema;
import org.apache.hadoop.hdds.scm.net.NodeSchemaManager;
@@ -52,6 +53,8 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalSt
import static org.apache.hadoop.hdds.scm.net.NetConstants.LEAF_SCHEMA;
import static org.apache.hadoop.hdds.scm.net.NetConstants.RACK_SCHEMA;
import static org.apache.hadoop.hdds.scm.net.NetConstants.ROOT_SCHEMA;
+import static org.mockito.ArgumentMatchers.anyInt;
+import static org.mockito.ArgumentMatchers.anyList;
/**
* Tests the ECOverReplicationHandling functionality.
@@ -63,6 +66,7 @@ public class TestECOverReplicationHandler {
private OzoneConfiguration conf;
private PlacementPolicy policy;
private ECReplicationCheckHandler replicationCheck;
+ private PlacementPolicy placementPolicy;
@BeforeEach
public void setup() {
@@ -82,7 +86,11 @@ public class TestECOverReplicationHandler {
NodeSchema[] schemas =
new NodeSchema[] {ROOT_SCHEMA, RACK_SCHEMA, LEAF_SCHEMA};
NodeSchemaManager.getInstance().init(schemas, true);
- replicationCheck = new ECReplicationCheckHandler();
+ placementPolicy = Mockito.mock(PlacementPolicy.class);
+ Mockito.when(placementPolicy.validateContainerPlacement(
+ anyList(), anyInt()))
+ .thenReturn(new ContainerPlacementStatusDefault(2, 2, 3));
+ replicationCheck = new ECReplicationCheckHandler(placementPolicy);
}
@Test
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECUnderReplicationHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECUnderReplicationHandler.java
index e688b23a24..e58f71d9ad 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECUnderReplicationHandler.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECUnderReplicationHandler.java
@@ -29,6 +29,7 @@ import org.apache.hadoop.hdds.scm.PlacementPolicy;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.ContainerReplica;
import org.apache.hadoop.hdds.scm.container.MockNodeManager;
+import org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementStatusDefault;
import org.apache.hadoop.hdds.scm.container.replication.health.ECReplicationCheckHandler;
import org.apache.hadoop.hdds.scm.exceptions.SCMException;
import org.apache.hadoop.hdds.scm.net.NodeSchema;
@@ -60,6 +61,8 @@ import static org.apache.hadoop.hdds.scm.net.NetConstants.LEAF_SCHEMA;
import static org.apache.hadoop.hdds.scm.net.NetConstants.RACK_SCHEMA;
import static org.apache.hadoop.hdds.scm.net.NetConstants.ROOT_SCHEMA;
import static org.junit.Assert.assertThrows;
+import static org.mockito.ArgumentMatchers.anyInt;
+import static org.mockito.ArgumentMatchers.anyList;
/**
* Tests the ECUnderReplicationHandling functionality.
@@ -73,6 +76,7 @@ public class TestECUnderReplicationHandler {
private static final int DATA = 3;
private static final int PARITY = 2;
private ECReplicationCheckHandler replicationCheck;
+ private PlacementPolicy ecPlacementPolicy;
@BeforeEach
public void setup() {
@@ -92,7 +96,11 @@ public class TestECUnderReplicationHandler {
NodeSchema[] schemas =
new NodeSchema[] {ROOT_SCHEMA, RACK_SCHEMA, LEAF_SCHEMA};
NodeSchemaManager.getInstance().init(schemas, true);
- replicationCheck = new ECReplicationCheckHandler();
+ ecPlacementPolicy = Mockito.mock(PlacementPolicy.class);
+ Mockito.when(ecPlacementPolicy.validateContainerPlacement(
+ anyList(), anyInt()))
+ .thenReturn(new ContainerPlacementStatusDefault(2, 2, 3));
+ replicationCheck = new ECReplicationCheckHandler(ecPlacementPolicy);
}
@Test
@@ -210,10 +218,10 @@ public class TestECUnderReplicationHandler {
Mockito.mock(ContainerPlacementStatus.class);
Mockito.when(mockedContainerPlacementStatus.isPolicySatisfied())
.thenReturn(false);
- Mockito.when(mockedPolicy.validateContainerPlacement(Mockito.anyList(),
- Mockito.anyInt())).thenReturn(mockedContainerPlacementStatus);
- Mockito.when(mockedPolicy.validateContainerPlacement(Mockito.anyList(),
- Mockito.anyInt())).thenAnswer(invocationOnMock -> {
+ Mockito.when(mockedPolicy.validateContainerPlacement(anyList(),
+ anyInt())).thenReturn(mockedContainerPlacementStatus);
+ Mockito.when(mockedPolicy.validateContainerPlacement(anyList(),
+ anyInt())).thenAnswer(invocationOnMock -> {
Set<DatanodeDetails> dns =
new HashSet<>(invocationOnMock.getArgument(0));
Assert.assertTrue(
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestLegacyReplicationManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestLegacyReplicationManager.java
index fe2c32a2af..ea8049a511 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestLegacyReplicationManager.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestLegacyReplicationManager.java
@@ -117,7 +117,8 @@ public class TestLegacyReplicationManager {
private ReplicationManager replicationManager;
private ContainerStateManager containerStateManager;
- private PlacementPolicy containerPlacementPolicy;
+ private PlacementPolicy ratisContainerPlacementPolicy;
+ private PlacementPolicy ecContainerPlacementPolicy;
private EventQueue eventQueue;
private DatanodeCommandHandler datanodeCommandHandler;
private SimpleMockNodeManager nodeManager;
@@ -193,9 +194,10 @@ public class TestLegacyReplicationManager {
.getContainerReplicas(((ContainerID)invocation
.getArguments()[0])));
- containerPlacementPolicy = Mockito.mock(PlacementPolicy.class);
+ ratisContainerPlacementPolicy = Mockito.mock(PlacementPolicy.class);
+ ecContainerPlacementPolicy = Mockito.mock(PlacementPolicy.class);
- Mockito.when(containerPlacementPolicy.chooseDatanodes(
+ Mockito.when(ratisContainerPlacementPolicy.chooseDatanodes(
Mockito.any(), Mockito.any(), Mockito.anyInt(),
Mockito.anyLong(), Mockito.anyLong()))
.thenAnswer(invocation -> {
@@ -205,7 +207,7 @@ public class TestLegacyReplicationManager {
.collect(Collectors.toList());
});
- Mockito.when(containerPlacementPolicy.validateContainerPlacement(
+ Mockito.when(ratisContainerPlacementPolicy.validateContainerPlacement(
Mockito.any(),
Mockito.anyInt()
)).thenAnswer(invocation ->
@@ -257,14 +259,15 @@ public class TestLegacyReplicationManager {
config, new SCMDBDefinition());
LegacyReplicationManager legacyRM = new LegacyReplicationManager(
- config, containerManager, containerPlacementPolicy, eventQueue,
+ config, containerManager, ratisContainerPlacementPolicy, eventQueue,
SCMContext.emptyContext(), nodeManager, scmHAManager, clock,
SCMDBDefinition.MOVE.getTable(dbStore));
replicationManager = new ReplicationManager(
config,
containerManager,
- containerPlacementPolicy,
+ ratisContainerPlacementPolicy,
+ ecContainerPlacementPolicy,
eventQueue,
SCMContext.emptyContext(),
nodeManager,
@@ -1102,7 +1105,7 @@ public class TestLegacyReplicationManager {
// Ensure a mis-replicated status is returned for any containers in this
// test where there are 3 replicas. When there are 2 or 4 replicas
// the status returned will be healthy.
- Mockito.when(containerPlacementPolicy.validateContainerPlacement(
+ Mockito.when(ratisContainerPlacementPolicy.validateContainerPlacement(
Mockito.argThat(list -> list.size() == 3),
Mockito.anyInt()
)).thenAnswer(invocation -> {
@@ -1138,7 +1141,7 @@ public class TestLegacyReplicationManager {
// Now make it so that all containers seem mis-replicated no matter how
// many replicas. This will test replicas are not scheduled if the new
// replica does not fix the mis-replication.
- Mockito.when(containerPlacementPolicy.validateContainerPlacement(
+ Mockito.when(ratisContainerPlacementPolicy.validateContainerPlacement(
Mockito.anyList(),
Mockito.anyInt()
)).thenAnswer(invocation -> {
@@ -1190,7 +1193,7 @@ public class TestLegacyReplicationManager {
// Ensure a mis-replicated status is returned for any containers in this
// test where there are exactly 3 replicas checked.
- Mockito.when(containerPlacementPolicy.validateContainerPlacement(
+ Mockito.when(ratisContainerPlacementPolicy.validateContainerPlacement(
Mockito.argThat(list -> list.size() == 3),
Mockito.anyInt()
)).thenAnswer(
@@ -1241,7 +1244,7 @@ public class TestLegacyReplicationManager {
id, replicaThree);
containerStateManager.updateContainerReplica(id, replicaFour);
- Mockito.when(containerPlacementPolicy.validateContainerPlacement(
+ Mockito.when(ratisContainerPlacementPolicy.validateContainerPlacement(
Mockito.argThat(list -> list.size() == 3),
Mockito.anyInt()
)).thenAnswer(
@@ -1289,7 +1292,7 @@ public class TestLegacyReplicationManager {
containerStateManager.updateContainerReplica(id, replicaFour);
containerStateManager.updateContainerReplica(id, replicaFive);
- Mockito.when(containerPlacementPolicy.validateContainerPlacement(
+ Mockito.when(ratisContainerPlacementPolicy.validateContainerPlacement(
Mockito.argThat(list -> list != null && list.size() <= 4),
Mockito.anyInt()
)).thenAnswer(
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java
index f6c9ad9287..d0f358c1df 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.hdds.scm.container.ContainerManager;
import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
import org.apache.hadoop.hdds.scm.container.ContainerReplica;
import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport;
+import org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementStatusDefault;
import org.apache.hadoop.hdds.scm.events.SCMEvents;
import org.apache.hadoop.hdds.scm.ha.SCMContext;
import org.apache.hadoop.hdds.scm.ha.SCMServiceManager;
@@ -55,6 +56,8 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_SCM_WAIT_TIME_AFTER_SAF
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING;
import static org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createContainerInfo;
import static org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createReplicas;
+import static org.mockito.ArgumentMatchers.anyInt;
+import static org.mockito.ArgumentMatchers.anyList;
/**
* Tests for the ReplicationManager.
@@ -65,7 +68,8 @@ public class TestReplicationManager {
private ReplicationManager replicationManager;
private LegacyReplicationManager legacyReplicationManager;
private ContainerManager containerManager;
- private PlacementPolicy placementPolicy;
+ private PlacementPolicy ratisPlacementPolicy;
+ private PlacementPolicy ecPlacementPolicy;
private EventPublisher eventPublisher;
private SCMContext scmContext;
private NodeManager nodeManager;
@@ -83,7 +87,11 @@ public class TestReplicationManager {
configuration = new OzoneConfiguration();
configuration.set(HDDS_SCM_WAIT_TIME_AFTER_SAFE_MODE_EXIT, "0s");
containerManager = Mockito.mock(ContainerManager.class);
- placementPolicy = Mockito.mock(PlacementPolicy.class);
+ ratisPlacementPolicy = Mockito.mock(PlacementPolicy.class);
+ ecPlacementPolicy = Mockito.mock(PlacementPolicy.class);
+ Mockito.when(ecPlacementPolicy.validateContainerPlacement(
+ anyList(), anyInt()))
+ .thenReturn(new ContainerPlacementStatusDefault(2, 2, 3));
eventPublisher = Mockito.mock(EventPublisher.class);
scmContext = Mockito.mock(SCMContext.class);
nodeManager = Mockito.mock(NodeManager.class);
@@ -105,7 +113,8 @@ public class TestReplicationManager {
replicationManager = new ReplicationManager(
configuration,
containerManager,
- placementPolicy,
+ ratisPlacementPolicy,
+ ecPlacementPolicy,
eventPublisher,
scmContext,
nodeManager,
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestECReplicationCheckHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestECReplicationCheckHandler.java
index ad09e026e3..06f68ee91c 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestECReplicationCheckHandler.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestECReplicationCheckHandler.java
@@ -18,11 +18,14 @@ package org.apache.hadoop.hdds.scm.container.replication.health;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.hdds.client.ECReplicationConfig;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.MockDatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto;
+import org.apache.hadoop.hdds.scm.PlacementPolicy;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.ContainerReplica;
import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport;
+import org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementStatusDefault;
import org.apache.hadoop.hdds.scm.container.replication.ContainerCheckRequest;
import org.apache.hadoop.hdds.scm.container.replication.ContainerHealthResult;
import org.apache.hadoop.hdds.scm.container.replication.ContainerHealthResult.OverReplicatedHealthResult;
@@ -35,6 +38,7 @@ import org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
+import org.mockito.Mockito;
import java.util.ArrayList;
import java.util.Collections;
@@ -51,6 +55,8 @@ import static org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaO
import static org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createContainerInfo;
import static org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createContainerReplica;
import static org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createReplicas;
+import static org.mockito.ArgumentMatchers.anyInt;
+import static org.mockito.ArgumentMatchers.anyList;
/**
* Tests for the ECContainerHealthCheck class.
@@ -63,11 +69,15 @@ public class TestECReplicationCheckHandler {
private int maintenanceRedundancy = 2;
private ContainerCheckRequest.Builder requestBuilder;
private ReplicationManagerReport report;
-
+ private PlacementPolicy placementPolicy;
@Before
public void setup() {
- healthCheck = new ECReplicationCheckHandler();
+ placementPolicy = Mockito.mock(PlacementPolicy.class);
+ Mockito.when(placementPolicy.validateContainerPlacement(
+ anyList(), anyInt()))
+ .thenReturn(new ContainerPlacementStatusDefault(2, 2, 3));
+ healthCheck = new ECReplicationCheckHandler(placementPolicy);
repConfig = new ECReplicationConfig(3, 2);
repQueue = new ReplicationQueue();
report = new ReplicationManagerReport();
@@ -109,7 +119,7 @@ public class TestECReplicationCheckHandler {
healthCheck.checkHealth(request);
Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
Assert.assertEquals(1, result.getRemainingRedundancy());
- Assert.assertFalse(result.isSufficientlyReplicatedAfterPending());
+ Assert.assertFalse(result.isReplicatedOkAfterPending());
Assert.assertFalse(result.underReplicatedDueToDecommission());
Assert.assertTrue(healthCheck.handle(request));
@@ -136,7 +146,7 @@ public class TestECReplicationCheckHandler {
healthCheck.checkHealth(request);
Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
Assert.assertEquals(1, result.getRemainingRedundancy());
- Assert.assertTrue(result.isSufficientlyReplicatedAfterPending());
+ Assert.assertTrue(result.isReplicatedOkAfterPending());
Assert.assertFalse(result.underReplicatedDueToDecommission());
Assert.assertTrue(healthCheck.handle(request));
@@ -164,7 +174,7 @@ public class TestECReplicationCheckHandler {
healthCheck.checkHealth(request);
Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
Assert.assertEquals(2, result.getRemainingRedundancy());
- Assert.assertFalse(result.isSufficientlyReplicatedAfterPending());
+ Assert.assertFalse(result.isReplicatedOkAfterPending());
Assert.assertTrue(result.underReplicatedDueToDecommission());
Assert.assertTrue(healthCheck.handle(request));
@@ -195,7 +205,7 @@ public class TestECReplicationCheckHandler {
healthCheck.checkHealth(request);
Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
Assert.assertEquals(2, result.getRemainingRedundancy());
- Assert.assertTrue(result.isSufficientlyReplicatedAfterPending());
+ Assert.assertTrue(result.isReplicatedOkAfterPending());
Assert.assertTrue(result.underReplicatedDueToDecommission());
Assert.assertTrue(healthCheck.handle(request));
@@ -226,7 +236,7 @@ public class TestECReplicationCheckHandler {
healthCheck.checkHealth(request);
Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
Assert.assertEquals(1, result.getRemainingRedundancy());
- Assert.assertFalse(result.isSufficientlyReplicatedAfterPending());
+ Assert.assertFalse(result.isReplicatedOkAfterPending());
Assert.assertFalse(result.underReplicatedDueToDecommission());
Assert.assertTrue(healthCheck.handle(request));
@@ -250,7 +260,7 @@ public class TestECReplicationCheckHandler {
healthCheck.checkHealth(request);
Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
Assert.assertEquals(-1, result.getRemainingRedundancy());
- Assert.assertFalse(result.isSufficientlyReplicatedAfterPending());
+ Assert.assertFalse(result.isReplicatedOkAfterPending());
Assert.assertFalse(result.underReplicatedDueToDecommission());
Assert.assertTrue(result.isUnrecoverable());
@@ -294,7 +304,7 @@ public class TestECReplicationCheckHandler {
healthCheck.checkHealth(request);
Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
Assert.assertEquals(0, result.getRemainingRedundancy());
- Assert.assertFalse(result.isSufficientlyReplicatedAfterPending());
+ Assert.assertFalse(result.isReplicatedOkAfterPending());
Assert.assertFalse(result.underReplicatedDueToDecommission());
Assert.assertTrue(healthCheck.handle(request));
@@ -354,7 +364,7 @@ public class TestECReplicationCheckHandler {
healthCheck.checkHealth(request);
Assert.assertEquals(HealthState.OVER_REPLICATED, result.getHealthState());
Assert.assertEquals(2, result.getExcessRedundancy());
- Assert.assertFalse(result.isSufficientlyReplicatedAfterPending());
+ Assert.assertFalse(result.isReplicatedOkAfterPending());
Assert.assertTrue(healthCheck.handle(request));
Assert.assertEquals(0, repQueue.underReplicatedQueueSize());
@@ -387,7 +397,7 @@ public class TestECReplicationCheckHandler {
healthCheck.checkHealth(request);
Assert.assertEquals(HealthState.OVER_REPLICATED, result.getHealthState());
Assert.assertEquals(2, result.getExcessRedundancy());
- Assert.assertTrue(result.isSufficientlyReplicatedAfterPending());
+ Assert.assertTrue(result.isReplicatedOkAfterPending());
Assert.assertTrue(healthCheck.handle(request));
Assert.assertEquals(0, repQueue.underReplicatedQueueSize());
@@ -447,4 +457,160 @@ public class TestECReplicationCheckHandler {
ReplicationManagerReport.HealthState.OVER_REPLICATED));
}
+ @Test
+ public void testMisReplicatedContainer() {
+ ContainerInfo container = createContainerInfo(repConfig);
+
+ // Placement policy is always violated
+ Mockito.when(placementPolicy.validateContainerPlacement(
+ Mockito.any(),
+ Mockito.anyInt()
+ )).thenAnswer(invocation ->
+ new ContainerPlacementStatusDefault(4, 5, 9));
+
+ Set<ContainerReplica> replicas = createReplicas(container.containerID(),
+ Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2),
+ Pair.of(IN_SERVICE, 3), Pair.of(IN_SERVICE, 4),
+ Pair.of(IN_SERVICE, 5));
+ ContainerCheckRequest request = requestBuilder
+ .setContainerReplicas(replicas)
+ .setContainerInfo(container)
+ .build();
+
+ ContainerHealthResult result = healthCheck.checkHealth(request);
+ Assert.assertEquals(HealthState.MIS_REPLICATED, result.getHealthState());
+
+ Assert.assertTrue(healthCheck.handle(request));
+ Assert.assertEquals(0, repQueue.underReplicatedQueueSize());
+ Assert.assertEquals(0, repQueue.overReplicatedQueueSize());
+ Assert.assertEquals(1, repQueue.misReplicatedQueueSize());
+ Assert.assertEquals(0, report.getStat(
+ ReplicationManagerReport.HealthState.UNDER_REPLICATED));
+ Assert.assertEquals(0, report.getStat(
+ ReplicationManagerReport.HealthState.OVER_REPLICATED));
+ Assert.assertEquals(1, report.getStat(
+ ReplicationManagerReport.HealthState.MIS_REPLICATED));
+ }
+
+ @Test
+ public void testMisReplicatedContainerFixedByPending() {
+ ContainerInfo container = createContainerInfo(repConfig);
+
+ Mockito.when(placementPolicy.validateContainerPlacement(
+ Mockito.any(),
+ Mockito.anyInt()
+ )).thenAnswer(invocation -> {
+ List<DatanodeDetails> dns = invocation.getArgument(0);
+ // If the number of DNs is 5 or less make it be mis-replicated
+ if (dns.size() <= 5) {
+ return new ContainerPlacementStatusDefault(4, 5, 9);
+ } else {
+ return new ContainerPlacementStatusDefault(5, 5, 9);
+ }
+ });
+
+ List<ContainerReplicaOp> pending = new ArrayList<>();
+ pending.add(ContainerReplicaOp.create(
+ ADD, MockDatanodeDetails.randomDatanodeDetails(), 1));
+
+ Set<ContainerReplica> replicas = createReplicas(container.containerID(),
+ Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2),
+ Pair.of(IN_SERVICE, 3), Pair.of(IN_SERVICE, 4),
+ Pair.of(IN_SERVICE, 5));
+ ContainerCheckRequest request = requestBuilder
+ .setContainerReplicas(replicas)
+ .setContainerInfo(container)
+ .setPendingOps(pending)
+ .build();
+
+ ContainerHealthResult result = healthCheck.checkHealth(request);
+ Assert.assertEquals(HealthState.MIS_REPLICATED, result.getHealthState());
+
+ // The pending add fixes the mis-replication, so nothing is queued,
+ // but the container is still reported as mis-replicated.
+ Assert.assertTrue(healthCheck.handle(request));
+ Assert.assertEquals(0, repQueue.underReplicatedQueueSize());
+ Assert.assertEquals(0, repQueue.overReplicatedQueueSize());
+ Assert.assertEquals(0, repQueue.misReplicatedQueueSize());
+ Assert.assertEquals(0, report.getStat(
+ ReplicationManagerReport.HealthState.UNDER_REPLICATED));
+ Assert.assertEquals(0, report.getStat(
+ ReplicationManagerReport.HealthState.OVER_REPLICATED));
+ Assert.assertEquals(1, report.getStat(
+ ReplicationManagerReport.HealthState.MIS_REPLICATED));
+ }
+
+ @Test
+ public void testUnderAndMisReplicatedContainer() {
+ ContainerInfo container = createContainerInfo(repConfig);
+
+ // Placement policy is always violated
+ Mockito.when(placementPolicy.validateContainerPlacement(
+ Mockito.any(),
+ Mockito.anyInt()
+ )).thenAnswer(invocation ->
+ new ContainerPlacementStatusDefault(4, 5, 9));
+
+ Set<ContainerReplica> replicas = createReplicas(container.containerID(),
+ Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2),
+ Pair.of(IN_SERVICE, 3), Pair.of(IN_SERVICE, 4));
+ ContainerCheckRequest request = requestBuilder
+ .setContainerReplicas(replicas)
+ .setContainerInfo(container)
+ .build();
+
+ ContainerHealthResult result = healthCheck.checkHealth(request);
+ Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
+
+ // Under-replicated takes precedence and the mis-replication is ignored
+ // for now.
+ Assert.assertTrue(healthCheck.handle(request));
+ Assert.assertEquals(1, repQueue.underReplicatedQueueSize());
+ Assert.assertEquals(0, repQueue.overReplicatedQueueSize());
+ Assert.assertEquals(0, repQueue.misReplicatedQueueSize());
+ Assert.assertEquals(1, report.getStat(
+ ReplicationManagerReport.HealthState.UNDER_REPLICATED));
+ Assert.assertEquals(0, report.getStat(
+ ReplicationManagerReport.HealthState.OVER_REPLICATED));
+ Assert.assertEquals(0, report.getStat(
+ ReplicationManagerReport.HealthState.MIS_REPLICATED));
+ }
+
+ @Test
+ public void testOverAndMisReplicatedContainer() {
+ ContainerInfo container = createContainerInfo(repConfig);
+
+ // Placement policy is always violated
+ Mockito.when(placementPolicy.validateContainerPlacement(
+ Mockito.any(),
+ Mockito.anyInt()
+ )).thenAnswer(invocation ->
+ new ContainerPlacementStatusDefault(4, 5, 9));
+
+ Set<ContainerReplica> replicas = createReplicas(container.containerID(),
+ Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2),
+ Pair.of(IN_SERVICE, 3), Pair.of(IN_SERVICE, 4),
+ Pair.of(IN_SERVICE, 5), Pair.of(IN_SERVICE, 5));
+ ContainerCheckRequest request = requestBuilder
+ .setContainerReplicas(replicas)
+ .setContainerInfo(container)
+ .build();
+
+ ContainerHealthResult result = healthCheck.checkHealth(request);
+ Assert.assertEquals(HealthState.OVER_REPLICATED, result.getHealthState());
+
+ // Over-replicated takes precedence and the mis-replication is ignored
+ // for now.
+ Assert.assertTrue(healthCheck.handle(request));
+ Assert.assertEquals(0, repQueue.underReplicatedQueueSize());
+ Assert.assertEquals(1, repQueue.overReplicatedQueueSize());
+ Assert.assertEquals(0, repQueue.misReplicatedQueueSize());
+ Assert.assertEquals(0, report.getStat(
+ ReplicationManagerReport.HealthState.UNDER_REPLICATED));
+ Assert.assertEquals(1, report.getStat(
+ ReplicationManagerReport.HealthState.OVER_REPLICATED));
+ Assert.assertEquals(0, report.getStat(
+ ReplicationManagerReport.HealthState.MIS_REPLICATED));
+ }
+
}
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestRatisReplicationCheckHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestRatisReplicationCheckHandler.java
index d8dd69284b..f6493b7817 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestRatisReplicationCheckHandler.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestRatisReplicationCheckHandler.java
@@ -30,6 +30,7 @@ import org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacem
import org.apache.hadoop.hdds.scm.container.replication.ContainerCheckRequest;
import org.apache.hadoop.hdds.scm.container.replication.ContainerHealthResult;
import org.apache.hadoop.hdds.scm.container.replication.ContainerHealthResult.HealthState;
+import org.apache.hadoop.hdds.scm.container.replication.ContainerHealthResult.MisReplicatedHealthResult;
import org.apache.hadoop.hdds.scm.container.replication.ContainerHealthResult.OverReplicatedHealthResult;
import org.apache.hadoop.hdds.scm.container.replication.ContainerHealthResult.UnderReplicatedHealthResult;
import org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaOp;
@@ -128,7 +129,7 @@ public class TestRatisReplicationCheckHandler {
healthCheck.checkHealth(requestBuilder.build());
Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
Assert.assertEquals(1, result.getRemainingRedundancy());
- Assert.assertFalse(result.isSufficientlyReplicatedAfterPending());
+ Assert.assertFalse(result.isReplicatedOkAfterPending());
Assert.assertFalse(result.underReplicatedDueToDecommission());
Assert.assertTrue(healthCheck.handle(requestBuilder.build()));
@@ -153,7 +154,7 @@ public class TestRatisReplicationCheckHandler {
healthCheck.checkHealth(requestBuilder.build());
Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
Assert.assertEquals(1, result.getRemainingRedundancy());
- Assert.assertFalse(result.isSufficientlyReplicatedAfterPending());
+ Assert.assertFalse(result.isReplicatedOkAfterPending());
Assert.assertFalse(result.underReplicatedDueToDecommission());
Assert.assertTrue(healthCheck.handle(requestBuilder.build()));
@@ -178,7 +179,7 @@ public class TestRatisReplicationCheckHandler {
healthCheck.checkHealth(requestBuilder.build());
Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
Assert.assertEquals(1, result.getRemainingRedundancy());
- Assert.assertTrue(result.isSufficientlyReplicatedAfterPending());
+ Assert.assertTrue(result.isReplicatedOkAfterPending());
Assert.assertFalse(result.underReplicatedDueToDecommission());
Assert.assertTrue(healthCheck.handle(requestBuilder.build()));
@@ -203,7 +204,7 @@ public class TestRatisReplicationCheckHandler {
healthCheck.checkHealth(requestBuilder.build());
Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
Assert.assertEquals(2, result.getRemainingRedundancy());
- Assert.assertFalse(result.isSufficientlyReplicatedAfterPending());
+ Assert.assertFalse(result.isReplicatedOkAfterPending());
Assert.assertTrue(result.underReplicatedDueToDecommission());
Assert.assertTrue(healthCheck.handle(requestBuilder.build()));
@@ -230,7 +231,7 @@ public class TestRatisReplicationCheckHandler {
healthCheck.checkHealth(requestBuilder.build());
Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
Assert.assertEquals(2, result.getRemainingRedundancy());
- Assert.assertTrue(result.isSufficientlyReplicatedAfterPending());
+ Assert.assertTrue(result.isReplicatedOkAfterPending());
Assert.assertTrue(result.underReplicatedDueToDecommission());
Assert.assertTrue(healthCheck.handle(requestBuilder.build()));
@@ -258,7 +259,7 @@ public class TestRatisReplicationCheckHandler {
healthCheck.checkHealth(requestBuilder.build());
Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
Assert.assertEquals(1, result.getRemainingRedundancy());
- Assert.assertFalse(result.isSufficientlyReplicatedAfterPending());
+ Assert.assertFalse(result.isReplicatedOkAfterPending());
Assert.assertFalse(result.underReplicatedDueToDecommission());
Assert.assertTrue(healthCheck.handle(requestBuilder.build()));
@@ -279,7 +280,7 @@ public class TestRatisReplicationCheckHandler {
healthCheck.checkHealth(requestBuilder.build());
Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
Assert.assertEquals(0, result.getRemainingRedundancy());
- Assert.assertFalse(result.isSufficientlyReplicatedAfterPending());
+ Assert.assertFalse(result.isReplicatedOkAfterPending());
Assert.assertFalse(result.underReplicatedDueToDecommission());
Assert.assertTrue(result.isUnrecoverable());
@@ -315,7 +316,7 @@ public class TestRatisReplicationCheckHandler {
healthCheck.checkHealth(requestBuilder.build());
Assert.assertEquals(HealthState.OVER_REPLICATED, result.getHealthState());
Assert.assertEquals(4, result.getExcessRedundancy());
- Assert.assertFalse(result.isSufficientlyReplicatedAfterPending());
+ Assert.assertFalse(result.isReplicatedOkAfterPending());
Assert.assertTrue(healthCheck.handle(requestBuilder.build()));
Assert.assertEquals(0, repQueue.underReplicatedQueueSize());
@@ -342,7 +343,7 @@ public class TestRatisReplicationCheckHandler {
healthCheck.checkHealth(requestBuilder.build());
Assert.assertEquals(HealthState.OVER_REPLICATED, result.getHealthState());
Assert.assertEquals(1, result.getExcessRedundancy());
- Assert.assertTrue(result.isSufficientlyReplicatedAfterPending());
+ Assert.assertTrue(result.isReplicatedOkAfterPending());
Assert.assertTrue(healthCheck.handle(requestBuilder.build()));
Assert.assertEquals(0, repQueue.underReplicatedQueueSize());
@@ -367,7 +368,7 @@ public class TestRatisReplicationCheckHandler {
healthCheck.checkHealth(requestBuilder.build());
Assert.assertEquals(HealthState.OVER_REPLICATED, result.getHealthState());
Assert.assertEquals(1, result.getExcessRedundancy());
- Assert.assertFalse(result.isSufficientlyReplicatedAfterPending());
+ Assert.assertFalse(result.isReplicatedOkAfterPending());
Assert.assertTrue(healthCheck.handle(requestBuilder.build()));
Assert.assertEquals(0, repQueue.underReplicatedQueueSize());
@@ -416,18 +417,16 @@ public class TestRatisReplicationCheckHandler {
healthCheck.checkHealth(requestBuilder.build());
Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
Assert.assertEquals(1, result.getRemainingRedundancy());
- Assert.assertFalse(result.isSufficientlyReplicatedAfterPending());
+ Assert.assertFalse(result.isReplicatedOkAfterPending());
Assert.assertFalse(result.underReplicatedDueToDecommission());
- Assert.assertTrue(result.isMisReplicated());
- Assert.assertTrue(result.isMisReplicatedAfterPending());
- Assert.assertFalse(result.isDueToMisReplication());
Assert.assertTrue(healthCheck.handle(requestBuilder.build()));
Assert.assertEquals(1, repQueue.underReplicatedQueueSize());
Assert.assertEquals(0, repQueue.overReplicatedQueueSize());
+ Assert.assertEquals(0, repQueue.misReplicatedQueueSize());
Assert.assertEquals(1, report.getStat(
ReplicationManagerReport.HealthState.UNDER_REPLICATED));
- Assert.assertEquals(1, report.getStat(
+ Assert.assertEquals(0, report.getStat(
ReplicationManagerReport.HealthState.MIS_REPLICATED));
}
@@ -463,23 +462,21 @@ public class TestRatisReplicationCheckHandler {
healthCheck.checkHealth(requestBuilder.build());
Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
Assert.assertEquals(1, result.getRemainingRedundancy());
- Assert.assertTrue(result.isSufficientlyReplicatedAfterPending());
+ Assert.assertTrue(result.isReplicatedOkAfterPending());
Assert.assertFalse(result.underReplicatedDueToDecommission());
- Assert.assertTrue(result.isMisReplicated());
- Assert.assertFalse(result.isMisReplicatedAfterPending());
- Assert.assertFalse(result.isDueToMisReplication());
Assert.assertTrue(healthCheck.handle(requestBuilder.build()));
Assert.assertEquals(0, repQueue.underReplicatedQueueSize());
Assert.assertEquals(0, repQueue.overReplicatedQueueSize());
+ Assert.assertEquals(0, repQueue.misReplicatedQueueSize());
Assert.assertEquals(1, report.getStat(
ReplicationManagerReport.HealthState.UNDER_REPLICATED));
- Assert.assertEquals(1, report.getStat(
+ Assert.assertEquals(0, report.getStat(
ReplicationManagerReport.HealthState.MIS_REPLICATED));
}
@Test
- public void testUnderReplicatedOnlyDueToMisReplication() {
+ public void testMisReplicated() {
Mockito.when(containerPlacementPolicy.validateContainerPlacement(
Mockito.any(),
Mockito.anyInt()
@@ -491,27 +488,23 @@ public class TestRatisReplicationCheckHandler {
= createReplicas(container.containerID(), 0, 0, 0);
requestBuilder.setContainerReplicas(replicas)
.setContainerInfo(container);
- UnderReplicatedHealthResult result = (UnderReplicatedHealthResult)
+ MisReplicatedHealthResult result = (MisReplicatedHealthResult)
healthCheck.checkHealth(requestBuilder.build());
- Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
- Assert.assertEquals(2, result.getRemainingRedundancy());
- Assert.assertTrue(result.isSufficientlyReplicatedAfterPending());
- Assert.assertFalse(result.underReplicatedDueToDecommission());
- Assert.assertTrue(result.isMisReplicated());
- Assert.assertTrue(result.isMisReplicatedAfterPending());
- Assert.assertTrue(result.isDueToMisReplication());
+ Assert.assertEquals(HealthState.MIS_REPLICATED, result.getHealthState());
+ Assert.assertFalse(result.isReplicatedOkAfterPending());
Assert.assertTrue(healthCheck.handle(requestBuilder.build()));
- Assert.assertEquals(1, repQueue.underReplicatedQueueSize());
+ Assert.assertEquals(0, repQueue.underReplicatedQueueSize());
Assert.assertEquals(0, repQueue.overReplicatedQueueSize());
- Assert.assertEquals(1, report.getStat(
+ Assert.assertEquals(1, repQueue.misReplicatedQueueSize());
+ Assert.assertEquals(0, report.getStat(
ReplicationManagerReport.HealthState.UNDER_REPLICATED));
Assert.assertEquals(1, report.getStat(
ReplicationManagerReport.HealthState.MIS_REPLICATED));
}
@Test
- public void testUnderReplicatedOnlyDueToMisReplicationFixByPending() {
+ public void testMisReplicatedFixedByPending() {
Mockito.when(containerPlacementPolicy.validateContainerPlacement(
Mockito.any(),
Mockito.anyInt()
@@ -538,20 +531,15 @@ public class TestRatisReplicationCheckHandler {
requestBuilder.setContainerReplicas(replicas)
.setContainerInfo(container)
.setPendingOps(pending);
- UnderReplicatedHealthResult result = (UnderReplicatedHealthResult)
+ MisReplicatedHealthResult result = (MisReplicatedHealthResult)
healthCheck.checkHealth(requestBuilder.build());
- Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
- Assert.assertEquals(2, result.getRemainingRedundancy());
- Assert.assertTrue(result.isSufficientlyReplicatedAfterPending());
- Assert.assertFalse(result.underReplicatedDueToDecommission());
- Assert.assertTrue(result.isMisReplicated());
- Assert.assertFalse(result.isMisReplicatedAfterPending());
- Assert.assertTrue(result.isDueToMisReplication());
+ Assert.assertEquals(HealthState.MIS_REPLICATED, result.getHealthState());
+ Assert.assertTrue(result.isReplicatedOkAfterPending());
Assert.assertTrue(healthCheck.handle(requestBuilder.build()));
Assert.assertEquals(0, repQueue.underReplicatedQueueSize());
Assert.assertEquals(0, repQueue.overReplicatedQueueSize());
- Assert.assertEquals(1, report.getStat(
+ Assert.assertEquals(0, report.getStat(
ReplicationManagerReport.HealthState.UNDER_REPLICATED));
Assert.assertEquals(1, report.getStat(
ReplicationManagerReport.HealthState.MIS_REPLICATED));
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org