You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by um...@apache.org on 2022/06/15 05:56:00 UTC

[ozone] branch master updated: HDDS-6697. EC: ReplicationManager - create class to detect EC container health issues (#3512)

This is an automated email from the ASF dual-hosted git repository.

umamahesh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new be29c6761f HDDS-6697. EC: ReplicationManager - create class to detect EC container health issues (#3512)
be29c6761f is described below

commit be29c6761f4fa28bb50f9e8077da6ae1ed50d819
Author: Stephen O'Donnell <st...@gmail.com>
AuthorDate: Wed Jun 15 06:55:55 2022 +0100

    HDDS-6697. EC: ReplicationManager - create class to detect EC container health issues (#3512)
---
 .../scm/container/ECContainerReplicaCount.java     | 107 +++++---
 .../replication/ContainerHealthCheck.java          |  38 +++
 .../replication/ContainerHealthResult.java         | 180 ++++++++++++++
 .../replication/ECContainerHealthCheck.java        | 107 ++++++++
 .../replication/TestECContainerHealthCheck.java    | 273 +++++++++++++++++++++
 .../states/TestECContainerReplicaCount.java        | 152 ++++++++----
 6 files changed, 775 insertions(+), 82 deletions(-)

diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ECContainerReplicaCount.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ECContainerReplicaCount.java
index 607d5c4e4c..ac05a85762 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ECContainerReplicaCount.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ECContainerReplicaCount.java
@@ -63,6 +63,7 @@ public class ECContainerReplicaCount {
   private final ContainerInfo containerInfo;
   private final ECReplicationConfig repConfig;
   private final List<Integer> pendingAdd;
+  private final List<Integer> pendingDelete;
   private final int remainingMaintenanceRedundancy;
   private final Map<Integer, Integer> healthyIndexes = new HashMap<>();
   private final Map<Integer, Integer> decommissionIndexes = new HashMap<>();
@@ -74,6 +75,7 @@ public class ECContainerReplicaCount {
     this.containerInfo = containerInfo;
     this.repConfig = (ECReplicationConfig)containerInfo.getReplicationConfig();
     this.pendingAdd = indexesPendingAdd;
+    this.pendingDelete = indexesPendingDelete;
     this.remainingMaintenanceRedundancy
         = Math.min(repConfig.getParity(), remainingMaintenanceRedundancy);
 
@@ -152,15 +154,21 @@ public class ECContainerReplicaCount {
   /**
    * Returns an unsorted list of indexes which need additional copies to
    * ensure the container is sufficiently replicated. These missing indexes will
-   * not be on maintenance nodes, although they may be on decommissioning nodes.
-   * Replicas pending delete are assumed to be removed and any pending add
-   * are assume to be created and omitted them from the returned list. This list
-   * can be used to determine which replicas must be recovered in a group,
-   * assuming the inflight replicas pending add complete successfully.
-   * @return List of missing indexes
+   * not be on maintenance nodes, or decommission nodes.
+   * Replicas pending delete are assumed to be removed.
+   * If includePendingAdd is true, any replicas pending add
+   * are assume to be created and omitted them from the returned list. If it is
+   * true, we assume the pendingAdd will complete, giving a view of the
+   * potential future state of the container.
+   * This list can be used to determine which replicas must be recovered via an
+   * EC reconstuction, rather than making copies of maintenance / decommission
+   * replicas
+   * @param includePendingAdd  If true, treat pending add containers as if they
+   *                           have completed successfully.
+   * @return List of missing indexes which have no online copy.
    */
-  public List<Integer> missingNonMaintenanceIndexes() {
-    if (isSufficientlyReplicated()) {
+  public List<Integer> unavailableIndexes(boolean includePendingAdd) {
+    if (isSufficientlyReplicated(false)) {
       return Collections.emptyList();
     }
     Set<Integer> missing = new HashSet<>();
@@ -171,14 +179,20 @@ public class ECContainerReplicaCount {
     }
     // Now we have a list of missing. Remove any pending add as they should
     // eventually recover.
-    for (Integer i : pendingAdd) {
-      missing.remove(i);
+    if (includePendingAdd) {
+      for (Integer i : pendingAdd) {
+        missing.remove(i);
+      }
     }
     // Remove any maintenance copies, as they are still available. What remains
     // is the set of indexes we have no copy of, and hence must get re-created
     for (Integer i : maintenanceIndexes.keySet()) {
       missing.remove(i);
     }
+    // Remove any decommission copies, as they are still available
+    for (Integer i : decommissionIndexes.keySet()) {
+      missing.remove(i);
+    }
     return missing.stream().collect(Collectors.toList());
   }
 
@@ -214,16 +228,22 @@ public class ECContainerReplicaCount {
 
   /**
    * If any index has more than one copy that is not in maintenance or
-   * decommission, then the container is over replicated. We consider inflight
-   * deletes, assuming they will be removed. Inflight adds are ignored until
-   * they are actually created.
+   * decommission, then the container is over replicated. If the
+   * includePendingDeletes flag is false we ignore replicas pending delete.
+   * If it is true, we assume inflight deletes have been removed, giving
+   * a view of the future state of the container if they complete successfully.
+   * Pending add are always ignored as they may fail to create.
    * Note it is possible for a container to be both over and under replicated
    * as it could have multiple copies of 1 index, but zero copies of another
    * index.
+   * @param includePendingDelete If true, treat replicas pending delete as if
+   *                             they have deleted successfully.
    * @return True if overReplicated, false otherwise.
    */
-  public boolean isOverReplicated() {
-    for (Integer count : healthyIndexes.values()) {
+  public boolean isOverReplicated(boolean includePendingDelete) {
+    final Map<Integer, Integer> availableIndexes
+        = getHealthyWithDelete(includePendingDelete);
+    for (Integer count : availableIndexes.values()) {
       if (count > 1) {
         return true;
       }
@@ -237,13 +257,20 @@ public class ECContainerReplicaCount {
    * as if we have excess including maintenance, it may be due to replication
    * which was needed to ensure sufficient redundancy for maintenance.
    * Pending adds are ignored as they may fail to complete.
+   * If the includePendingDeletes flag is false we ignore replicas pending
+   * delete. If it is true, we assume inflight deletes have been removed, giving
+   * a view of the future state of the container if they complete successfully.
    * Pending deletes are assumed to complete and any indexes returned from here
    * will have the pending deletes already removed.
+   * @param includePendingDelete If true, treat replicas pending delete as if
+   *    *                        they have deleted successfully.
    * @return List of indexes which are over-replicated.
    */
-  public List<Integer> overReplicatedIndexes() {
+  public List<Integer> overReplicatedIndexes(boolean includePendingDelete) {
+    final Map<Integer, Integer> availableIndexes
+        = getHealthyWithDelete(includePendingDelete);
     List<Integer> indexes = new ArrayList<>();
-    for (Map.Entry<Integer, Integer> entry : healthyIndexes.entrySet()) {
+    for (Map.Entry<Integer, Integer> entry : availableIndexes.entrySet()) {
       if (entry.getValue() > 1) {
         indexes.add(entry.getKey());
       }
@@ -251,30 +278,50 @@ public class ECContainerReplicaCount {
     return indexes;
   }
 
+  private Map<Integer, Integer> getHealthyWithDelete(boolean includeDelete) {
+    final Map<Integer, Integer> availableIndexes;
+    if (includeDelete) {
+      // Deletes are already removed from the healthy list so just use the
+      // healthy list
+      availableIndexes = Collections.unmodifiableMap(healthyIndexes);
+    } else {
+      availableIndexes = new HashMap<>(healthyIndexes);
+      pendingDelete.forEach(k -> availableIndexes.merge(k, 1, Integer::sum));
+    }
+    return availableIndexes;
+  }
+
   /**
    * The container is sufficiently replicated if the healthy indexes minus any
    * pending deletes give a complete set of container indexes. If not, we must
    * also check the maintenance indexes - the container is still sufficiently
    * replicated if the complete set is made up of healthy + maintenance and
    * there is still sufficient maintenance redundancy.
+   * If the includePendingAdd flag is set to true, this method treats replicas
+   * pending add as if they have completed and hence shows the potential future
+   * state of the container assuming they all complete.
+   * @param includePendingAdd If true, treat pending add containers as if they
+   *                          have completed successfully.
    * @return True if sufficiently replicated, false otherwise.
    */
-  public boolean isSufficientlyReplicated() {
-    if (hasFullSetOfIndexes(healthyIndexes)) {
-      return true;
+  public boolean isSufficientlyReplicated(boolean includePendingAdd) {
+    final Map<Integer, Integer> onlineIndexes;
+    if (includePendingAdd) {
+      onlineIndexes = new HashMap<>(healthyIndexes);
+      pendingAdd.forEach(k -> onlineIndexes.merge(k, 1, Integer::sum));
+    } else {
+      onlineIndexes = Collections.unmodifiableMap(healthyIndexes);
     }
-    // If we don't have a full healthy set, we could have some maintenance
-    // replicas that make up the full set.
-    // For maintenance, we must have at least dataNum + maintenance redundancy
-    // available.
-    if (healthyIndexes.size() <
-        repConfig.getData() + remainingMaintenanceRedundancy) {
-      return false;
+
+    if (hasFullSetOfIndexes(onlineIndexes)) {
+      return true;
     }
-    // Finally, check if the maintenance copies give a full set
-    Map<Integer, Integer> healthy = new HashMap<>(healthyIndexes);
+    // Check if the maintenance copies give a full set and also that we do not
+    // have too many in maintenance
+    Map<Integer, Integer> healthy = new HashMap<>(onlineIndexes);
     maintenanceIndexes.forEach((k, v) -> healthy.merge(k, v, Integer::sum));
-    return hasFullSetOfIndexes(healthy);
+    return hasFullSetOfIndexes(healthy) && onlineIndexes.size()
+        >= repConfig.getData() + remainingMaintenanceRedundancy;
   }
 
   /**
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthCheck.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthCheck.java
new file mode 100644
index 0000000000..fed85b8069
--- /dev/null
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthCheck.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership.  The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.hdds.scm.container.replication;
+
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.scm.container.ContainerInfo;
+import org.apache.hadoop.hdds.scm.container.ContainerReplica;
+
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Interface used by ReplicationManager to check if containers are healthy or
+ * not.
+ */
+public interface ContainerHealthCheck {
+
+  ContainerHealthResult checkHealth(
+      ContainerInfo container, Set<ContainerReplica> replicas,
+      List<Pair<Integer, DatanodeDetails>> indexesPendingAdd,
+      List<Pair<Integer, DatanodeDetails>> indexesPendingDelete,
+      int remainingRedundancyForMaintenance);
+}
\ No newline at end of file
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java
new file mode 100644
index 0000000000..554e7260d1
--- /dev/null
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership.  The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.hdds.scm.container.replication;
+
+import org.apache.hadoop.hdds.scm.container.ContainerInfo;
+import org.apache.hadoop.ozone.protocol.commands.SCMCommand;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Class used to represent the Health States of containers.
+ */
+public class ContainerHealthResult {
+
+  /**
+   * All possible container health states.
+   */
+  public enum HealthState {
+    HEALTHY,
+    UNDER_REPLICATED,
+    OVER_REPLICATED
+  }
+
+  private final ContainerInfo containerInfo;
+  private final HealthState healthState;
+  private final List<SCMCommand> commands = new ArrayList<>();
+
+  ContainerHealthResult(ContainerInfo containerInfo, HealthState healthState) {
+    this.containerInfo = containerInfo;
+    this.healthState = healthState;
+  }
+
+  public HealthState getHealthState() {
+    return healthState;
+  }
+
+  public void addCommand(SCMCommand command) {
+    commands.add(command);
+  }
+
+  public List<SCMCommand> getCommands() {
+    return commands;
+  }
+
+  public ContainerInfo getContainerInfo() {
+    return containerInfo;
+  }
+
+  /**
+   * Class for Healthy Container Check results.
+   */
+  public static class HealthyResult extends ContainerHealthResult {
+
+    HealthyResult(ContainerInfo containerInfo) {
+      super(containerInfo, HealthState.HEALTHY);
+    }
+  }
+
+  /**
+   * Class for UnderReplicated Container Check Results.
+   */
+  public static class UnderReplicatedHealthResult
+      extends ContainerHealthResult {
+
+    private final int remainingRedundancy;
+    private final boolean dueToDecommission;
+    private final boolean sufficientlyReplicatedAfterPending;
+    private final boolean unrecoverable;
+
+    UnderReplicatedHealthResult(ContainerInfo containerInfo,
+        int remainingRedundancy, boolean dueToDecommission,
+        boolean replicatedOkWithPending, boolean unrecoverable) {
+      super(containerInfo, HealthState.UNDER_REPLICATED);
+      this.remainingRedundancy = remainingRedundancy;
+      this.dueToDecommission = dueToDecommission;
+      this.sufficientlyReplicatedAfterPending = replicatedOkWithPending;
+      this.unrecoverable = unrecoverable;
+    }
+
+    /**
+     * How many more replicas can be lost before the the container is
+     * unreadable. For containers which are under-replicated due to decommission
+     * or maintenance only, the remaining redundancy will include those
+     * decommissioning or maintenance replicas, as they are technically still
+     * available until the datanode processes are stopped.
+     * @return Count of remaining redundant replicas.
+     */
+    public int getRemainingRedundancy() {
+      return remainingRedundancy;
+    }
+
+    /**
+     * Indicates whether the under-replication is caused only by replicas
+     * being decommissioned or entering maintenance. Ie, there are not replicas
+     * unavailable.
+     * @return True is the under-replication is caused by decommission.
+     */
+    public boolean underReplicatedDueToDecommission() {
+      return dueToDecommission;
+    }
+
+    /**
+     * Considering the pending replicas, which have been scheduled for copy or
+     * reconstruction, will the container still be under-replicated when they
+     * complete.
+     * @return True if the under-replication is corrected by the pending
+     *         replicas. False otherwise.
+     */
+    public boolean isSufficientlyReplicatedAfterPending() {
+      return sufficientlyReplicatedAfterPending;
+    }
+
+    /**
+     * Indicates whether a container has enough replicas to be read. For Ratis
+     * at least one replia must be available. For EC, at least dataNum replicas
+     * are needed.
+     * @return True if the container has insufficient replicas available to be
+     *         read, false otherwise
+     */
+    public boolean isUnrecoverable() {
+      return unrecoverable;
+    }
+  }
+
+  /**
+   * Class for Over Replicated Container Health Results.
+   */
+  public static class OverReplicatedHealthResult extends ContainerHealthResult {
+
+    private final int excessRedundancy;
+    private final boolean sufficientlyReplicatedAfterPending;
+
+
+    OverReplicatedHealthResult(ContainerInfo containerInfo,
+        int excessRedundancy, boolean replicatedOkWithPending) {
+      super(containerInfo, HealthState.OVER_REPLICATED);
+      this.excessRedundancy = excessRedundancy;
+      this.sufficientlyReplicatedAfterPending = replicatedOkWithPending;
+    }
+
+    /**
+     * How many extra replicas are present for this container and need to be
+     * removed. For EC, this number indicates how many indexes have more than
+     * one copy. Eg index 1 could have 3 copies. Index 2 could have 2 copies.
+     * The rest have 1 copy. The value returned here will be 2, indicating 2
+     * indexes have excess redundancy, even though we have to remove 3 replicas.
+     * For Ratis containers, the number return is simply the current replica
+     * count minus the expected replica count.
+     * @return The number of excess replicas.
+     */
+    public int getExcessRedundancy() {
+      return excessRedundancy;
+    }
+
+    /**
+     * Considering the pending replicas, which have been scheduled for delete,
+     * will the container still be over-replicated when they complete.
+     * @return True if the over-replication is corrected by the pending
+     *         deletes. False otherwise.
+     */
+    public boolean isSufficientlyReplicatedAfterPending() {
+      return sufficientlyReplicatedAfterPending;
+    }
+  }
+}
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECContainerHealthCheck.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECContainerHealthCheck.java
new file mode 100644
index 0000000000..b879867d85
--- /dev/null
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECContainerHealthCheck.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership.  The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.hdds.scm.container.replication;
+
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.hadoop.hdds.client.ECReplicationConfig;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.scm.container.ContainerInfo;
+import org.apache.hadoop.hdds.scm.container.ContainerReplica;
+import org.apache.hadoop.hdds.scm.container.ECContainerReplicaCount;
+
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * Class to determine the health state of an EC Container. Given the container
+ * and current replica details, along with replicas pending add and delete,
+ * this class will return a ContainerHealthResult indicating if the container
+ * is healthy, or under / over replicated etc.
+ *
+ * For EC Containers, it is possible for a container to be both under and over
+ * replicated, if there are multiple copies of one index and no copies of
+ * another. This class only returns a single status, keeping the container in a
+ * single health state at any given time. Under replicated is a more serious
+ * state than over replicated, so it will take precedence over any over
+ * replication.
+ */
+public class ECContainerHealthCheck implements ContainerHealthCheck {
+
+  // TODO - mis-replicated containers are not yet handled.
+  // TODO - should this class handle empty / deleting containers, or would it
+  //        be better handled elsewhere?
+
+  @Override
+  public ContainerHealthResult checkHealth(ContainerInfo container,
+      Set<ContainerReplica> replicas,
+      List<Pair<Integer, DatanodeDetails>> replicasPendingAdd,
+      List<Pair<Integer, DatanodeDetails>> replicasPendingDelete,
+      int remainingRedundancyForMaintenance) {
+    ECContainerReplicaCount replicaCount = getReplicaCountWithPending(container,
+          replicas, replicasPendingAdd, replicasPendingDelete,
+          remainingRedundancyForMaintenance);
+
+    ECReplicationConfig repConfig =
+        (ECReplicationConfig) container.getReplicationConfig();
+
+    if (!replicaCount.isSufficientlyReplicated(false)) {
+      List<Integer> missingIndexes = replicaCount.unavailableIndexes(false);
+      int remainingRedundancy = repConfig.getParity();
+      boolean dueToDecommission = true;
+      if (missingIndexes.size() > 0) {
+        // The container has reduced redundancy and will need reconstructed
+        // via an EC reconstruction command. Note that it may also have some
+        // replicas in decommission / maintenance states, but as the under
+        // replication is not caused only by decommission, we say it is not
+        // due to decommission/
+        dueToDecommission = false;
+        remainingRedundancy = repConfig.getParity() - missingIndexes.size();
+      }
+      return new ContainerHealthResult.UnderReplicatedHealthResult(
+          container, remainingRedundancy, dueToDecommission,
+          replicaCount.isSufficientlyReplicated(true),
+          replicaCount.unRecoverable());
+    }
+
+    if (replicaCount.isOverReplicated(false)) {
+      List<Integer> overRepIndexes = replicaCount.overReplicatedIndexes(false);
+      return new ContainerHealthResult
+          .OverReplicatedHealthResult(container, overRepIndexes.size(),
+          !replicaCount.isOverReplicated(true));
+    }
+
+    // No issues detected, so return healthy.
+    return new ContainerHealthResult.HealthyResult(container);
+  }
+
+  private ECContainerReplicaCount getReplicaCountWithPending(
+      ContainerInfo container, Set<ContainerReplica> replicas,
+      List<Pair<Integer, DatanodeDetails>> replicasPendingAdd,
+      List<Pair<Integer, DatanodeDetails>> replicasPendingDelete,
+      int remainingRedundancyForMaintenance) {
+    List<Integer> indexesPendingAdd = replicasPendingAdd.stream()
+        .map(i -> i.getLeft()).collect(Collectors.toList());
+    List<Integer> indexesPendingDelete = replicasPendingDelete.stream()
+        .map(i -> i.getLeft()).collect(Collectors.toList());
+
+    return new ECContainerReplicaCount(container, replicas, indexesPendingAdd,
+        indexesPendingDelete, remainingRedundancyForMaintenance);
+
+  }
+
+}
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECContainerHealthCheck.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECContainerHealthCheck.java
new file mode 100644
index 0000000000..b22999a2f8
--- /dev/null
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECContainerHealthCheck.java
@@ -0,0 +1,273 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership.  The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.hdds.scm.container.replication;
+
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.hadoop.hdds.client.ECReplicationConfig;
+import org.apache.hadoop.hdds.client.ReplicationConfig;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.MockDatanodeDetails;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos;
+import org.apache.hadoop.hdds.scm.container.ContainerID;
+import org.apache.hadoop.hdds.scm.container.ContainerInfo;
+import org.apache.hadoop.hdds.scm.container.ContainerReplica;
+import org.apache.hadoop.hdds.scm.container.replication.ContainerHealthResult.OverReplicatedHealthResult;
+import org.apache.hadoop.hdds.scm.container.replication.ContainerHealthResult.UnderReplicatedHealthResult;
+import org.apache.hadoop.hdds.scm.container.replication.ContainerHealthResult.HealthState;
+import org.apache.hadoop.hdds.scm.pipeline.PipelineID;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED;
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING;
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE;
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE;
+
+/**
+ * Tests for the ECContainerHealthCheck class.
+ */
+public class TestECContainerHealthCheck {
+
+  private ECContainerHealthCheck healthCheck;
+  private ECReplicationConfig repConfig;
+
+  @Before
+  public void setup() {
+    healthCheck = new ECContainerHealthCheck();
+    repConfig = new ECReplicationConfig(3, 2);
+  }
+
+  @Test
+  public void testHealthyContainerIsHealthy() {
+    ContainerInfo container = createContainerInfo(repConfig);
+    Set<ContainerReplica> replicas
+        = createReplicas(container.containerID(), 1, 2, 3, 4, 5);
+    ContainerHealthResult result = healthCheck.checkHealth(container, replicas,
+        Collections.emptyList(), Collections.emptyList(), 2);
+    Assert.assertEquals(HealthState.HEALTHY, result.getHealthState());
+  }
+
+  @Test
+  public void testUnderReplicatedContainerIsUnderReplicated() {
+    ContainerInfo container = createContainerInfo(repConfig);
+    Set<ContainerReplica> replicas
+        = createReplicas(container.containerID(), 1, 2, 4, 5);
+    UnderReplicatedHealthResult result = (UnderReplicatedHealthResult)
+        healthCheck.checkHealth(container, replicas,
+            Collections.emptyList(), Collections.emptyList(), 2);
+    Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
+    Assert.assertEquals(1, result.getRemainingRedundancy());
+    Assert.assertFalse(result.isSufficientlyReplicatedAfterPending());
+    Assert.assertFalse(result.underReplicatedDueToDecommission());
+  }
+
+  @Test
+  public void testUnderReplicatedContainerFixedWithPending() {
+    ContainerInfo container = createContainerInfo(repConfig);
+    Set<ContainerReplica> replicas
+        = createReplicas(container.containerID(), 1, 2, 4, 5);
+    List<Pair<Integer, DatanodeDetails>> pending = new ArrayList<>();
+    pending.add(Pair.of(3, MockDatanodeDetails.randomDatanodeDetails()));
+    UnderReplicatedHealthResult result = (UnderReplicatedHealthResult)
+        healthCheck.checkHealth(container, replicas, pending,
+            Collections.emptyList(), 2);
+    Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
+    Assert.assertEquals(1, result.getRemainingRedundancy());
+    Assert.assertTrue(result.isSufficientlyReplicatedAfterPending());
+    Assert.assertFalse(result.underReplicatedDueToDecommission());
+  }
+
+  @Test
+  public void testUnderReplicatedDueToDecommission() {
+    ContainerInfo container = createContainerInfo(repConfig);
+    Set<ContainerReplica> replicas = createReplicas(container.containerID(),
+        Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2),
+        Pair.of(IN_SERVICE, 3), Pair.of(DECOMMISSIONING, 4),
+        Pair.of(DECOMMISSIONED, 5));
+
+    UnderReplicatedHealthResult result = (UnderReplicatedHealthResult)
+        healthCheck.checkHealth(container, replicas, Collections.emptyList(),
+            Collections.emptyList(), 2);
+    Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
+    Assert.assertEquals(2, result.getRemainingRedundancy());
+    Assert.assertFalse(result.isSufficientlyReplicatedAfterPending());
+    Assert.assertTrue(result.underReplicatedDueToDecommission());
+  }
+
+  @Test
+  public void testUnderReplicatedDueToDecommissionFixedWithPending() {
+    ContainerInfo container = createContainerInfo(repConfig);
+    Set<ContainerReplica> replicas = createReplicas(container.containerID(),
+        Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2),
+        Pair.of(IN_SERVICE, 3), Pair.of(DECOMMISSIONING, 4),
+        Pair.of(IN_SERVICE, 4), Pair.of(DECOMMISSIONED, 5));
+    List<Pair<Integer, DatanodeDetails>> pending = new ArrayList<>();
+    pending.add(Pair.of(5, MockDatanodeDetails.randomDatanodeDetails()));
+
+    UnderReplicatedHealthResult result = (UnderReplicatedHealthResult)
+        healthCheck.checkHealth(container, replicas, pending,
+            Collections.emptyList(), 2);
+    Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
+    Assert.assertEquals(2, result.getRemainingRedundancy());
+    Assert.assertTrue(result.isSufficientlyReplicatedAfterPending());
+    Assert.assertTrue(result.underReplicatedDueToDecommission());
+  }
+
+  @Test
+  public void testUnderReplicatedDueToDecommissionAndMissing() {
+    ContainerInfo container = createContainerInfo(repConfig);
+    Set<ContainerReplica> replicas = createReplicas(container.containerID(),
+        Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2),
+        Pair.of(DECOMMISSIONING, 4), Pair.of(DECOMMISSIONED, 5));
+    List<Pair<Integer, DatanodeDetails>> pending = new ArrayList<>();
+    pending.add(Pair.of(3, MockDatanodeDetails.randomDatanodeDetails()));
+
+    UnderReplicatedHealthResult result = (UnderReplicatedHealthResult)
+        healthCheck.checkHealth(container, replicas, pending,
+            Collections.emptyList(), 2);
+    Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
+    Assert.assertEquals(1, result.getRemainingRedundancy());
+    Assert.assertFalse(result.isSufficientlyReplicatedAfterPending());
+    Assert.assertFalse(result.underReplicatedDueToDecommission());
+  }
+
+  @Test
+  public void testUnderReplicatedAndUnrecoverable() {
+    ContainerInfo container = createContainerInfo(repConfig);
+    Set<ContainerReplica> replicas = createReplicas(container.containerID(),
+        Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2));
+
+    UnderReplicatedHealthResult result = (UnderReplicatedHealthResult)
+        healthCheck.checkHealth(container, replicas, Collections.emptyList(),
+            Collections.emptyList(), 2);
+    Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
+    Assert.assertEquals(-1, result.getRemainingRedundancy());
+    Assert.assertFalse(result.isSufficientlyReplicatedAfterPending());
+    Assert.assertFalse(result.underReplicatedDueToDecommission());
+    Assert.assertTrue(result.isUnrecoverable());
+  }
+
+  @Test
+  public void testOverReplicatedContainer() {
+    ContainerInfo container = createContainerInfo(repConfig);
+    Set<ContainerReplica> replicas =  createReplicas(container.containerID(),
+        Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2),
+        Pair.of(IN_SERVICE, 3), Pair.of(IN_SERVICE, 4),
+        Pair.of(IN_SERVICE, 5),
+        Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2));
+
+    List<Pair<Integer, DatanodeDetails>> pending = new ArrayList<>();
+    pending.add(Pair.of(1, MockDatanodeDetails.randomDatanodeDetails()));
+    pending.add(Pair.of(2, MockDatanodeDetails.randomDatanodeDetails()));
+
+    OverReplicatedHealthResult result = (OverReplicatedHealthResult)
+        healthCheck.checkHealth(container, replicas, Collections.emptyList(),
+            pending, 2);
+    Assert.assertEquals(HealthState.OVER_REPLICATED, result.getHealthState());
+    Assert.assertEquals(2, result.getExcessRedundancy());
+    Assert.assertTrue(result.isSufficientlyReplicatedAfterPending());
+  }
+
+  @Test
+  public void testOverReplicatedContainerDueToMaintenance() {
+    ContainerInfo container = createContainerInfo(repConfig);
+    Set<ContainerReplica> replicas =  createReplicas(container.containerID(),
+        Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2),
+        Pair.of(IN_SERVICE, 3), Pair.of(IN_SERVICE, 4),
+        Pair.of(IN_SERVICE, 5),
+        Pair.of(IN_MAINTENANCE, 1), Pair.of(IN_MAINTENANCE, 2));
+
+    ContainerHealthResult result = healthCheck.checkHealth(container, replicas,
+        Collections.emptyList(), Collections.emptyList(), 2);
+    Assert.assertEquals(HealthState.HEALTHY, result.getHealthState());
+  }
+
+  @Test
+  public void testOverAndUnderReplicated() {
+    ContainerInfo container = createContainerInfo(repConfig);
+    Set<ContainerReplica> replicas =  createReplicas(container.containerID(),
+        Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2),
+        Pair.of(IN_SERVICE, 3), Pair.of(IN_SERVICE, 4),
+        Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2));
+
+    ContainerHealthResult result = healthCheck.checkHealth(container, replicas,
+        Collections.emptyList(), Collections.emptyList(), 2);
+    Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
+    Assert.assertEquals(1,
+        ((UnderReplicatedHealthResult)result).getRemainingRedundancy());
+  }
+
+
+  private Set<ContainerReplica> createReplicas(ContainerID containerID,
+      Pair<HddsProtos.NodeOperationalState, Integer>... nodes) {
+    Set<ContainerReplica> replicas = new HashSet<>();
+    for (Pair<HddsProtos.NodeOperationalState, Integer> p : nodes) {
+      replicas.add(
+          createContainerReplica(containerID, p.getRight(), p.getLeft()));
+    }
+    return replicas;
+  }
+
+  private Set<ContainerReplica> createReplicas(ContainerID containerID,
+      int... indexes) {
+    Set<ContainerReplica> replicas = new HashSet<>();
+    for (int i : indexes) {
+      replicas.add(createContainerReplica(
+          containerID, i, IN_SERVICE));
+    }
+    return replicas;
+  }
+
+  private ContainerReplica createContainerReplica(ContainerID containerID,
+      int replicaIndex, HddsProtos.NodeOperationalState opState) {
+    ContainerReplica.ContainerReplicaBuilder builder
+        = ContainerReplica.newBuilder();
+    DatanodeDetails datanodeDetails
+        = MockDatanodeDetails.randomDatanodeDetails();
+    datanodeDetails.setPersistedOpState(opState);
+    builder.setContainerID(containerID);
+    builder.setReplicaIndex(replicaIndex);
+    builder.setKeyCount(123);
+    builder.setBytesUsed(1234);
+    builder.setContainerState(StorageContainerDatanodeProtocolProtos
+        .ContainerReplicaProto.State.CLOSED);
+    builder.setDatanodeDetails(datanodeDetails);
+    builder.setSequenceId(0);
+    builder.setOriginNodeId(datanodeDetails.getUuid());
+    return builder.build();
+  }
+
+  private ContainerInfo createContainerInfo(
+      ReplicationConfig replicationConfig) {
+    ContainerInfo.Builder builder = new ContainerInfo.Builder();
+    builder.setContainerID(1);
+    builder.setOwner("Ozone");
+    builder.setPipelineID(PipelineID.randomId());
+    builder.setReplicationConfig(replicationConfig);
+    builder.setState(HddsProtos.LifeCycleState.CLOSED);
+    return builder.build();
+  }
+}
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestECContainerReplicaCount.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestECContainerReplicaCount.java
index fd26e9c3fa..b1b1cbbed0 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestECContainerReplicaCount.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestECContainerReplicaCount.java
@@ -65,7 +65,7 @@ public class TestECContainerReplicaCount {
             Pair.of(IN_SERVICE, 5));
     ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
         replica, new ArrayList<>(), new ArrayList<>(), 1);
-    Assertions.assertTrue(rcnt.isSufficientlyReplicated());
+    Assertions.assertTrue(rcnt.isSufficientlyReplicated(false));
     Assertions.assertFalse(rcnt.unRecoverable());
   }
 
@@ -76,10 +76,10 @@ public class TestECContainerReplicaCount {
             Pair.of(IN_SERVICE, 3), Pair.of(IN_SERVICE, 4));
     ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
         replica, new ArrayList<>(), new ArrayList<>(), 1);
-    Assertions.assertFalse(rcnt.isSufficientlyReplicated());
-    Assertions.assertEquals(1, rcnt.missingNonMaintenanceIndexes().size());
+    Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
+    Assertions.assertEquals(1, rcnt.unavailableIndexes(true).size());
     Assertions.assertEquals(5,
-        rcnt.missingNonMaintenanceIndexes().get(0).intValue());
+        rcnt.unavailableIndexes(true).get(0).intValue());
   }
 
   @Test
@@ -92,10 +92,10 @@ public class TestECContainerReplicaCount {
     delete.add(1);
     ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
         replica, new ArrayList<>(), delete, 1);
-    Assertions.assertFalse(rcnt.isSufficientlyReplicated());
-    Assertions.assertEquals(1, rcnt.missingNonMaintenanceIndexes().size());
+    Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
+    Assertions.assertEquals(1, rcnt.unavailableIndexes(true).size());
     Assertions.assertEquals(1,
-        rcnt.missingNonMaintenanceIndexes().get(0).intValue());
+        rcnt.unavailableIndexes(true).get(0).intValue());
   }
 
   @Test
@@ -110,8 +110,8 @@ public class TestECContainerReplicaCount {
     delete.add(2);
     ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
         replica, new ArrayList<>(), delete, 1);
-    Assertions.assertTrue(rcnt.isSufficientlyReplicated());
-    Assertions.assertFalse(rcnt.isOverReplicated());
+    Assertions.assertTrue(rcnt.isSufficientlyReplicated(false));
+    Assertions.assertFalse(rcnt.isOverReplicated(true));
   }
 
   @Test
@@ -127,10 +127,10 @@ public class TestECContainerReplicaCount {
     delete.add(2);
     ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
         replica, new ArrayList<>(), delete, 1);
-    Assertions.assertFalse(rcnt.isSufficientlyReplicated());
-    Assertions.assertEquals(1, rcnt.missingNonMaintenanceIndexes().size());
+    Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
+    Assertions.assertEquals(1, rcnt.unavailableIndexes(true).size());
     Assertions.assertEquals(2,
-        rcnt.missingNonMaintenanceIndexes().get(0).intValue());
+        rcnt.unavailableIndexes(true).get(0).intValue());
   }
 
   @Test
@@ -141,15 +141,15 @@ public class TestECContainerReplicaCount {
             Pair.of(IN_MAINTENANCE, 5));
     ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
         replica, new ArrayList<>(), new ArrayList<>(), 0);
-    Assertions.assertTrue(rcnt.isSufficientlyReplicated());
+    Assertions.assertTrue(rcnt.isSufficientlyReplicated(false));
     rcnt = new ECContainerReplicaCount(container,
         replica, new ArrayList<>(), new ArrayList<>(), 1);
-    Assertions.assertFalse(rcnt.isSufficientlyReplicated());
+    Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
     List<Integer> delete = new ArrayList<>();
     delete.add(1);
     rcnt = new ECContainerReplicaCount(container,
         replica, new ArrayList<>(), delete, 0);
-    Assertions.assertFalse(rcnt.isSufficientlyReplicated());
+    Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
   }
 
   @Test
@@ -163,9 +163,32 @@ public class TestECContainerReplicaCount {
     delete.add(1);
     ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
         replica, new ArrayList<>(), delete, 1);
-    Assertions.assertTrue(rcnt.isSufficientlyReplicated());
-    Assertions.assertTrue(rcnt.isOverReplicated());
-    Assertions.assertEquals(2, rcnt.overReplicatedIndexes().get(0).intValue());
+    Assertions.assertTrue(rcnt.isSufficientlyReplicated(false));
+    Assertions.assertTrue(rcnt.isOverReplicated(true));
+    Assertions.assertEquals(2,
+        rcnt.overReplicatedIndexes(true).get(0).intValue());
+    Assertions.assertEquals(1, rcnt.overReplicatedIndexes(true).size());
+    Assertions.assertTrue(rcnt.isOverReplicated(false));
+    Assertions.assertEquals(2, rcnt.overReplicatedIndexes(false).size());
+  }
+
+  @Test
+  public void testOverReplicatedContainerFixedWithPendingDelete() {
+    Set<ContainerReplica> replica =
+        registerNodes(Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2),
+            Pair.of(IN_SERVICE, 3), Pair.of(IN_SERVICE, 4),
+            Pair.of(IN_SERVICE, 5), Pair.of(IN_SERVICE, 1),
+            Pair.of(IN_SERVICE, 2));
+    List<Integer> delete = new ArrayList<>();
+    delete.add(1);
+    delete.add(2);
+    ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
+        replica, new ArrayList<>(), delete, 1);
+    Assertions.assertTrue(rcnt.isSufficientlyReplicated(false));
+    Assertions.assertFalse(rcnt.isOverReplicated(true));
+    Assertions.assertEquals(0, rcnt.overReplicatedIndexes(true).size());
+    Assertions.assertTrue(rcnt.isOverReplicated(false));
+    Assertions.assertEquals(2, rcnt.overReplicatedIndexes(false).size());
   }
 
   @Test
@@ -179,9 +202,10 @@ public class TestECContainerReplicaCount {
     delete.add(1);
     ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
         replica, new ArrayList<>(), delete, 1);
-    Assertions.assertFalse(rcnt.isSufficientlyReplicated());
-    Assertions.assertTrue(rcnt.isOverReplicated());
-    Assertions.assertEquals(2, rcnt.overReplicatedIndexes().get(0).intValue());
+    Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
+    Assertions.assertTrue(rcnt.isOverReplicated(true));
+    Assertions.assertEquals(2,
+        rcnt.overReplicatedIndexes(true).get(0).intValue());
   }
 
   @Test
@@ -193,8 +217,8 @@ public class TestECContainerReplicaCount {
             Pair.of(IN_MAINTENANCE, 5), Pair.of(IN_MAINTENANCE, 1));
     ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
         replica, new ArrayList<>(), new ArrayList<>(), 1);
-    Assertions.assertFalse(rcnt.isSufficientlyReplicated());
-    Assertions.assertFalse(rcnt.isOverReplicated());
+    Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
+    Assertions.assertFalse(rcnt.isOverReplicated(true));
     Assertions.assertEquals(4, rcnt.additionalMaintenanceCopiesNeeded());
     for (int i = 1; i <= repConfig.getRequiredNodes(); i++) {
       Assertions.assertTrue(rcnt.maintenanceOnlyIndexes().contains(i));
@@ -209,8 +233,8 @@ public class TestECContainerReplicaCount {
             Pair.of(IN_MAINTENANCE, 5), Pair.of(IN_MAINTENANCE, 1));
     ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
         replica, new ArrayList<>(), new ArrayList<>(), 1);
-    Assertions.assertTrue(rcnt.isSufficientlyReplicated());
-    Assertions.assertFalse(rcnt.isOverReplicated());
+    Assertions.assertTrue(rcnt.isSufficientlyReplicated(false));
+    Assertions.assertFalse(rcnt.isOverReplicated(true));
     Assertions.assertEquals(0, rcnt.additionalMaintenanceCopiesNeeded());
     // Even though we don't need new copies, the following call will return
     // any indexes only have a maintenance copy.
@@ -220,8 +244,8 @@ public class TestECContainerReplicaCount {
     // offline, we should be able to lost 2 more containers.
     rcnt = new ECContainerReplicaCount(container,
         replica, new ArrayList<>(), new ArrayList<>(), 2);
-    Assertions.assertFalse(rcnt.isSufficientlyReplicated());
-    Assertions.assertFalse(rcnt.isOverReplicated());
+    Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
+    Assertions.assertFalse(rcnt.isOverReplicated(true));
     Assertions.assertEquals(1, rcnt.additionalMaintenanceCopiesNeeded());
     // Even though we don't need new copies, the following call will return
     // any indexes only have a maintenance copy.
@@ -239,8 +263,8 @@ public class TestECContainerReplicaCount {
     delete.add(1);
     ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
         replica, new ArrayList<>(), delete, 1);
-    Assertions.assertFalse(rcnt.isSufficientlyReplicated());
-    Assertions.assertFalse(rcnt.isOverReplicated());
+    Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
+    Assertions.assertFalse(rcnt.isOverReplicated(true));
     Assertions.assertEquals(1, rcnt.additionalMaintenanceCopiesNeeded());
     // Even though we don't need new copies, the following call will return
     // any indexes only have a maintenance copy.
@@ -258,8 +282,8 @@ public class TestECContainerReplicaCount {
             Pair.of(IN_MAINTENANCE, 1), Pair.of(IN_MAINTENANCE, 5));
     ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
         replica, new ArrayList<>(), new ArrayList<>(), 1);
-    Assertions.assertTrue(rcnt.isSufficientlyReplicated());
-    Assertions.assertFalse(rcnt.isOverReplicated());
+    Assertions.assertTrue(rcnt.isSufficientlyReplicated(false));
+    Assertions.assertFalse(rcnt.isOverReplicated(true));
     Assertions.assertEquals(0, rcnt.additionalMaintenanceCopiesNeeded());
     // Even though we don't need new copies, the following call will return
     // any indexes only have a maintenance copy.
@@ -269,8 +293,8 @@ public class TestECContainerReplicaCount {
     // offline, we should be able to lost 2 more containers.
     rcnt = new ECContainerReplicaCount(container,
         replica, new ArrayList<>(), new ArrayList<>(), 2);
-    Assertions.assertFalse(rcnt.isSufficientlyReplicated());
-    Assertions.assertFalse(rcnt.isOverReplicated());
+    Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
+    Assertions.assertFalse(rcnt.isOverReplicated(true));
     Assertions.assertEquals(1, rcnt.additionalMaintenanceCopiesNeeded());
     // Even though we don't need new copies, the following call will return
     // any indexes only have a maintenance copy.
@@ -310,16 +334,40 @@ public class TestECContainerReplicaCount {
 
     ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
         replica, new ArrayList<>(), new ArrayList<>(), 1);
-    Assertions.assertFalse(rcnt.isSufficientlyReplicated());
-    Assertions.assertFalse(rcnt.isOverReplicated());
+    Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
+    Assertions.assertFalse(rcnt.isOverReplicated(true));
+    Assertions.assertEquals(0, rcnt.additionalMaintenanceCopiesNeeded());
+    // Even though we don't need new copies, the following call will return
+    // any indexes only have a maintenance copy.
+    Assertions.assertEquals(0, rcnt.maintenanceOnlyIndexes().size());
+
+    Assertions.assertEquals(2, rcnt.unavailableIndexes(true).size());
+    Assertions.assertTrue(rcnt.unavailableIndexes(true).contains(4));
+    Assertions.assertTrue(rcnt.unavailableIndexes(true).contains(5));
+  }
+
+  @Test
+  public void testUnderReplicatedFixedWithPending() {
+    Set<ContainerReplica> replica =
+        registerNodes(Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2),
+            Pair.of(IN_SERVICE, 3));
+
+    List<Integer> pendingAdd = new ArrayList<>();
+    pendingAdd.add(4);
+    pendingAdd.add(5);
+    ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
+        replica, pendingAdd, new ArrayList<>(), 1);
+    Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
+    Assertions.assertTrue(rcnt.isSufficientlyReplicated(true));
+    Assertions.assertFalse(rcnt.isOverReplicated(true));
     Assertions.assertEquals(0, rcnt.additionalMaintenanceCopiesNeeded());
     // Even though we don't need new copies, the following call will return
     // any indexes only have a maintenance copy.
     Assertions.assertEquals(0, rcnt.maintenanceOnlyIndexes().size());
 
-    Assertions.assertEquals(2, rcnt.missingNonMaintenanceIndexes().size());
-    Assertions.assertTrue(rcnt.missingNonMaintenanceIndexes().contains(4));
-    Assertions.assertTrue(rcnt.missingNonMaintenanceIndexes().contains(5));
+    // Zero unavailable, as the pending adds are scheduled as we assume they
+    // will complete.
+    Assertions.assertEquals(0, rcnt.unavailableIndexes(true).size());
   }
 
   @Test
@@ -333,12 +381,12 @@ public class TestECContainerReplicaCount {
 
     ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
         replica, new ArrayList<>(), delete, 1);
-    Assertions.assertFalse(rcnt.isSufficientlyReplicated());
-    Assertions.assertFalse(rcnt.isOverReplicated());
+    Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
+    Assertions.assertFalse(rcnt.isOverReplicated(true));
 
-    Assertions.assertEquals(2, rcnt.missingNonMaintenanceIndexes().size());
-    Assertions.assertTrue(rcnt.missingNonMaintenanceIndexes().contains(1));
-    Assertions.assertTrue(rcnt.missingNonMaintenanceIndexes().contains(5));
+    Assertions.assertEquals(2, rcnt.unavailableIndexes(true).size());
+    Assertions.assertTrue(rcnt.unavailableIndexes(true).contains(1));
+    Assertions.assertTrue(rcnt.unavailableIndexes(true).contains(5));
   }
 
   @Test
@@ -353,10 +401,10 @@ public class TestECContainerReplicaCount {
 
     ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
         replica, new ArrayList<>(), delete, 1);
-    Assertions.assertFalse(rcnt.isSufficientlyReplicated());
-    Assertions.assertFalse(rcnt.isOverReplicated());
+    Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
+    Assertions.assertFalse(rcnt.isOverReplicated(true));
 
-    Assertions.assertEquals(0, rcnt.missingNonMaintenanceIndexes().size());
+    Assertions.assertEquals(0, rcnt.unavailableIndexes(true).size());
   }
 
   @Test
@@ -371,10 +419,10 @@ public class TestECContainerReplicaCount {
 
     ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
         replica, add, new ArrayList<>(), 1);
-    Assertions.assertFalse(rcnt.isSufficientlyReplicated());
-    Assertions.assertFalse(rcnt.isOverReplicated());
+    Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
+    Assertions.assertFalse(rcnt.isOverReplicated(true));
 
-    Assertions.assertEquals(0, rcnt.missingNonMaintenanceIndexes().size());
+    Assertions.assertEquals(0, rcnt.unavailableIndexes(true).size());
   }
 
   @Test
@@ -382,14 +430,14 @@ public class TestECContainerReplicaCount {
     ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
         new HashSet<>(), new ArrayList<>(), new ArrayList<>(), 1);
     Assertions.assertTrue(rcnt.unRecoverable());
-    Assertions.assertEquals(5, rcnt.missingNonMaintenanceIndexes().size());
+    Assertions.assertEquals(5, rcnt.unavailableIndexes(true).size());
 
     Set<ContainerReplica> replica =
         registerNodes(Pair.of(IN_SERVICE, 1), Pair.of(IN_MAINTENANCE, 2));
     rcnt = new ECContainerReplicaCount(container, replica, new ArrayList<>(),
         new ArrayList<>(), 1);
     Assertions.assertTrue(rcnt.unRecoverable());
-    Assertions.assertEquals(3, rcnt.missingNonMaintenanceIndexes().size());
+    Assertions.assertEquals(3, rcnt.unavailableIndexes(true).size());
     Assertions.assertEquals(0, rcnt.additionalMaintenanceCopiesNeeded());
 
     replica =
@@ -400,7 +448,7 @@ public class TestECContainerReplicaCount {
         new ArrayList<>(), 1);
     // Not missing as the decommission replicas are still online
     Assertions.assertFalse(rcnt.unRecoverable());
-    Assertions.assertEquals(5, rcnt.missingNonMaintenanceIndexes().size());
+    Assertions.assertEquals(0, rcnt.unavailableIndexes(true).size());
   }
 
   private Set<ContainerReplica> registerNodes(


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org