You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by um...@apache.org on 2022/06/15 05:56:00 UTC
[ozone] branch master updated: HDDS-6697. EC: ReplicationManager - create class to detect EC container health issues (#3512)
This is an automated email from the ASF dual-hosted git repository.
umamahesh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new be29c6761f HDDS-6697. EC: ReplicationManager - create class to detect EC container health issues (#3512)
be29c6761f is described below
commit be29c6761f4fa28bb50f9e8077da6ae1ed50d819
Author: Stephen O'Donnell <st...@gmail.com>
AuthorDate: Wed Jun 15 06:55:55 2022 +0100
HDDS-6697. EC: ReplicationManager - create class to detect EC container health issues (#3512)
---
.../scm/container/ECContainerReplicaCount.java | 107 +++++---
.../replication/ContainerHealthCheck.java | 38 +++
.../replication/ContainerHealthResult.java | 180 ++++++++++++++
.../replication/ECContainerHealthCheck.java | 107 ++++++++
.../replication/TestECContainerHealthCheck.java | 273 +++++++++++++++++++++
.../states/TestECContainerReplicaCount.java | 152 ++++++++----
6 files changed, 775 insertions(+), 82 deletions(-)
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ECContainerReplicaCount.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ECContainerReplicaCount.java
index 607d5c4e4c..ac05a85762 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ECContainerReplicaCount.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ECContainerReplicaCount.java
@@ -63,6 +63,7 @@ public class ECContainerReplicaCount {
private final ContainerInfo containerInfo;
private final ECReplicationConfig repConfig;
private final List<Integer> pendingAdd;
+ private final List<Integer> pendingDelete;
private final int remainingMaintenanceRedundancy;
private final Map<Integer, Integer> healthyIndexes = new HashMap<>();
private final Map<Integer, Integer> decommissionIndexes = new HashMap<>();
@@ -74,6 +75,7 @@ public class ECContainerReplicaCount {
this.containerInfo = containerInfo;
this.repConfig = (ECReplicationConfig)containerInfo.getReplicationConfig();
this.pendingAdd = indexesPendingAdd;
+ this.pendingDelete = indexesPendingDelete;
this.remainingMaintenanceRedundancy
= Math.min(repConfig.getParity(), remainingMaintenanceRedundancy);
@@ -152,15 +154,21 @@ public class ECContainerReplicaCount {
/**
* Returns an unsorted list of indexes which need additional copies to
* ensure the container is sufficiently replicated. These missing indexes will
- * not be on maintenance nodes, although they may be on decommissioning nodes.
- * Replicas pending delete are assumed to be removed and any pending add
- * are assume to be created and omitted them from the returned list. This list
- * can be used to determine which replicas must be recovered in a group,
- * assuming the inflight replicas pending add complete successfully.
- * @return List of missing indexes
+ * not be on maintenance nodes, or decommission nodes.
+ * Replicas pending delete are assumed to be removed.
+ * If includePendingAdd is true, any replicas pending add
+ * are assume to be created and omitted them from the returned list. If it is
+ * true, we assume the pendingAdd will complete, giving a view of the
+ * potential future state of the container.
+ * This list can be used to determine which replicas must be recovered via an
+ * EC reconstuction, rather than making copies of maintenance / decommission
+ * replicas
+ * @param includePendingAdd If true, treat pending add containers as if they
+ * have completed successfully.
+ * @return List of missing indexes which have no online copy.
*/
- public List<Integer> missingNonMaintenanceIndexes() {
- if (isSufficientlyReplicated()) {
+ public List<Integer> unavailableIndexes(boolean includePendingAdd) {
+ if (isSufficientlyReplicated(false)) {
return Collections.emptyList();
}
Set<Integer> missing = new HashSet<>();
@@ -171,14 +179,20 @@ public class ECContainerReplicaCount {
}
// Now we have a list of missing. Remove any pending add as they should
// eventually recover.
- for (Integer i : pendingAdd) {
- missing.remove(i);
+ if (includePendingAdd) {
+ for (Integer i : pendingAdd) {
+ missing.remove(i);
+ }
}
// Remove any maintenance copies, as they are still available. What remains
// is the set of indexes we have no copy of, and hence must get re-created
for (Integer i : maintenanceIndexes.keySet()) {
missing.remove(i);
}
+ // Remove any decommission copies, as they are still available
+ for (Integer i : decommissionIndexes.keySet()) {
+ missing.remove(i);
+ }
return missing.stream().collect(Collectors.toList());
}
@@ -214,16 +228,22 @@ public class ECContainerReplicaCount {
/**
* If any index has more than one copy that is not in maintenance or
- * decommission, then the container is over replicated. We consider inflight
- * deletes, assuming they will be removed. Inflight adds are ignored until
- * they are actually created.
+ * decommission, then the container is over replicated. If the
+ * includePendingDeletes flag is false we ignore replicas pending delete.
+ * If it is true, we assume inflight deletes have been removed, giving
+ * a view of the future state of the container if they complete successfully.
+ * Pending add are always ignored as they may fail to create.
* Note it is possible for a container to be both over and under replicated
* as it could have multiple copies of 1 index, but zero copies of another
* index.
+ * @param includePendingDelete If true, treat replicas pending delete as if
+ * they have deleted successfully.
* @return True if overReplicated, false otherwise.
*/
- public boolean isOverReplicated() {
- for (Integer count : healthyIndexes.values()) {
+ public boolean isOverReplicated(boolean includePendingDelete) {
+ final Map<Integer, Integer> availableIndexes
+ = getHealthyWithDelete(includePendingDelete);
+ for (Integer count : availableIndexes.values()) {
if (count > 1) {
return true;
}
@@ -237,13 +257,20 @@ public class ECContainerReplicaCount {
* as if we have excess including maintenance, it may be due to replication
* which was needed to ensure sufficient redundancy for maintenance.
* Pending adds are ignored as they may fail to complete.
+ * If the includePendingDeletes flag is false we ignore replicas pending
+ * delete. If it is true, we assume inflight deletes have been removed, giving
+ * a view of the future state of the container if they complete successfully.
* Pending deletes are assumed to complete and any indexes returned from here
* will have the pending deletes already removed.
+ * @param includePendingDelete If true, treat replicas pending delete as if
+ * * they have deleted successfully.
* @return List of indexes which are over-replicated.
*/
- public List<Integer> overReplicatedIndexes() {
+ public List<Integer> overReplicatedIndexes(boolean includePendingDelete) {
+ final Map<Integer, Integer> availableIndexes
+ = getHealthyWithDelete(includePendingDelete);
List<Integer> indexes = new ArrayList<>();
- for (Map.Entry<Integer, Integer> entry : healthyIndexes.entrySet()) {
+ for (Map.Entry<Integer, Integer> entry : availableIndexes.entrySet()) {
if (entry.getValue() > 1) {
indexes.add(entry.getKey());
}
@@ -251,30 +278,50 @@ public class ECContainerReplicaCount {
return indexes;
}
+ private Map<Integer, Integer> getHealthyWithDelete(boolean includeDelete) {
+ final Map<Integer, Integer> availableIndexes;
+ if (includeDelete) {
+ // Deletes are already removed from the healthy list so just use the
+ // healthy list
+ availableIndexes = Collections.unmodifiableMap(healthyIndexes);
+ } else {
+ availableIndexes = new HashMap<>(healthyIndexes);
+ pendingDelete.forEach(k -> availableIndexes.merge(k, 1, Integer::sum));
+ }
+ return availableIndexes;
+ }
+
/**
* The container is sufficiently replicated if the healthy indexes minus any
* pending deletes give a complete set of container indexes. If not, we must
* also check the maintenance indexes - the container is still sufficiently
* replicated if the complete set is made up of healthy + maintenance and
* there is still sufficient maintenance redundancy.
+ * If the includePendingAdd flag is set to true, this method treats replicas
+ * pending add as if they have completed and hence shows the potential future
+ * state of the container assuming they all complete.
+ * @param includePendingAdd If true, treat pending add containers as if they
+ * have completed successfully.
* @return True if sufficiently replicated, false otherwise.
*/
- public boolean isSufficientlyReplicated() {
- if (hasFullSetOfIndexes(healthyIndexes)) {
- return true;
+ public boolean isSufficientlyReplicated(boolean includePendingAdd) {
+ final Map<Integer, Integer> onlineIndexes;
+ if (includePendingAdd) {
+ onlineIndexes = new HashMap<>(healthyIndexes);
+ pendingAdd.forEach(k -> onlineIndexes.merge(k, 1, Integer::sum));
+ } else {
+ onlineIndexes = Collections.unmodifiableMap(healthyIndexes);
}
- // If we don't have a full healthy set, we could have some maintenance
- // replicas that make up the full set.
- // For maintenance, we must have at least dataNum + maintenance redundancy
- // available.
- if (healthyIndexes.size() <
- repConfig.getData() + remainingMaintenanceRedundancy) {
- return false;
+
+ if (hasFullSetOfIndexes(onlineIndexes)) {
+ return true;
}
- // Finally, check if the maintenance copies give a full set
- Map<Integer, Integer> healthy = new HashMap<>(healthyIndexes);
+ // Check if the maintenance copies give a full set and also that we do not
+ // have too many in maintenance
+ Map<Integer, Integer> healthy = new HashMap<>(onlineIndexes);
maintenanceIndexes.forEach((k, v) -> healthy.merge(k, v, Integer::sum));
- return hasFullSetOfIndexes(healthy);
+ return hasFullSetOfIndexes(healthy) && onlineIndexes.size()
+ >= repConfig.getData() + remainingMaintenanceRedundancy;
}
/**
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthCheck.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthCheck.java
new file mode 100644
index 0000000000..fed85b8069
--- /dev/null
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthCheck.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.hdds.scm.container.replication;
+
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.scm.container.ContainerInfo;
+import org.apache.hadoop.hdds.scm.container.ContainerReplica;
+
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Interface used by ReplicationManager to check if containers are healthy or
+ * not.
+ */
+public interface ContainerHealthCheck {
+
+ ContainerHealthResult checkHealth(
+ ContainerInfo container, Set<ContainerReplica> replicas,
+ List<Pair<Integer, DatanodeDetails>> indexesPendingAdd,
+ List<Pair<Integer, DatanodeDetails>> indexesPendingDelete,
+ int remainingRedundancyForMaintenance);
+}
\ No newline at end of file
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java
new file mode 100644
index 0000000000..554e7260d1
--- /dev/null
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.hdds.scm.container.replication;
+
+import org.apache.hadoop.hdds.scm.container.ContainerInfo;
+import org.apache.hadoop.ozone.protocol.commands.SCMCommand;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Class used to represent the Health States of containers.
+ */
+public class ContainerHealthResult {
+
+ /**
+ * All possible container health states.
+ */
+ public enum HealthState {
+ HEALTHY,
+ UNDER_REPLICATED,
+ OVER_REPLICATED
+ }
+
+ private final ContainerInfo containerInfo;
+ private final HealthState healthState;
+ private final List<SCMCommand> commands = new ArrayList<>();
+
+ ContainerHealthResult(ContainerInfo containerInfo, HealthState healthState) {
+ this.containerInfo = containerInfo;
+ this.healthState = healthState;
+ }
+
+ public HealthState getHealthState() {
+ return healthState;
+ }
+
+ public void addCommand(SCMCommand command) {
+ commands.add(command);
+ }
+
+ public List<SCMCommand> getCommands() {
+ return commands;
+ }
+
+ public ContainerInfo getContainerInfo() {
+ return containerInfo;
+ }
+
+ /**
+ * Class for Healthy Container Check results.
+ */
+ public static class HealthyResult extends ContainerHealthResult {
+
+ HealthyResult(ContainerInfo containerInfo) {
+ super(containerInfo, HealthState.HEALTHY);
+ }
+ }
+
+ /**
+ * Class for UnderReplicated Container Check Results.
+ */
+ public static class UnderReplicatedHealthResult
+ extends ContainerHealthResult {
+
+ private final int remainingRedundancy;
+ private final boolean dueToDecommission;
+ private final boolean sufficientlyReplicatedAfterPending;
+ private final boolean unrecoverable;
+
+ UnderReplicatedHealthResult(ContainerInfo containerInfo,
+ int remainingRedundancy, boolean dueToDecommission,
+ boolean replicatedOkWithPending, boolean unrecoverable) {
+ super(containerInfo, HealthState.UNDER_REPLICATED);
+ this.remainingRedundancy = remainingRedundancy;
+ this.dueToDecommission = dueToDecommission;
+ this.sufficientlyReplicatedAfterPending = replicatedOkWithPending;
+ this.unrecoverable = unrecoverable;
+ }
+
+ /**
+ * How many more replicas can be lost before the the container is
+ * unreadable. For containers which are under-replicated due to decommission
+ * or maintenance only, the remaining redundancy will include those
+ * decommissioning or maintenance replicas, as they are technically still
+ * available until the datanode processes are stopped.
+ * @return Count of remaining redundant replicas.
+ */
+ public int getRemainingRedundancy() {
+ return remainingRedundancy;
+ }
+
+ /**
+ * Indicates whether the under-replication is caused only by replicas
+ * being decommissioned or entering maintenance. Ie, there are not replicas
+ * unavailable.
+ * @return True is the under-replication is caused by decommission.
+ */
+ public boolean underReplicatedDueToDecommission() {
+ return dueToDecommission;
+ }
+
+ /**
+ * Considering the pending replicas, which have been scheduled for copy or
+ * reconstruction, will the container still be under-replicated when they
+ * complete.
+ * @return True if the under-replication is corrected by the pending
+ * replicas. False otherwise.
+ */
+ public boolean isSufficientlyReplicatedAfterPending() {
+ return sufficientlyReplicatedAfterPending;
+ }
+
+ /**
+ * Indicates whether a container has enough replicas to be read. For Ratis
+ * at least one replia must be available. For EC, at least dataNum replicas
+ * are needed.
+ * @return True if the container has insufficient replicas available to be
+ * read, false otherwise
+ */
+ public boolean isUnrecoverable() {
+ return unrecoverable;
+ }
+ }
+
+ /**
+ * Class for Over Replicated Container Health Results.
+ */
+ public static class OverReplicatedHealthResult extends ContainerHealthResult {
+
+ private final int excessRedundancy;
+ private final boolean sufficientlyReplicatedAfterPending;
+
+
+ OverReplicatedHealthResult(ContainerInfo containerInfo,
+ int excessRedundancy, boolean replicatedOkWithPending) {
+ super(containerInfo, HealthState.OVER_REPLICATED);
+ this.excessRedundancy = excessRedundancy;
+ this.sufficientlyReplicatedAfterPending = replicatedOkWithPending;
+ }
+
+ /**
+ * How many extra replicas are present for this container and need to be
+ * removed. For EC, this number indicates how many indexes have more than
+ * one copy. Eg index 1 could have 3 copies. Index 2 could have 2 copies.
+ * The rest have 1 copy. The value returned here will be 2, indicating 2
+ * indexes have excess redundancy, even though we have to remove 3 replicas.
+ * For Ratis containers, the number return is simply the current replica
+ * count minus the expected replica count.
+ * @return The number of excess replicas.
+ */
+ public int getExcessRedundancy() {
+ return excessRedundancy;
+ }
+
+ /**
+ * Considering the pending replicas, which have been scheduled for delete,
+ * will the container still be over-replicated when they complete.
+ * @return True if the over-replication is corrected by the pending
+ * deletes. False otherwise.
+ */
+ public boolean isSufficientlyReplicatedAfterPending() {
+ return sufficientlyReplicatedAfterPending;
+ }
+ }
+}
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECContainerHealthCheck.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECContainerHealthCheck.java
new file mode 100644
index 0000000000..b879867d85
--- /dev/null
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECContainerHealthCheck.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.hdds.scm.container.replication;
+
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.hadoop.hdds.client.ECReplicationConfig;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.scm.container.ContainerInfo;
+import org.apache.hadoop.hdds.scm.container.ContainerReplica;
+import org.apache.hadoop.hdds.scm.container.ECContainerReplicaCount;
+
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * Class to determine the health state of an EC Container. Given the container
+ * and current replica details, along with replicas pending add and delete,
+ * this class will return a ContainerHealthResult indicating if the container
+ * is healthy, or under / over replicated etc.
+ *
+ * For EC Containers, it is possible for a container to be both under and over
+ * replicated, if there are multiple copies of one index and no copies of
+ * another. This class only returns a single status, keeping the container in a
+ * single health state at any given time. Under replicated is a more serious
+ * state than over replicated, so it will take precedence over any over
+ * replication.
+ */
+public class ECContainerHealthCheck implements ContainerHealthCheck {
+
+ // TODO - mis-replicated containers are not yet handled.
+ // TODO - should this class handle empty / deleting containers, or would it
+ // be better handled elsewhere?
+
+ @Override
+ public ContainerHealthResult checkHealth(ContainerInfo container,
+ Set<ContainerReplica> replicas,
+ List<Pair<Integer, DatanodeDetails>> replicasPendingAdd,
+ List<Pair<Integer, DatanodeDetails>> replicasPendingDelete,
+ int remainingRedundancyForMaintenance) {
+ ECContainerReplicaCount replicaCount = getReplicaCountWithPending(container,
+ replicas, replicasPendingAdd, replicasPendingDelete,
+ remainingRedundancyForMaintenance);
+
+ ECReplicationConfig repConfig =
+ (ECReplicationConfig) container.getReplicationConfig();
+
+ if (!replicaCount.isSufficientlyReplicated(false)) {
+ List<Integer> missingIndexes = replicaCount.unavailableIndexes(false);
+ int remainingRedundancy = repConfig.getParity();
+ boolean dueToDecommission = true;
+ if (missingIndexes.size() > 0) {
+ // The container has reduced redundancy and will need reconstructed
+ // via an EC reconstruction command. Note that it may also have some
+ // replicas in decommission / maintenance states, but as the under
+ // replication is not caused only by decommission, we say it is not
+ // due to decommission/
+ dueToDecommission = false;
+ remainingRedundancy = repConfig.getParity() - missingIndexes.size();
+ }
+ return new ContainerHealthResult.UnderReplicatedHealthResult(
+ container, remainingRedundancy, dueToDecommission,
+ replicaCount.isSufficientlyReplicated(true),
+ replicaCount.unRecoverable());
+ }
+
+ if (replicaCount.isOverReplicated(false)) {
+ List<Integer> overRepIndexes = replicaCount.overReplicatedIndexes(false);
+ return new ContainerHealthResult
+ .OverReplicatedHealthResult(container, overRepIndexes.size(),
+ !replicaCount.isOverReplicated(true));
+ }
+
+ // No issues detected, so return healthy.
+ return new ContainerHealthResult.HealthyResult(container);
+ }
+
+ private ECContainerReplicaCount getReplicaCountWithPending(
+ ContainerInfo container, Set<ContainerReplica> replicas,
+ List<Pair<Integer, DatanodeDetails>> replicasPendingAdd,
+ List<Pair<Integer, DatanodeDetails>> replicasPendingDelete,
+ int remainingRedundancyForMaintenance) {
+ List<Integer> indexesPendingAdd = replicasPendingAdd.stream()
+ .map(i -> i.getLeft()).collect(Collectors.toList());
+ List<Integer> indexesPendingDelete = replicasPendingDelete.stream()
+ .map(i -> i.getLeft()).collect(Collectors.toList());
+
+ return new ECContainerReplicaCount(container, replicas, indexesPendingAdd,
+ indexesPendingDelete, remainingRedundancyForMaintenance);
+
+ }
+
+}
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECContainerHealthCheck.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECContainerHealthCheck.java
new file mode 100644
index 0000000000..b22999a2f8
--- /dev/null
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECContainerHealthCheck.java
@@ -0,0 +1,273 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.hdds.scm.container.replication;
+
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.hadoop.hdds.client.ECReplicationConfig;
+import org.apache.hadoop.hdds.client.ReplicationConfig;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.MockDatanodeDetails;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos;
+import org.apache.hadoop.hdds.scm.container.ContainerID;
+import org.apache.hadoop.hdds.scm.container.ContainerInfo;
+import org.apache.hadoop.hdds.scm.container.ContainerReplica;
+import org.apache.hadoop.hdds.scm.container.replication.ContainerHealthResult.OverReplicatedHealthResult;
+import org.apache.hadoop.hdds.scm.container.replication.ContainerHealthResult.UnderReplicatedHealthResult;
+import org.apache.hadoop.hdds.scm.container.replication.ContainerHealthResult.HealthState;
+import org.apache.hadoop.hdds.scm.pipeline.PipelineID;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED;
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING;
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE;
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE;
+
+/**
+ * Tests for the ECContainerHealthCheck class.
+ */
+public class TestECContainerHealthCheck {
+
+ private ECContainerHealthCheck healthCheck;
+ private ECReplicationConfig repConfig;
+
+ @Before
+ public void setup() {
+ healthCheck = new ECContainerHealthCheck();
+ repConfig = new ECReplicationConfig(3, 2);
+ }
+
+ @Test
+ public void testHealthyContainerIsHealthy() {
+ ContainerInfo container = createContainerInfo(repConfig);
+ Set<ContainerReplica> replicas
+ = createReplicas(container.containerID(), 1, 2, 3, 4, 5);
+ ContainerHealthResult result = healthCheck.checkHealth(container, replicas,
+ Collections.emptyList(), Collections.emptyList(), 2);
+ Assert.assertEquals(HealthState.HEALTHY, result.getHealthState());
+ }
+
+ @Test
+ public void testUnderReplicatedContainerIsUnderReplicated() {
+ ContainerInfo container = createContainerInfo(repConfig);
+ Set<ContainerReplica> replicas
+ = createReplicas(container.containerID(), 1, 2, 4, 5);
+ UnderReplicatedHealthResult result = (UnderReplicatedHealthResult)
+ healthCheck.checkHealth(container, replicas,
+ Collections.emptyList(), Collections.emptyList(), 2);
+ Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
+ Assert.assertEquals(1, result.getRemainingRedundancy());
+ Assert.assertFalse(result.isSufficientlyReplicatedAfterPending());
+ Assert.assertFalse(result.underReplicatedDueToDecommission());
+ }
+
+ @Test
+ public void testUnderReplicatedContainerFixedWithPending() {
+ ContainerInfo container = createContainerInfo(repConfig);
+ Set<ContainerReplica> replicas
+ = createReplicas(container.containerID(), 1, 2, 4, 5);
+ List<Pair<Integer, DatanodeDetails>> pending = new ArrayList<>();
+ pending.add(Pair.of(3, MockDatanodeDetails.randomDatanodeDetails()));
+ UnderReplicatedHealthResult result = (UnderReplicatedHealthResult)
+ healthCheck.checkHealth(container, replicas, pending,
+ Collections.emptyList(), 2);
+ Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
+ Assert.assertEquals(1, result.getRemainingRedundancy());
+ Assert.assertTrue(result.isSufficientlyReplicatedAfterPending());
+ Assert.assertFalse(result.underReplicatedDueToDecommission());
+ }
+
+ @Test
+ public void testUnderReplicatedDueToDecommission() {
+ ContainerInfo container = createContainerInfo(repConfig);
+ Set<ContainerReplica> replicas = createReplicas(container.containerID(),
+ Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2),
+ Pair.of(IN_SERVICE, 3), Pair.of(DECOMMISSIONING, 4),
+ Pair.of(DECOMMISSIONED, 5));
+
+ UnderReplicatedHealthResult result = (UnderReplicatedHealthResult)
+ healthCheck.checkHealth(container, replicas, Collections.emptyList(),
+ Collections.emptyList(), 2);
+ Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
+ Assert.assertEquals(2, result.getRemainingRedundancy());
+ Assert.assertFalse(result.isSufficientlyReplicatedAfterPending());
+ Assert.assertTrue(result.underReplicatedDueToDecommission());
+ }
+
+ @Test
+ public void testUnderReplicatedDueToDecommissionFixedWithPending() {
+ ContainerInfo container = createContainerInfo(repConfig);
+ Set<ContainerReplica> replicas = createReplicas(container.containerID(),
+ Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2),
+ Pair.of(IN_SERVICE, 3), Pair.of(DECOMMISSIONING, 4),
+ Pair.of(IN_SERVICE, 4), Pair.of(DECOMMISSIONED, 5));
+ List<Pair<Integer, DatanodeDetails>> pending = new ArrayList<>();
+ pending.add(Pair.of(5, MockDatanodeDetails.randomDatanodeDetails()));
+
+ UnderReplicatedHealthResult result = (UnderReplicatedHealthResult)
+ healthCheck.checkHealth(container, replicas, pending,
+ Collections.emptyList(), 2);
+ Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
+ Assert.assertEquals(2, result.getRemainingRedundancy());
+ Assert.assertTrue(result.isSufficientlyReplicatedAfterPending());
+ Assert.assertTrue(result.underReplicatedDueToDecommission());
+ }
+
+ @Test
+ public void testUnderReplicatedDueToDecommissionAndMissing() {
+ ContainerInfo container = createContainerInfo(repConfig);
+ Set<ContainerReplica> replicas = createReplicas(container.containerID(),
+ Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2),
+ Pair.of(DECOMMISSIONING, 4), Pair.of(DECOMMISSIONED, 5));
+ List<Pair<Integer, DatanodeDetails>> pending = new ArrayList<>();
+ pending.add(Pair.of(3, MockDatanodeDetails.randomDatanodeDetails()));
+
+ UnderReplicatedHealthResult result = (UnderReplicatedHealthResult)
+ healthCheck.checkHealth(container, replicas, pending,
+ Collections.emptyList(), 2);
+ Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
+ Assert.assertEquals(1, result.getRemainingRedundancy());
+ Assert.assertFalse(result.isSufficientlyReplicatedAfterPending());
+ Assert.assertFalse(result.underReplicatedDueToDecommission());
+ }
+
+ @Test
+ public void testUnderReplicatedAndUnrecoverable() {
+ ContainerInfo container = createContainerInfo(repConfig);
+ Set<ContainerReplica> replicas = createReplicas(container.containerID(),
+ Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2));
+
+ UnderReplicatedHealthResult result = (UnderReplicatedHealthResult)
+ healthCheck.checkHealth(container, replicas, Collections.emptyList(),
+ Collections.emptyList(), 2);
+ Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
+ Assert.assertEquals(-1, result.getRemainingRedundancy());
+ Assert.assertFalse(result.isSufficientlyReplicatedAfterPending());
+ Assert.assertFalse(result.underReplicatedDueToDecommission());
+ Assert.assertTrue(result.isUnrecoverable());
+ }
+
+ @Test
+ public void testOverReplicatedContainer() {
+ ContainerInfo container = createContainerInfo(repConfig);
+ Set<ContainerReplica> replicas = createReplicas(container.containerID(),
+ Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2),
+ Pair.of(IN_SERVICE, 3), Pair.of(IN_SERVICE, 4),
+ Pair.of(IN_SERVICE, 5),
+ Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2));
+
+ List<Pair<Integer, DatanodeDetails>> pending = new ArrayList<>();
+ pending.add(Pair.of(1, MockDatanodeDetails.randomDatanodeDetails()));
+ pending.add(Pair.of(2, MockDatanodeDetails.randomDatanodeDetails()));
+
+ OverReplicatedHealthResult result = (OverReplicatedHealthResult)
+ healthCheck.checkHealth(container, replicas, Collections.emptyList(),
+ pending, 2);
+ Assert.assertEquals(HealthState.OVER_REPLICATED, result.getHealthState());
+ Assert.assertEquals(2, result.getExcessRedundancy());
+ Assert.assertTrue(result.isSufficientlyReplicatedAfterPending());
+ }
+
+ @Test
+ public void testOverReplicatedContainerDueToMaintenance() {
+ ContainerInfo container = createContainerInfo(repConfig);
+ Set<ContainerReplica> replicas = createReplicas(container.containerID(),
+ Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2),
+ Pair.of(IN_SERVICE, 3), Pair.of(IN_SERVICE, 4),
+ Pair.of(IN_SERVICE, 5),
+ Pair.of(IN_MAINTENANCE, 1), Pair.of(IN_MAINTENANCE, 2));
+
+ ContainerHealthResult result = healthCheck.checkHealth(container, replicas,
+ Collections.emptyList(), Collections.emptyList(), 2);
+ Assert.assertEquals(HealthState.HEALTHY, result.getHealthState());
+ }
+
+ @Test
+ public void testOverAndUnderReplicated() {
+ ContainerInfo container = createContainerInfo(repConfig);
+ Set<ContainerReplica> replicas = createReplicas(container.containerID(),
+ Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2),
+ Pair.of(IN_SERVICE, 3), Pair.of(IN_SERVICE, 4),
+ Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2));
+
+ ContainerHealthResult result = healthCheck.checkHealth(container, replicas,
+ Collections.emptyList(), Collections.emptyList(), 2);
+ Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
+ Assert.assertEquals(1,
+ ((UnderReplicatedHealthResult)result).getRemainingRedundancy());
+ }
+
+
+ private Set<ContainerReplica> createReplicas(ContainerID containerID,
+ Pair<HddsProtos.NodeOperationalState, Integer>... nodes) {
+ Set<ContainerReplica> replicas = new HashSet<>();
+ for (Pair<HddsProtos.NodeOperationalState, Integer> p : nodes) {
+ replicas.add(
+ createContainerReplica(containerID, p.getRight(), p.getLeft()));
+ }
+ return replicas;
+ }
+
+ private Set<ContainerReplica> createReplicas(ContainerID containerID,
+ int... indexes) {
+ Set<ContainerReplica> replicas = new HashSet<>();
+ for (int i : indexes) {
+ replicas.add(createContainerReplica(
+ containerID, i, IN_SERVICE));
+ }
+ return replicas;
+ }
+
+ private ContainerReplica createContainerReplica(ContainerID containerID,
+ int replicaIndex, HddsProtos.NodeOperationalState opState) {
+ ContainerReplica.ContainerReplicaBuilder builder
+ = ContainerReplica.newBuilder();
+ DatanodeDetails datanodeDetails
+ = MockDatanodeDetails.randomDatanodeDetails();
+ datanodeDetails.setPersistedOpState(opState);
+ builder.setContainerID(containerID);
+ builder.setReplicaIndex(replicaIndex);
+ builder.setKeyCount(123);
+ builder.setBytesUsed(1234);
+ builder.setContainerState(StorageContainerDatanodeProtocolProtos
+ .ContainerReplicaProto.State.CLOSED);
+ builder.setDatanodeDetails(datanodeDetails);
+ builder.setSequenceId(0);
+ builder.setOriginNodeId(datanodeDetails.getUuid());
+ return builder.build();
+ }
+
+ private ContainerInfo createContainerInfo(
+ ReplicationConfig replicationConfig) {
+ ContainerInfo.Builder builder = new ContainerInfo.Builder();
+ builder.setContainerID(1);
+ builder.setOwner("Ozone");
+ builder.setPipelineID(PipelineID.randomId());
+ builder.setReplicationConfig(replicationConfig);
+ builder.setState(HddsProtos.LifeCycleState.CLOSED);
+ return builder.build();
+ }
+}
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestECContainerReplicaCount.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestECContainerReplicaCount.java
index fd26e9c3fa..b1b1cbbed0 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestECContainerReplicaCount.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestECContainerReplicaCount.java
@@ -65,7 +65,7 @@ public class TestECContainerReplicaCount {
Pair.of(IN_SERVICE, 5));
ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
replica, new ArrayList<>(), new ArrayList<>(), 1);
- Assertions.assertTrue(rcnt.isSufficientlyReplicated());
+ Assertions.assertTrue(rcnt.isSufficientlyReplicated(false));
Assertions.assertFalse(rcnt.unRecoverable());
}
@@ -76,10 +76,10 @@ public class TestECContainerReplicaCount {
Pair.of(IN_SERVICE, 3), Pair.of(IN_SERVICE, 4));
ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
replica, new ArrayList<>(), new ArrayList<>(), 1);
- Assertions.assertFalse(rcnt.isSufficientlyReplicated());
- Assertions.assertEquals(1, rcnt.missingNonMaintenanceIndexes().size());
+ Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
+ Assertions.assertEquals(1, rcnt.unavailableIndexes(true).size());
Assertions.assertEquals(5,
- rcnt.missingNonMaintenanceIndexes().get(0).intValue());
+ rcnt.unavailableIndexes(true).get(0).intValue());
}
@Test
@@ -92,10 +92,10 @@ public class TestECContainerReplicaCount {
delete.add(1);
ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
replica, new ArrayList<>(), delete, 1);
- Assertions.assertFalse(rcnt.isSufficientlyReplicated());
- Assertions.assertEquals(1, rcnt.missingNonMaintenanceIndexes().size());
+ Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
+ Assertions.assertEquals(1, rcnt.unavailableIndexes(true).size());
Assertions.assertEquals(1,
- rcnt.missingNonMaintenanceIndexes().get(0).intValue());
+ rcnt.unavailableIndexes(true).get(0).intValue());
}
@Test
@@ -110,8 +110,8 @@ public class TestECContainerReplicaCount {
delete.add(2);
ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
replica, new ArrayList<>(), delete, 1);
- Assertions.assertTrue(rcnt.isSufficientlyReplicated());
- Assertions.assertFalse(rcnt.isOverReplicated());
+ Assertions.assertTrue(rcnt.isSufficientlyReplicated(false));
+ Assertions.assertFalse(rcnt.isOverReplicated(true));
}
@Test
@@ -127,10 +127,10 @@ public class TestECContainerReplicaCount {
delete.add(2);
ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
replica, new ArrayList<>(), delete, 1);
- Assertions.assertFalse(rcnt.isSufficientlyReplicated());
- Assertions.assertEquals(1, rcnt.missingNonMaintenanceIndexes().size());
+ Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
+ Assertions.assertEquals(1, rcnt.unavailableIndexes(true).size());
Assertions.assertEquals(2,
- rcnt.missingNonMaintenanceIndexes().get(0).intValue());
+ rcnt.unavailableIndexes(true).get(0).intValue());
}
@Test
@@ -141,15 +141,15 @@ public class TestECContainerReplicaCount {
Pair.of(IN_MAINTENANCE, 5));
ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
replica, new ArrayList<>(), new ArrayList<>(), 0);
- Assertions.assertTrue(rcnt.isSufficientlyReplicated());
+ Assertions.assertTrue(rcnt.isSufficientlyReplicated(false));
rcnt = new ECContainerReplicaCount(container,
replica, new ArrayList<>(), new ArrayList<>(), 1);
- Assertions.assertFalse(rcnt.isSufficientlyReplicated());
+ Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
List<Integer> delete = new ArrayList<>();
delete.add(1);
rcnt = new ECContainerReplicaCount(container,
replica, new ArrayList<>(), delete, 0);
- Assertions.assertFalse(rcnt.isSufficientlyReplicated());
+ Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
}
@Test
@@ -163,9 +163,32 @@ public class TestECContainerReplicaCount {
delete.add(1);
ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
replica, new ArrayList<>(), delete, 1);
- Assertions.assertTrue(rcnt.isSufficientlyReplicated());
- Assertions.assertTrue(rcnt.isOverReplicated());
- Assertions.assertEquals(2, rcnt.overReplicatedIndexes().get(0).intValue());
+ Assertions.assertTrue(rcnt.isSufficientlyReplicated(false));
+ Assertions.assertTrue(rcnt.isOverReplicated(true));
+ Assertions.assertEquals(2,
+ rcnt.overReplicatedIndexes(true).get(0).intValue());
+ Assertions.assertEquals(1, rcnt.overReplicatedIndexes(true).size());
+ Assertions.assertTrue(rcnt.isOverReplicated(false));
+ Assertions.assertEquals(2, rcnt.overReplicatedIndexes(false).size());
+ }
+
+ @Test
+ public void testOverReplicatedContainerFixedWithPendingDelete() {
+ Set<ContainerReplica> replica =
+ registerNodes(Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2),
+ Pair.of(IN_SERVICE, 3), Pair.of(IN_SERVICE, 4),
+ Pair.of(IN_SERVICE, 5), Pair.of(IN_SERVICE, 1),
+ Pair.of(IN_SERVICE, 2));
+ List<Integer> delete = new ArrayList<>();
+ delete.add(1);
+ delete.add(2);
+ ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
+ replica, new ArrayList<>(), delete, 1);
+ Assertions.assertTrue(rcnt.isSufficientlyReplicated(false));
+ Assertions.assertFalse(rcnt.isOverReplicated(true));
+ Assertions.assertEquals(0, rcnt.overReplicatedIndexes(true).size());
+ Assertions.assertTrue(rcnt.isOverReplicated(false));
+ Assertions.assertEquals(2, rcnt.overReplicatedIndexes(false).size());
}
@Test
@@ -179,9 +202,10 @@ public class TestECContainerReplicaCount {
delete.add(1);
ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
replica, new ArrayList<>(), delete, 1);
- Assertions.assertFalse(rcnt.isSufficientlyReplicated());
- Assertions.assertTrue(rcnt.isOverReplicated());
- Assertions.assertEquals(2, rcnt.overReplicatedIndexes().get(0).intValue());
+ Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
+ Assertions.assertTrue(rcnt.isOverReplicated(true));
+ Assertions.assertEquals(2,
+ rcnt.overReplicatedIndexes(true).get(0).intValue());
}
@Test
@@ -193,8 +217,8 @@ public class TestECContainerReplicaCount {
Pair.of(IN_MAINTENANCE, 5), Pair.of(IN_MAINTENANCE, 1));
ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
replica, new ArrayList<>(), new ArrayList<>(), 1);
- Assertions.assertFalse(rcnt.isSufficientlyReplicated());
- Assertions.assertFalse(rcnt.isOverReplicated());
+ Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
+ Assertions.assertFalse(rcnt.isOverReplicated(true));
Assertions.assertEquals(4, rcnt.additionalMaintenanceCopiesNeeded());
for (int i = 1; i <= repConfig.getRequiredNodes(); i++) {
Assertions.assertTrue(rcnt.maintenanceOnlyIndexes().contains(i));
@@ -209,8 +233,8 @@ public class TestECContainerReplicaCount {
Pair.of(IN_MAINTENANCE, 5), Pair.of(IN_MAINTENANCE, 1));
ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
replica, new ArrayList<>(), new ArrayList<>(), 1);
- Assertions.assertTrue(rcnt.isSufficientlyReplicated());
- Assertions.assertFalse(rcnt.isOverReplicated());
+ Assertions.assertTrue(rcnt.isSufficientlyReplicated(false));
+ Assertions.assertFalse(rcnt.isOverReplicated(true));
Assertions.assertEquals(0, rcnt.additionalMaintenanceCopiesNeeded());
// Even though we don't need new copies, the following call will return
// any indexes only have a maintenance copy.
@@ -220,8 +244,8 @@ public class TestECContainerReplicaCount {
// offline, we should be able to lost 2 more containers.
rcnt = new ECContainerReplicaCount(container,
replica, new ArrayList<>(), new ArrayList<>(), 2);
- Assertions.assertFalse(rcnt.isSufficientlyReplicated());
- Assertions.assertFalse(rcnt.isOverReplicated());
+ Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
+ Assertions.assertFalse(rcnt.isOverReplicated(true));
Assertions.assertEquals(1, rcnt.additionalMaintenanceCopiesNeeded());
// Even though we don't need new copies, the following call will return
// any indexes only have a maintenance copy.
@@ -239,8 +263,8 @@ public class TestECContainerReplicaCount {
delete.add(1);
ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
replica, new ArrayList<>(), delete, 1);
- Assertions.assertFalse(rcnt.isSufficientlyReplicated());
- Assertions.assertFalse(rcnt.isOverReplicated());
+ Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
+ Assertions.assertFalse(rcnt.isOverReplicated(true));
Assertions.assertEquals(1, rcnt.additionalMaintenanceCopiesNeeded());
// Even though we don't need new copies, the following call will return
// any indexes only have a maintenance copy.
@@ -258,8 +282,8 @@ public class TestECContainerReplicaCount {
Pair.of(IN_MAINTENANCE, 1), Pair.of(IN_MAINTENANCE, 5));
ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
replica, new ArrayList<>(), new ArrayList<>(), 1);
- Assertions.assertTrue(rcnt.isSufficientlyReplicated());
- Assertions.assertFalse(rcnt.isOverReplicated());
+ Assertions.assertTrue(rcnt.isSufficientlyReplicated(false));
+ Assertions.assertFalse(rcnt.isOverReplicated(true));
Assertions.assertEquals(0, rcnt.additionalMaintenanceCopiesNeeded());
// Even though we don't need new copies, the following call will return
// any indexes only have a maintenance copy.
@@ -269,8 +293,8 @@ public class TestECContainerReplicaCount {
// offline, we should be able to lost 2 more containers.
rcnt = new ECContainerReplicaCount(container,
replica, new ArrayList<>(), new ArrayList<>(), 2);
- Assertions.assertFalse(rcnt.isSufficientlyReplicated());
- Assertions.assertFalse(rcnt.isOverReplicated());
+ Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
+ Assertions.assertFalse(rcnt.isOverReplicated(true));
Assertions.assertEquals(1, rcnt.additionalMaintenanceCopiesNeeded());
// Even though we don't need new copies, the following call will return
// any indexes only have a maintenance copy.
@@ -310,16 +334,40 @@ public class TestECContainerReplicaCount {
ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
replica, new ArrayList<>(), new ArrayList<>(), 1);
- Assertions.assertFalse(rcnt.isSufficientlyReplicated());
- Assertions.assertFalse(rcnt.isOverReplicated());
+ Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
+ Assertions.assertFalse(rcnt.isOverReplicated(true));
+ Assertions.assertEquals(0, rcnt.additionalMaintenanceCopiesNeeded());
+ // Even though we don't need new copies, the following call will return
+ // any indexes only have a maintenance copy.
+ Assertions.assertEquals(0, rcnt.maintenanceOnlyIndexes().size());
+
+ Assertions.assertEquals(2, rcnt.unavailableIndexes(true).size());
+ Assertions.assertTrue(rcnt.unavailableIndexes(true).contains(4));
+ Assertions.assertTrue(rcnt.unavailableIndexes(true).contains(5));
+ }
+
+ @Test
+ public void testUnderReplicatedFixedWithPending() {
+ Set<ContainerReplica> replica =
+ registerNodes(Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2),
+ Pair.of(IN_SERVICE, 3));
+
+ List<Integer> pendingAdd = new ArrayList<>();
+ pendingAdd.add(4);
+ pendingAdd.add(5);
+ ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
+ replica, pendingAdd, new ArrayList<>(), 1);
+ Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
+ Assertions.assertTrue(rcnt.isSufficientlyReplicated(true));
+ Assertions.assertFalse(rcnt.isOverReplicated(true));
Assertions.assertEquals(0, rcnt.additionalMaintenanceCopiesNeeded());
// Even though we don't need new copies, the following call will return
// any indexes only have a maintenance copy.
Assertions.assertEquals(0, rcnt.maintenanceOnlyIndexes().size());
- Assertions.assertEquals(2, rcnt.missingNonMaintenanceIndexes().size());
- Assertions.assertTrue(rcnt.missingNonMaintenanceIndexes().contains(4));
- Assertions.assertTrue(rcnt.missingNonMaintenanceIndexes().contains(5));
+ // Zero unavailable, as the pending adds are scheduled as we assume they
+ // will complete.
+ Assertions.assertEquals(0, rcnt.unavailableIndexes(true).size());
}
@Test
@@ -333,12 +381,12 @@ public class TestECContainerReplicaCount {
ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
replica, new ArrayList<>(), delete, 1);
- Assertions.assertFalse(rcnt.isSufficientlyReplicated());
- Assertions.assertFalse(rcnt.isOverReplicated());
+ Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
+ Assertions.assertFalse(rcnt.isOverReplicated(true));
- Assertions.assertEquals(2, rcnt.missingNonMaintenanceIndexes().size());
- Assertions.assertTrue(rcnt.missingNonMaintenanceIndexes().contains(1));
- Assertions.assertTrue(rcnt.missingNonMaintenanceIndexes().contains(5));
+ Assertions.assertEquals(2, rcnt.unavailableIndexes(true).size());
+ Assertions.assertTrue(rcnt.unavailableIndexes(true).contains(1));
+ Assertions.assertTrue(rcnt.unavailableIndexes(true).contains(5));
}
@Test
@@ -353,10 +401,10 @@ public class TestECContainerReplicaCount {
ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
replica, new ArrayList<>(), delete, 1);
- Assertions.assertFalse(rcnt.isSufficientlyReplicated());
- Assertions.assertFalse(rcnt.isOverReplicated());
+ Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
+ Assertions.assertFalse(rcnt.isOverReplicated(true));
- Assertions.assertEquals(0, rcnt.missingNonMaintenanceIndexes().size());
+ Assertions.assertEquals(0, rcnt.unavailableIndexes(true).size());
}
@Test
@@ -371,10 +419,10 @@ public class TestECContainerReplicaCount {
ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
replica, add, new ArrayList<>(), 1);
- Assertions.assertFalse(rcnt.isSufficientlyReplicated());
- Assertions.assertFalse(rcnt.isOverReplicated());
+ Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
+ Assertions.assertFalse(rcnt.isOverReplicated(true));
- Assertions.assertEquals(0, rcnt.missingNonMaintenanceIndexes().size());
+ Assertions.assertEquals(0, rcnt.unavailableIndexes(true).size());
}
@Test
@@ -382,14 +430,14 @@ public class TestECContainerReplicaCount {
ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container,
new HashSet<>(), new ArrayList<>(), new ArrayList<>(), 1);
Assertions.assertTrue(rcnt.unRecoverable());
- Assertions.assertEquals(5, rcnt.missingNonMaintenanceIndexes().size());
+ Assertions.assertEquals(5, rcnt.unavailableIndexes(true).size());
Set<ContainerReplica> replica =
registerNodes(Pair.of(IN_SERVICE, 1), Pair.of(IN_MAINTENANCE, 2));
rcnt = new ECContainerReplicaCount(container, replica, new ArrayList<>(),
new ArrayList<>(), 1);
Assertions.assertTrue(rcnt.unRecoverable());
- Assertions.assertEquals(3, rcnt.missingNonMaintenanceIndexes().size());
+ Assertions.assertEquals(3, rcnt.unavailableIndexes(true).size());
Assertions.assertEquals(0, rcnt.additionalMaintenanceCopiesNeeded());
replica =
@@ -400,7 +448,7 @@ public class TestECContainerReplicaCount {
new ArrayList<>(), 1);
// Not missing as the decommission replicas are still online
Assertions.assertFalse(rcnt.unRecoverable());
- Assertions.assertEquals(5, rcnt.missingNonMaintenanceIndexes().size());
+ Assertions.assertEquals(0, rcnt.unavailableIndexes(true).size());
}
private Set<ContainerReplica> registerNodes(
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org