You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by er...@apache.org on 2022/04/22 16:23:47 UTC
[ozone] branch master updated: HDDS-6555. Container Deletion should not depend on usedBytes being zero (#3276)
This is an automated email from the ASF dual-hosted git repository.
erose pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new cb2754fd9c HDDS-6555. Container Deletion should not depend on usedBytes being zero (#3276)
cb2754fd9c is described below
commit cb2754fd9c8a9e17ae4d0c2f0c970bfbb31e7315
Author: Hanisha Koneru <ha...@apache.org>
AuthorDate: Fri Apr 22 09:23:42 2022 -0700
HDDS-6555. Container Deletion should not depend on usedBytes being zero (#3276)
---
.../ozone/container/keyvalue/KeyValueHandler.java | 13 ++++
.../src/main/proto/DatanodeClientProtocol.proto | 1 +
.../hdds/scm/container/ReplicationManager.java | 13 ++--
.../org/apache/hadoop/hdds/scm/HddsTestUtils.java | 18 +++++-
.../hdds/scm/container/TestReplicationManager.java | 72 +++++++++++++++++++++-
.../commandhandler/TestDeleteContainerHandler.java | 42 ++++++++++---
6 files changed, 140 insertions(+), 19 deletions(-)
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
index cce6c3ea57..d0a865fde3 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
@@ -85,6 +85,7 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_VOLUME_CHOOSIN
import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CLOSED_CONTAINER_IO;
import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_INTERNAL_ERROR;
import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_UNHEALTHY;
+import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.DELETE_ON_NON_EMPTY_CONTAINER;
import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.DELETE_ON_OPEN_CONTAINER;
import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.GET_SMALL_FILE_ERROR;
import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.INVALID_CONTAINER_STATE;
@@ -1093,6 +1094,18 @@ public class KeyValueHandler extends Handler {
"Deletion of Open Container is not allowed.",
DELETE_ON_OPEN_CONTAINER);
}
+ // Safety check that the container is empty.
+ // If the container is not empty, it should not be deleted unless the
+ // container is beinf forcefully deleted (which happens when
+ // container is unhealthy or over-replicated).
+ if (container.getContainerData().getBlockCount() != 0) {
+ LOG.error("Received container deletion command for container {} but" +
+ " the container is not empty.",
+ container.getContainerData().getContainerID());
+ throw new StorageContainerException("Non-force deletion of " +
+ "non-empty container is not allowed.",
+ DELETE_ON_NON_EMPTY_CONTAINER);
+ }
}
long containerId = container.getContainerData().getContainerID();
containerSet.removeContainer(containerId);
diff --git a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto
index 540ac7b571..3f6e9996d2 100644
--- a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto
+++ b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto
@@ -146,6 +146,7 @@ enum Result {
BLOCK_TOKEN_VERIFICATION_FAILED = 41;
ERROR_IN_DB_SYNC = 42;
CHUNK_FILE_INCONSISTENCY = 43;
+ DELETE_ON_NON_EMPTY_CONTAINER = 44;
}
/**
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java
index 4207482eeb..279163b8f3 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java
@@ -961,6 +961,10 @@ public class ReplicationManager implements SCMService {
/**
* Returns true if the container is empty and CLOSED.
+ * A container is deemed empty if its keyCount (num of blocks) is 0. The
+ * usedBytes counter is not checked here because usedBytes is not a
+ * accurate representation of the committed blocks. There could be orphaned
+ * chunks in the container which contribute to the usedBytes.
*
* @param container Container to check
* @param replicas Set of ContainerReplicas
@@ -969,9 +973,8 @@ public class ReplicationManager implements SCMService {
private boolean isContainerEmpty(final ContainerInfo container,
final Set<ContainerReplica> replicas) {
return container.getState() == LifeCycleState.CLOSED &&
- (container.getUsedBytes() == 0 && container.getNumberOfKeys() == 0) &&
- replicas.stream().allMatch(r -> r.getState() == State.CLOSED &&
- r.getBytesUsed() == 0 && r.getKeyCount() == 0);
+ container.getNumberOfKeys() == 0 && replicas.stream().allMatch(
+ r -> r.getState() == State.CLOSED && r.getKeyCount() == 0);
}
/**
@@ -1064,11 +1067,9 @@ public class ReplicationManager implements SCMService {
Preconditions.assertTrue(container.getState() ==
LifeCycleState.CLOSED);
Preconditions.assertTrue(container.getNumberOfKeys() == 0);
- Preconditions.assertTrue(container.getUsedBytes() == 0);
replicas.stream().forEach(rp -> {
Preconditions.assertTrue(rp.getState() == State.CLOSED);
- Preconditions.assertTrue(rp.getBytesUsed() == 0);
Preconditions.assertTrue(rp.getKeyCount() == 0);
sendDeleteCommand(container, rp.getDatanodeDetails(), false);
});
@@ -1525,7 +1526,7 @@ public class ReplicationManager implements SCMService {
*/
unhealthyReplicas.stream().findFirst().ifPresent(replica ->
- sendDeleteCommand(container, replica.getDatanodeDetails(), false));
+ sendDeleteCommand(container, replica.getDatanodeDetails(), true));
}
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/HddsTestUtils.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/HddsTestUtils.java
index c9e4014300..9b22e3b2fe 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/HddsTestUtils.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/HddsTestUtils.java
@@ -97,6 +97,9 @@ public final class HddsTestUtils {
private static ThreadLocalRandom random = ThreadLocalRandom.current();
private static PipelineID randomPipelineID = PipelineID.randomId();
+ public static final long CONTAINER_USED_BYTES_DEFAULT = 100L;
+ public static final long CONTAINER_NUM_KEYS_DEFAULT = 2L;
+
private HddsTestUtils() {
}
@@ -687,13 +690,26 @@ public final class HddsTestUtils {
final long sequenceId,
final UUID originNodeId,
final DatanodeDetails datanodeDetails) {
+ return getReplicas(containerId, state, CONTAINER_USED_BYTES_DEFAULT,
+ CONTAINER_NUM_KEYS_DEFAULT, sequenceId, originNodeId, datanodeDetails);
+ }
+
+ public static ContainerReplica getReplicas(
+ final ContainerID containerId,
+ final ContainerReplicaProto.State state,
+ final long usedBytes,
+ final long keyCount,
+ final long sequenceId,
+ final UUID originNodeId,
+ final DatanodeDetails datanodeDetails) {
return ContainerReplica.newBuilder()
.setContainerID(containerId)
.setContainerState(state)
.setDatanodeDetails(datanodeDetails)
.setOriginNodeId(originNodeId)
.setSequenceId(sequenceId)
- .setBytesUsed(100)
+ .setBytesUsed(usedBytes)
+ .setKeyCount(keyCount)
.build();
}
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManager.java
index 9e266d1c6f..233bed7d45 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManager.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManager.java
@@ -91,6 +91,8 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.STALE;
import static org.apache.hadoop.hdds.protocol.proto
.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED;
+import static org.apache.hadoop.hdds.scm.HddsTestUtils.CONTAINER_NUM_KEYS_DEFAULT;
+import static org.apache.hadoop.hdds.scm.HddsTestUtils.CONTAINER_USED_BYTES_DEFAULT;
import static org.apache.hadoop.hdds.scm.HddsTestUtils.getContainer;
import static org.apache.hadoop.hdds.scm.HddsTestUtils.getReplicas;
import static org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails;
@@ -1860,10 +1862,51 @@ public class TestReplicationManager {
.getNumDeletionCmdsTimeout());
}
+ /**
+ * A closed empty container with all the replicas also closed and empty
+ * should be deleted.
+ * A container/ replica should be deemed empty when it has 0 keyCount even
+ * if the usedBytes is not 0 (usedBytes should not be used to determine if
+ * the container or replica is empty).
+ */
+ @Test
+ public void testDeleteEmptyContainer() throws Exception {
+ // Create container with usedBytes = 1000 and keyCount = 0
+ final ContainerInfo container = createContainer(LifeCycleState.CLOSED, 1000,
+ 0);
+ addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
+ addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
+ // Create a replica with usedBytes != 0 and keyCount = 0
+ addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED, 100, 0);
+
+ assertDeleteScheduled(3);
+ }
+
+ /**
+ * A closed empty container with a non-empty replica should not be deleted.
+ */
+ @Test
+ public void testDeleteEmptyContainerNonEmptyReplica() throws Exception {
+ final ContainerInfo container = createContainer(LifeCycleState.CLOSED, 0,
+ 0);
+ addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
+ addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
+ // Create the 3rd replica with non-zero key count and used bytes
+ addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED, 100, 1);
+ assertDeleteScheduled(0);
+ }
+
private ContainerInfo createContainer(LifeCycleState containerState)
throws IOException {
+ return createContainer(containerState, CONTAINER_USED_BYTES_DEFAULT,
+ CONTAINER_NUM_KEYS_DEFAULT);
+ }
+
+ private ContainerInfo createContainer(LifeCycleState containerState,
+ long usedBytes, long numKeys) throws IOException {
final ContainerInfo container = getContainer(containerState);
- container.setUsedBytes(1234);
+ container.setUsedBytes(usedBytes);
+ container.setNumberOfKeys(numKeys);
containerStateManager.addContainer(container.getProtobuf());
return container;
}
@@ -1891,6 +1934,13 @@ public class TestReplicationManager {
return addReplicaToDn(container, dn, replicaState);
}
+ private ContainerReplica addReplica(ContainerInfo container,
+ NodeStatus nodeStatus, State replicaState, long usedBytes, long numOfKeys)
+ throws ContainerNotFoundException {
+ DatanodeDetails dn = addNode(nodeStatus);
+ return addReplicaToDn(container, dn, replicaState, usedBytes, numOfKeys);
+ }
+
private ContainerReplica addReplicaToDn(ContainerInfo container,
DatanodeDetails dn, State replicaState)
throws ContainerNotFoundException {
@@ -1899,8 +1949,24 @@ public class TestReplicationManager {
// when processing over-replicated containers.
final UUID originNodeId =
UUID.nameUUIDFromBytes(Longs.toByteArray(container.getContainerID()));
- final ContainerReplica replica = getReplicas(
- container.containerID(), replicaState, 1000L, originNodeId, dn);
+ final ContainerReplica replica = getReplicas(container.containerID(),
+ replicaState, container.getUsedBytes(), container.getNumberOfKeys(),
+ 1000L, originNodeId, dn);
+ containerStateManager
+ .updateContainerReplica(container.containerID(), replica);
+ return replica;
+ }
+
+ private ContainerReplica addReplicaToDn(ContainerInfo container,
+ DatanodeDetails dn, State replicaState, long usedBytes, long numOfKeys)
+ throws ContainerNotFoundException {
+ // Using the same originID for all replica in the container set. If each
+ // replica has a unique originID, it causes problems in ReplicationManager
+ // when processing over-replicated containers.
+ final UUID originNodeId =
+ UUID.nameUUIDFromBytes(Longs.toByteArray(container.getContainerID()));
+ final ContainerReplica replica = getReplicas(container.containerID(),
+ replicaState, usedBytes, numOfKeys, 1000L, originNodeId, dn);
containerStateManager
.updateContainerReplica(container.containerID(), replica);
return replica;
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java
index ca7a4d7750..a8aaf04fbd 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java
@@ -53,6 +53,7 @@ import java.util.UUID;
import org.junit.Rule;
import org.junit.rules.Timeout;
+import org.slf4j.LoggerFactory;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.ONE;
@@ -127,7 +128,7 @@ public class TestDeleteContainerHandler {
.getPipelineManager().getPipeline(container.getPipelineID());
// We need to close the container because delete container only happens
- // on closed containers with force flag set to false.
+ // on closed containers when force flag is set to false.
HddsDatanodeService hddsDatanodeService =
cluster.getHddsDatanodes().get(0);
@@ -165,16 +166,33 @@ public class TestDeleteContainerHandler {
cluster.getStorageContainerManager().getScmContext().getTermOfLeader());
nodeManager.addDatanodeCommand(datanodeDetails.getUuid(), command);
+ // Deleting a non-empty container should fail on DN when the force flag
+ // is false.
+ // Check the log for the error message when deleting non-empty containers
+ GenericTestUtils.LogCapturer logCapturer =
+ GenericTestUtils.LogCapturer.captureLogs(
+ LoggerFactory.getLogger(DeleteContainerCommandHandler.class));
+ GenericTestUtils.waitFor(() -> logCapturer.getOutput().contains("Non" +
+ "-force deletion of non-empty container is not allowed"), 500,
+ 5 * 1000);
+
+ // Set container blockCount to 0 to mock that it is empty
+ getContainerfromDN(hddsDatanodeService, containerId.getId())
+ .getContainerData().setBlockCount(0);
+
+ // Send the delete command again. It should succeed this time.
+ command.setTerm(
+ cluster.getStorageContainerManager().getScmContext().getTermOfLeader());
+ nodeManager.addDatanodeCommand(datanodeDetails.getUuid(), command);
+
GenericTestUtils.waitFor(() ->
isContainerDeleted(hddsDatanodeService, containerId.getId()),
500, 5 * 1000);
Assert.assertTrue(isContainerDeleted(hddsDatanodeService,
containerId.getId()));
-
}
-
@Test
public void testDeleteContainerRequestHandlerOnOpenContainer()
throws Exception {
@@ -282,9 +300,8 @@ public class TestDeleteContainerHandler {
private Boolean isContainerClosed(HddsDatanodeService hddsDatanodeService,
long containerID) {
ContainerData containerData;
- containerData = hddsDatanodeService
- .getDatanodeStateMachine().getContainer().getContainerSet()
- .getContainer(containerID).getContainerData();
+ containerData = getContainerfromDN(hddsDatanodeService, containerID)
+ .getContainerData();
return !containerData.isOpen();
}
@@ -298,9 +315,16 @@ public class TestDeleteContainerHandler {
long containerID) {
Container container;
// if container is not in container set, it means container got deleted.
- container = hddsDatanodeService
- .getDatanodeStateMachine().getContainer().getContainerSet()
- .getContainer(containerID);
+ container = getContainerfromDN(hddsDatanodeService, containerID);
return container == null;
}
+
+ /**
+ * Return the container for the given containerID from the given DN.
+ */
+ private Container getContainerfromDN(HddsDatanodeService hddsDatanodeService,
+ long containerID) {
+ return hddsDatanodeService.getDatanodeStateMachine().getContainer()
+ .getContainerSet().getContainer(containerID);
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org