You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by er...@apache.org on 2022/04/22 16:23:47 UTC

[ozone] branch master updated: HDDS-6555. Container Deletion should not depend on usedBytes being zero (#3276)

This is an automated email from the ASF dual-hosted git repository.

erose pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new cb2754fd9c HDDS-6555. Container Deletion should not depend on usedBytes being zero (#3276)
cb2754fd9c is described below

commit cb2754fd9c8a9e17ae4d0c2f0c970bfbb31e7315
Author: Hanisha Koneru <ha...@apache.org>
AuthorDate: Fri Apr 22 09:23:42 2022 -0700

    HDDS-6555. Container Deletion should not depend on usedBytes being zero (#3276)
---
 .../ozone/container/keyvalue/KeyValueHandler.java  | 13 ++++
 .../src/main/proto/DatanodeClientProtocol.proto    |  1 +
 .../hdds/scm/container/ReplicationManager.java     | 13 ++--
 .../org/apache/hadoop/hdds/scm/HddsTestUtils.java  | 18 +++++-
 .../hdds/scm/container/TestReplicationManager.java | 72 +++++++++++++++++++++-
 .../commandhandler/TestDeleteContainerHandler.java | 42 ++++++++++---
 6 files changed, 140 insertions(+), 19 deletions(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
index cce6c3ea57..d0a865fde3 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
@@ -85,6 +85,7 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_VOLUME_CHOOSIN
 import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CLOSED_CONTAINER_IO;
 import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_INTERNAL_ERROR;
 import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_UNHEALTHY;
+import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.DELETE_ON_NON_EMPTY_CONTAINER;
 import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.DELETE_ON_OPEN_CONTAINER;
 import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.GET_SMALL_FILE_ERROR;
 import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.INVALID_CONTAINER_STATE;
@@ -1093,6 +1094,18 @@ public class KeyValueHandler extends Handler {
               "Deletion of Open Container is not allowed.",
               DELETE_ON_OPEN_CONTAINER);
         }
+        // Safety check that the container is empty.
+        // If the container is not empty, it should not be deleted unless the
+        // container is beinf forcefully deleted (which happens when
+        // container is unhealthy or over-replicated).
+        if (container.getContainerData().getBlockCount() != 0) {
+          LOG.error("Received container deletion command for container {} but" +
+              " the container is not empty.",
+              container.getContainerData().getContainerID());
+          throw new StorageContainerException("Non-force deletion of " +
+              "non-empty container is not allowed.",
+              DELETE_ON_NON_EMPTY_CONTAINER);
+        }
       }
       long containerId = container.getContainerData().getContainerID();
       containerSet.removeContainer(containerId);
diff --git a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto
index 540ac7b571..3f6e9996d2 100644
--- a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto
+++ b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto
@@ -146,6 +146,7 @@ enum Result {
   BLOCK_TOKEN_VERIFICATION_FAILED = 41;
   ERROR_IN_DB_SYNC = 42;
   CHUNK_FILE_INCONSISTENCY = 43;
+  DELETE_ON_NON_EMPTY_CONTAINER = 44;
 }
 
 /**
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java
index 4207482eeb..279163b8f3 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java
@@ -961,6 +961,10 @@ public class ReplicationManager implements SCMService {
 
   /**
    * Returns true if the container is empty and CLOSED.
+   * A container is deemed empty if its keyCount (num of blocks) is 0. The
+   * usedBytes counter is not checked here because usedBytes is not a
+   * accurate representation of the committed blocks. There could be orphaned
+   * chunks in the container which contribute to the usedBytes.
    *
    * @param container Container to check
    * @param replicas Set of ContainerReplicas
@@ -969,9 +973,8 @@ public class ReplicationManager implements SCMService {
   private boolean isContainerEmpty(final ContainerInfo container,
       final Set<ContainerReplica> replicas) {
     return container.getState() == LifeCycleState.CLOSED &&
-        (container.getUsedBytes() == 0 && container.getNumberOfKeys() == 0) &&
-        replicas.stream().allMatch(r -> r.getState() == State.CLOSED &&
-            r.getBytesUsed() == 0 && r.getKeyCount() == 0);
+        container.getNumberOfKeys() == 0 && replicas.stream().allMatch(
+            r -> r.getState() == State.CLOSED && r.getKeyCount() == 0);
   }
 
   /**
@@ -1064,11 +1067,9 @@ public class ReplicationManager implements SCMService {
     Preconditions.assertTrue(container.getState() ==
         LifeCycleState.CLOSED);
     Preconditions.assertTrue(container.getNumberOfKeys() == 0);
-    Preconditions.assertTrue(container.getUsedBytes() == 0);
 
     replicas.stream().forEach(rp -> {
       Preconditions.assertTrue(rp.getState() == State.CLOSED);
-      Preconditions.assertTrue(rp.getBytesUsed() == 0);
       Preconditions.assertTrue(rp.getKeyCount() == 0);
       sendDeleteCommand(container, rp.getDatanodeDetails(), false);
     });
@@ -1525,7 +1526,7 @@ public class ReplicationManager implements SCMService {
      */
 
     unhealthyReplicas.stream().findFirst().ifPresent(replica ->
-        sendDeleteCommand(container, replica.getDatanodeDetails(), false));
+        sendDeleteCommand(container, replica.getDatanodeDetails(), true));
 
   }
 
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/HddsTestUtils.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/HddsTestUtils.java
index c9e4014300..9b22e3b2fe 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/HddsTestUtils.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/HddsTestUtils.java
@@ -97,6 +97,9 @@ public final class HddsTestUtils {
   private static ThreadLocalRandom random = ThreadLocalRandom.current();
   private static PipelineID randomPipelineID = PipelineID.randomId();
 
+  public static final long CONTAINER_USED_BYTES_DEFAULT = 100L;
+  public static final long CONTAINER_NUM_KEYS_DEFAULT = 2L;
+
   private HddsTestUtils() {
   }
 
@@ -687,13 +690,26 @@ public final class HddsTestUtils {
       final long sequenceId,
       final UUID originNodeId,
       final DatanodeDetails datanodeDetails) {
+    return getReplicas(containerId, state, CONTAINER_USED_BYTES_DEFAULT,
+        CONTAINER_NUM_KEYS_DEFAULT, sequenceId, originNodeId, datanodeDetails);
+  }
+
+  public static ContainerReplica getReplicas(
+      final ContainerID containerId,
+      final ContainerReplicaProto.State state,
+      final long usedBytes,
+      final long keyCount,
+      final long sequenceId,
+      final UUID originNodeId,
+      final DatanodeDetails datanodeDetails) {
     return ContainerReplica.newBuilder()
         .setContainerID(containerId)
         .setContainerState(state)
         .setDatanodeDetails(datanodeDetails)
         .setOriginNodeId(originNodeId)
         .setSequenceId(sequenceId)
-        .setBytesUsed(100)
+        .setBytesUsed(usedBytes)
+        .setKeyCount(keyCount)
         .build();
   }
 
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManager.java
index 9e266d1c6f..233bed7d45 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManager.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManager.java
@@ -91,6 +91,8 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY
 import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.STALE;
 import static org.apache.hadoop.hdds.protocol.proto
     .StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED;
+import static org.apache.hadoop.hdds.scm.HddsTestUtils.CONTAINER_NUM_KEYS_DEFAULT;
+import static org.apache.hadoop.hdds.scm.HddsTestUtils.CONTAINER_USED_BYTES_DEFAULT;
 import static org.apache.hadoop.hdds.scm.HddsTestUtils.getContainer;
 import static org.apache.hadoop.hdds.scm.HddsTestUtils.getReplicas;
 import static org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails;
@@ -1860,10 +1862,51 @@ public class TestReplicationManager {
         .getNumDeletionCmdsTimeout());
   }
 
+  /**
+   * A closed empty container with all the replicas also closed and empty
+   * should be deleted.
+   * A container/ replica should be deemed empty when it has 0 keyCount even
+   * if the usedBytes is not 0 (usedBytes should not be used to determine if
+   * the container or replica is empty).
+   */
+  @Test
+  public void testDeleteEmptyContainer() throws Exception {
+    // Create container with usedBytes = 1000 and keyCount = 0
+    final ContainerInfo container = createContainer(LifeCycleState.CLOSED, 1000,
+        0);
+    addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
+    addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
+    // Create a replica with usedBytes != 0 and keyCount = 0
+    addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED, 100, 0);
+
+    assertDeleteScheduled(3);
+  }
+
+  /**
+   * A closed empty container with a non-empty replica should not be deleted.
+   */
+  @Test
+  public void testDeleteEmptyContainerNonEmptyReplica() throws Exception {
+    final ContainerInfo container = createContainer(LifeCycleState.CLOSED, 0,
+        0);
+    addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
+    addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
+    // Create the 3rd replica with non-zero key count and used bytes
+    addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED, 100, 1);
+    assertDeleteScheduled(0);
+  }
+
   private ContainerInfo createContainer(LifeCycleState containerState)
       throws IOException {
+    return createContainer(containerState, CONTAINER_USED_BYTES_DEFAULT,
+        CONTAINER_NUM_KEYS_DEFAULT);
+  }
+
+  private ContainerInfo createContainer(LifeCycleState containerState,
+      long usedBytes, long numKeys) throws IOException {
     final ContainerInfo container = getContainer(containerState);
-    container.setUsedBytes(1234);
+    container.setUsedBytes(usedBytes);
+    container.setNumberOfKeys(numKeys);
     containerStateManager.addContainer(container.getProtobuf());
     return container;
   }
@@ -1891,6 +1934,13 @@ public class TestReplicationManager {
     return addReplicaToDn(container, dn, replicaState);
   }
 
+  private ContainerReplica addReplica(ContainerInfo container,
+      NodeStatus nodeStatus, State replicaState, long usedBytes, long numOfKeys)
+      throws ContainerNotFoundException {
+    DatanodeDetails dn = addNode(nodeStatus);
+    return addReplicaToDn(container, dn, replicaState, usedBytes, numOfKeys);
+  }
+
   private ContainerReplica addReplicaToDn(ContainerInfo container,
                                DatanodeDetails dn, State replicaState)
       throws ContainerNotFoundException {
@@ -1899,8 +1949,24 @@ public class TestReplicationManager {
     // when processing over-replicated containers.
     final UUID originNodeId =
         UUID.nameUUIDFromBytes(Longs.toByteArray(container.getContainerID()));
-    final ContainerReplica replica = getReplicas(
-        container.containerID(), replicaState, 1000L, originNodeId, dn);
+    final ContainerReplica replica = getReplicas(container.containerID(),
+        replicaState, container.getUsedBytes(), container.getNumberOfKeys(),
+        1000L, originNodeId, dn);
+    containerStateManager
+        .updateContainerReplica(container.containerID(), replica);
+    return replica;
+  }
+
+  private ContainerReplica addReplicaToDn(ContainerInfo container,
+      DatanodeDetails dn, State replicaState, long usedBytes, long numOfKeys)
+      throws ContainerNotFoundException {
+    // Using the same originID for all replica in the container set. If each
+    // replica has a unique originID, it causes problems in ReplicationManager
+    // when processing over-replicated containers.
+    final UUID originNodeId =
+        UUID.nameUUIDFromBytes(Longs.toByteArray(container.getContainerID()));
+    final ContainerReplica replica = getReplicas(container.containerID(),
+        replicaState, usedBytes, numOfKeys, 1000L, originNodeId, dn);
     containerStateManager
         .updateContainerReplica(container.containerID(), replica);
     return replica;
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java
index ca7a4d7750..a8aaf04fbd 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java
@@ -53,6 +53,7 @@ import java.util.UUID;
 
 import org.junit.Rule;
 import org.junit.rules.Timeout;
+import org.slf4j.LoggerFactory;
 
 import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.ONE;
@@ -127,7 +128,7 @@ public class TestDeleteContainerHandler {
         .getPipelineManager().getPipeline(container.getPipelineID());
 
     // We need to close the container because delete container only happens
-    // on closed containers with force flag set to false.
+    // on closed containers when force flag is set to false.
 
     HddsDatanodeService hddsDatanodeService =
         cluster.getHddsDatanodes().get(0);
@@ -165,16 +166,33 @@ public class TestDeleteContainerHandler {
         cluster.getStorageContainerManager().getScmContext().getTermOfLeader());
     nodeManager.addDatanodeCommand(datanodeDetails.getUuid(), command);
 
+    // Deleting a non-empty container should fail on DN when the force flag
+    // is false.
+    // Check the log for the error message when deleting non-empty containers
+    GenericTestUtils.LogCapturer logCapturer =
+        GenericTestUtils.LogCapturer.captureLogs(
+            LoggerFactory.getLogger(DeleteContainerCommandHandler.class));
+    GenericTestUtils.waitFor(() -> logCapturer.getOutput().contains("Non" +
+        "-force deletion of non-empty container is not allowed"), 500,
+        5 * 1000);
+
+    // Set container blockCount to 0 to mock that it is empty
+    getContainerfromDN(hddsDatanodeService, containerId.getId())
+        .getContainerData().setBlockCount(0);
+
+    // Send the delete command again. It should succeed this time.
+    command.setTerm(
+        cluster.getStorageContainerManager().getScmContext().getTermOfLeader());
+    nodeManager.addDatanodeCommand(datanodeDetails.getUuid(), command);
+
     GenericTestUtils.waitFor(() ->
             isContainerDeleted(hddsDatanodeService, containerId.getId()),
         500, 5 * 1000);
 
     Assert.assertTrue(isContainerDeleted(hddsDatanodeService,
         containerId.getId()));
-
   }
 
-
   @Test
   public void testDeleteContainerRequestHandlerOnOpenContainer()
       throws Exception {
@@ -282,9 +300,8 @@ public class TestDeleteContainerHandler {
   private Boolean isContainerClosed(HddsDatanodeService hddsDatanodeService,
       long containerID) {
     ContainerData containerData;
-    containerData = hddsDatanodeService
-        .getDatanodeStateMachine().getContainer().getContainerSet()
-        .getContainer(containerID).getContainerData();
+    containerData = getContainerfromDN(hddsDatanodeService, containerID)
+        .getContainerData();
     return !containerData.isOpen();
   }
 
@@ -298,9 +315,16 @@ public class TestDeleteContainerHandler {
       long containerID) {
     Container container;
     // if container is not in container set, it means container got deleted.
-    container = hddsDatanodeService
-        .getDatanodeStateMachine().getContainer().getContainerSet()
-        .getContainer(containerID);
+    container = getContainerfromDN(hddsDatanodeService, containerID);
     return container == null;
   }
+
+  /**
+   * Return the container for the given containerID from the given DN.
+   */
+  private Container getContainerfromDN(HddsDatanodeService hddsDatanodeService,
+      long containerID) {
+    return hddsDatanodeService.getDatanodeStateMachine().getContainer()
+        .getContainerSet().getContainer(containerID);
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org