You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@ozone.apache.org by GitBox <gi...@apache.org> on 2022/08/22 10:50:12 UTC

[GitHub] [ozone] adoroszlai commented on a diff in pull request #3660: HDDS-7090. EC: delete empty closed EC container

adoroszlai commented on code in PR #3660:
URL: https://github.com/apache/ozone/pull/3660#discussion_r951282465


##########
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java:
##########
@@ -448,6 +436,153 @@ protected ContainerHealthResult processContainer(ContainerInfo containerInfo,
     return health;
   }
 
+  private ContainerHealthResult checkContainerState(
+      final ContainerInfo containerInfo, final ReplicationManagerReport report)
+      throws ContainerNotFoundException {
+    ContainerID containerID = containerInfo.containerID();
+    Set<ContainerReplica> replicas = containerManager
+        .getContainerReplicas(containerID);
+    HddsProtos.LifeCycleState state = containerInfo.getState();
+    ContainerHealthResult health =
+        new ContainerHealthResult.HealthyResult(containerInfo);
+
+    if (state == HddsProtos.LifeCycleState.OPEN) {
+      if (!isOpenContainerHealthy(containerInfo, replicas)) {
+        report.incrementAndSample(
+            HealthState.OPEN_UNHEALTHY, containerID);
+        eventPublisher.fireEvent(SCMEvents.CLOSE_CONTAINER, containerID);
+        return new ContainerHealthResult.UnHealthyResult(containerInfo);
+      }
+      return health;
+    }
+
+    /*
+     * If the container is in CLOSING state, the replicas can either
+     * be in OPEN or in CLOSING state. In both of this cases
+     * we have to resend close container command to the datanodes.
+     */
+    if (state == HddsProtos.LifeCycleState.CLOSING) {
+      return handleClosingContainer(containerInfo, replicas);
+    }
+
+    //TODO: handle quosi_closed container for ratis after refactoring legacyRM
+
+    if (state == HddsProtos.LifeCycleState.CLOSED) {
+      List<ContainerReplica> unhealthyReplicas = replicas.stream()
+          .filter(r -> !compareState(containerInfo.getState(), r.getState()))
+          .collect(Collectors.toList());
+
+      if (unhealthyReplicas.size() > 0) {
+        return handleUnhealthyReplicas(containerInfo, unhealthyReplicas);
+      }
+    }
+
+    /*
+     * If container is under deleting and all it's replicas are deleted,
+     * then make the container as CLEANED,
+     * or resend the delete replica command if needed.
+     */
+    if (state == HddsProtos.LifeCycleState.DELETING) {
+      return handleDeletingContainer(containerInfo, replicas);
+    }
+
+    if (state == HddsProtos.LifeCycleState.DELETED) {
+      return health;
+    }
+
+    return null;
+  }
+
+  /**
+   * Handle the container which is in closing state.
+   *
+   * @param container ContainerInfo
+   * @param replicas Set of ContainerReplicas
+   */
+  private ContainerHealthResult handleClosingContainer(
+      final ContainerInfo container, final Set<ContainerReplica> replicas) {
+    ContainerHealthResult health =
+        new ContainerHealthResult.HealthyResult(container);
+    ContainerID containerID = container.containerID();
+    long term;
+    try {
+      term = scmContext.getTermOfLeader();
+    } catch (NotLeaderException nle) {
+      LOG.warn("Skip sending datanode command,"
+          + " since current SCM is not leader.", nle);
+      return health;
+    }
+
+    replicas.stream()
+        .filter(r -> r.getState() != ContainerReplicaProto.State.UNHEALTHY)
+        .forEach(r -> {
+          final CloseContainerCommand closeContainerCommand =
+              new CloseContainerCommand(
+                  containerID.getId(), container.getPipelineID(), false);
+          closeContainerCommand.setTerm(term);
+          closeContainerCommand.setEncodedToken(getContainerToken(containerID));
+          health.addCommand(new CommandForDatanode<>(
+              r.getDatanodeDetails().getUuid(), closeContainerCommand));
+        });
+
+    return health;
+  }
+
+  /**
+   * Handle the container which is in deleting state.
+   *
+   * @param container ContainerInfo
+   * @param replicas Set of ContainerReplicas
+   */
+  private ContainerHealthResult handleDeletingContainer(
+      final ContainerInfo container, final Set<ContainerReplica> replicas) {
+    ContainerID containerID = container.containerID();
+    ContainerHealthResult health =
+        new ContainerHealthResult.HealthyResult(container);
+    if (replicas.size() == 0) {
+      try {
+        containerManager.updateContainerState(containerID,
+            HddsProtos.LifeCycleEvent.CLEANUP);
+      } catch (Exception e) {
+        LOG.warn("Cannot update container state with CLEANUP.", e);
+        return health;
+      }
+      LOG.debug("Container {} state changes to DELETED", container);
+    } else {
+      // Check whether to resend deletionCommand
+      Set<DatanodeDetails> deletionInFlight = containerReplicaPendingOps
+          .getPendingOps(containerID).stream()
+          .filter(r -> r.getOpType() == DELETE)
+          .map(ContainerReplicaOp::getTarget).collect(Collectors.toSet());
+
+      Set<ContainerReplica> filteredReplicas = replicas.stream()
+          .filter(r -> !deletionInFlight.contains(r.getDatanodeDetails()))
+          .collect(Collectors.toSet());
+
+      long term;
+      try {
+        term = scmContext.getTermOfLeader();
+      } catch (NotLeaderException nle) {
+        LOG.warn("Skip sending datanode command,"
+            + " since current SCM is not leader.", nle);
+        return health;
+      }
+
+      filteredReplicas.stream().forEach(rp -> {

Review Comment:
   ```suggestion
         filteredReplicas.forEach(rp -> {
   ```



##########
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java:
##########
@@ -448,6 +436,153 @@ protected ContainerHealthResult processContainer(ContainerInfo containerInfo,
     return health;
   }
 
+  private ContainerHealthResult checkContainerState(
+      final ContainerInfo containerInfo, final ReplicationManagerReport report)
+      throws ContainerNotFoundException {
+    ContainerID containerID = containerInfo.containerID();
+    Set<ContainerReplica> replicas = containerManager
+        .getContainerReplicas(containerID);
+    HddsProtos.LifeCycleState state = containerInfo.getState();
+    ContainerHealthResult health =
+        new ContainerHealthResult.HealthyResult(containerInfo);
+
+    if (state == HddsProtos.LifeCycleState.OPEN) {

Review Comment:
   Nit: Since each state has a separate handler, maybe `switch` would be a better fit.



##########
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java:
##########
@@ -448,6 +436,153 @@ protected ContainerHealthResult processContainer(ContainerInfo containerInfo,
     return health;
   }
 
+  private ContainerHealthResult checkContainerState(
+      final ContainerInfo containerInfo, final ReplicationManagerReport report)
+      throws ContainerNotFoundException {
+    ContainerID containerID = containerInfo.containerID();
+    Set<ContainerReplica> replicas = containerManager
+        .getContainerReplicas(containerID);
+    HddsProtos.LifeCycleState state = containerInfo.getState();
+    ContainerHealthResult health =
+        new ContainerHealthResult.HealthyResult(containerInfo);
+
+    if (state == HddsProtos.LifeCycleState.OPEN) {
+      if (!isOpenContainerHealthy(containerInfo, replicas)) {
+        report.incrementAndSample(
+            HealthState.OPEN_UNHEALTHY, containerID);
+        eventPublisher.fireEvent(SCMEvents.CLOSE_CONTAINER, containerID);
+        return new ContainerHealthResult.UnHealthyResult(containerInfo);
+      }
+      return health;
+    }
+
+    /*
+     * If the container is in CLOSING state, the replicas can either
+     * be in OPEN or in CLOSING state. In both of this cases
+     * we have to resend close container command to the datanodes.
+     */
+    if (state == HddsProtos.LifeCycleState.CLOSING) {
+      return handleClosingContainer(containerInfo, replicas);
+    }
+
+    //TODO: handle quosi_closed container for ratis after refactoring legacyRM
+
+    if (state == HddsProtos.LifeCycleState.CLOSED) {
+      List<ContainerReplica> unhealthyReplicas = replicas.stream()
+          .filter(r -> !compareState(containerInfo.getState(), r.getState()))
+          .collect(Collectors.toList());
+
+      if (unhealthyReplicas.size() > 0) {
+        return handleUnhealthyReplicas(containerInfo, unhealthyReplicas);
+      }
+    }
+
+    /*
+     * If container is under deleting and all it's replicas are deleted,
+     * then make the container as CLEANED,
+     * or resend the delete replica command if needed.
+     */
+    if (state == HddsProtos.LifeCycleState.DELETING) {
+      return handleDeletingContainer(containerInfo, replicas);
+    }
+
+    if (state == HddsProtos.LifeCycleState.DELETED) {
+      return health;
+    }
+
+    return null;
+  }
+
+  /**
+   * Handle the container which is in closing state.
+   *
+   * @param container ContainerInfo
+   * @param replicas Set of ContainerReplicas
+   */
+  private ContainerHealthResult handleClosingContainer(
+      final ContainerInfo container, final Set<ContainerReplica> replicas) {
+    ContainerHealthResult health =
+        new ContainerHealthResult.HealthyResult(container);
+    ContainerID containerID = container.containerID();
+    long term;
+    try {
+      term = scmContext.getTermOfLeader();
+    } catch (NotLeaderException nle) {
+      LOG.warn("Skip sending datanode command,"
+          + " since current SCM is not leader.", nle);
+      return health;
+    }
+
+    replicas.stream()
+        .filter(r -> r.getState() != ContainerReplicaProto.State.UNHEALTHY)
+        .forEach(r -> {
+          final CloseContainerCommand closeContainerCommand =
+              new CloseContainerCommand(
+                  containerID.getId(), container.getPipelineID(), false);
+          closeContainerCommand.setTerm(term);
+          closeContainerCommand.setEncodedToken(getContainerToken(containerID));

Review Comment:
   Can you please extract a method for creating the close container command?  It's repeated in a few places.



##########
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java:
##########
@@ -448,6 +436,153 @@ protected ContainerHealthResult processContainer(ContainerInfo containerInfo,
     return health;
   }
 
+  private ContainerHealthResult checkContainerState(
+      final ContainerInfo containerInfo, final ReplicationManagerReport report)
+      throws ContainerNotFoundException {
+    ContainerID containerID = containerInfo.containerID();
+    Set<ContainerReplica> replicas = containerManager
+        .getContainerReplicas(containerID);
+    HddsProtos.LifeCycleState state = containerInfo.getState();
+    ContainerHealthResult health =
+        new ContainerHealthResult.HealthyResult(containerInfo);
+
+    if (state == HddsProtos.LifeCycleState.OPEN) {
+      if (!isOpenContainerHealthy(containerInfo, replicas)) {
+        report.incrementAndSample(
+            HealthState.OPEN_UNHEALTHY, containerID);
+        eventPublisher.fireEvent(SCMEvents.CLOSE_CONTAINER, containerID);
+        return new ContainerHealthResult.UnHealthyResult(containerInfo);
+      }
+      return health;
+    }
+
+    /*
+     * If the container is in CLOSING state, the replicas can either
+     * be in OPEN or in CLOSING state. In both of this cases
+     * we have to resend close container command to the datanodes.
+     */

Review Comment:
   Nit: I think these comments (this one and the one for "DELETING" state) would be better placed at the corresponding `handle...` methods.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@ozone.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@ozone.apache.org
For additional commands, e-mail: issues-help@ozone.apache.org