You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@ozone.apache.org by GitBox <gi...@apache.org> on 2021/03/30 15:38:25 UTC

[GitHub] [ozone] linyiqun commented on a change in pull request #2093: HDDS-4495. [FSO]Delete : Implement async cleanup of garbage and orphan sub-dirs/files

linyiqun commented on a change in pull request #2093:
URL: https://github.com/apache/ozone/pull/2093#discussion_r604200821



##########
File path: hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java
##########
@@ -253,4 +253,17 @@ private OMConfigKeys() {
   public static final String OZONE_OM_LAYOUT_VERSION_DEFAULT = "V0";
 
   public static final String OZONE_OM_LAYOUT_VERSION_V1 = "V1";
+
+  /**
+   * Configuration properties for Dir Deleting Service.
+   */
+  public static final String OZONE_DIR_DELETING_SERVICE_INTERVAL =
+      "ozone.directory.deleting.service.interval";
+  public static final String OZONE_DIR_DELETING_SERVICE_INTERVAL_DEFAULT
+      = "30s";

Review comment:
       This default value here doesn't match that in ozone-default.xml 

##########
File path: hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/DirectoryDeletingService.java
##########
@@ -0,0 +1,285 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership.  The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.ozone.om;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.protobuf.ServiceException;
+import org.apache.hadoop.hdds.utils.BackgroundService;
+import org.apache.hadoop.hdds.utils.BackgroundTask;
+import org.apache.hadoop.hdds.utils.BackgroundTaskQueue;
+import org.apache.hadoop.hdds.utils.BackgroundTaskResult;
+import org.apache.hadoop.ozone.om.helpers.OMRatisHelper;
+import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
+import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos;
+import org.apache.hadoop.util.Time;
+import org.apache.ratis.protocol.ClientId;
+import org.apache.ratis.protocol.Message;
+import org.apache.ratis.protocol.RaftClientRequest;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicLong;
+
+import static org.apache.hadoop.ozone.ClientVersions.CURRENT_VERSION;
+import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_PATH_DELETING_LIMIT_PER_TASK;
+import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_PATH_DELETING_LIMIT_PER_TASK_DEFAULT;
+
+/**
+ * This is a background service to delete orphan directories and its
+ * sub paths(sub-dirs and sub-files).
+ *
+ * <p>
+ * This will scan the metadata of om periodically to get the orphan dirs from
+ * DeletedDirectoryTable and find its sub paths. It will fetch all sub-files
+ * from KeyTable and move those to DeletedTable so that OM's
+ * KeyDeletingService will cleanup those files later. It will fetch all
+ * sub-directories from the DirectoryTable and move those to
+ * DeletedDirectoryTable so that these will be visited in next iterations.
+ *
+ * <p>
+ * After moving all sub-files and sub-dirs the parent orphan directory will be
+ * deleted by this service. It will continue traversing until all the leaf path
+ * components of an orphan directory is visited.
+ */
+public class DirectoryDeletingService extends BackgroundService {
+
+  private final KeyManager keyManager;
+  private final OzoneManager ozoneManager;
+  private AtomicLong deletedDirsCount;
+  private AtomicLong deletedFilesCount;
+  private final AtomicLong runCount;
+
+  private static ClientId clientId = ClientId.randomId();
+
+  // Use only a single thread for DirDeletion. Multiple threads would read
+  // or write to same tables and can send deletion requests for same key
+  // multiple times.
+  private static final int DIR_DELETING_CORE_POOL_SIZE = 1;
+
+  // Number of items(dirs/files) to be batched in an iteration.
+  private final long pathLimitPerTask;
+
+  public DirectoryDeletingService(long interval, TimeUnit unit,
+      long serviceTimeout, OzoneManager ozoneManager) {
+    super("DirectoryDeletingService", interval, unit,
+        DIR_DELETING_CORE_POOL_SIZE, serviceTimeout);
+    this.keyManager = ozoneManager.getKeyManager();
+    this.ozoneManager = ozoneManager;
+    this.deletedDirsCount = new AtomicLong(0);
+    this.deletedFilesCount = new AtomicLong(0);
+    this.runCount = new AtomicLong(0);
+    this.pathLimitPerTask = ozoneManager.getConfiguration()
+        .getInt(OZONE_PATH_DELETING_LIMIT_PER_TASK,
+            OZONE_PATH_DELETING_LIMIT_PER_TASK_DEFAULT);
+  }
+
+  private boolean shouldRun() {
+    if (ozoneManager == null) {
+      // OzoneManager can be null for testing
+      return true;
+    }
+    return ozoneManager.isLeaderReady();
+  }
+
+  private boolean isRatisEnabled() {
+    if (ozoneManager == null) {
+      return false;
+    }
+    return ozoneManager.isRatisEnabled();
+  }
+
+  @Override
+  public BackgroundTaskQueue getTasks() {
+    BackgroundTaskQueue queue = new BackgroundTaskQueue();
+    queue.add(new DirectoryDeletingService.DirDeletingTask());
+    return queue;
+  }
+
+  private class DirDeletingTask implements BackgroundTask {
+
+    @Override
+    public int getPriority() {
+      return 0;
+    }
+
+    @Override
+    public BackgroundTaskResult call() throws Exception {
+      if (shouldRun()) {
+        runCount.incrementAndGet();
+        long count = pathLimitPerTask;
+        try {
+          long startTime = Time.monotonicNow();
+          // step-1) Get one pending deleted directory
+          OmKeyInfo pendingDeletedDirInfo = keyManager.getPendingDeletionDir();
+          if (pendingDeletedDirInfo != null) {
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("Pending deleted dir name: {}",
+                  pendingDeletedDirInfo.getKeyName());
+            }
+            // step-1: get all sub directories under the deletedDir
+            List<OmKeyInfo> dirs =
+                keyManager.getPendingDeletionSubDirs(pendingDeletedDirInfo,
+                    --count);
+            count = count - dirs.size();
+            List<OmKeyInfo> deletedSubDirList = new ArrayList<>();
+            for (OmKeyInfo dirInfo : dirs) {
+              deletedSubDirList.add(dirInfo);
+              if (LOG.isDebugEnabled()) {
+                LOG.debug("deleted sub dir name: {}",
+                    dirInfo.getKeyName());
+              }
+            }
+
+            // step-2: get all sub files under the deletedDir
+            List<OmKeyInfo> purgeDeletedFiles =
+                keyManager.getPendingDeletionSubFiles(pendingDeletedDirInfo,
+                    --count);
+
+            if (LOG.isDebugEnabled()) {
+              for (OmKeyInfo fileInfo : purgeDeletedFiles) {
+                LOG.debug("deleted sub file name: {}", fileInfo.getKeyName());
+              }
+            }
+
+            // step-3: Since there is a boundary condition of 'numEntries' in
+            // each batch, check whether the sub paths count reached batch size
+            // limit. If count reached limit then there can be some more child
+            // paths to be visited and will keep the parent deleted directory
+            // for one more pass.
+            List<String> purgeDeletedDirs = new ArrayList<>();
+            if (count > 0) {

Review comment:
       Here we at most purge one deleted dir, seems no need to use the List to store this.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@ozone.apache.org
For additional commands, e-mail: issues-help@ozone.apache.org