Posted to commits@hbase.apache.org by li...@apache.org on 2017/05/24 08:42:29 UTC

hbase git commit: HBASE-18084 Improve CleanerChore to clean from directory which consumes more disk space

Repository: hbase
Updated Branches:
  refs/heads/master 80dd8bf51 -> 998bd5f90


HBASE-18084 Improve CleanerChore to clean from directory which consumes more disk space


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/998bd5f9
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/998bd5f9
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/998bd5f9

Branch: refs/heads/master
Commit: 998bd5f90e9be4a28a446045fdad38cc4b9b6b5d
Parents: 80dd8bf
Author: Yu Li <li...@apache.org>
Authored: Wed May 24 16:40:37 2017 +0800
Committer: Yu Li <li...@apache.org>
Committed: Wed May 24 16:41:04 2017 +0800

----------------------------------------------------------------------
 .../hbase/master/cleaner/CleanerChore.java      | 73 +++++++++++++++++---
 1 file changed, 63 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
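
For context: the change sorts sibling directories by the space they consume
(largest first) before the chore recurses into them, so that cleaning frees
the most disk space as early as possible. The following is a minimal
standalone sketch of that ordering, not part of the patch; the class name,
main() driver, and local-filesystem setup are illustrative assumptions.

import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/** Illustrative demo only, not HBase code. */
public class SortBySpaceDemo {
  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    FileStatus[] entries = fs.listStatus(new Path(args[0]));
    // Cache sizes up front: on HDFS, getContentSummary is a NameNode call,
    // so it should run at most once per entry (the patch caches in a map too).
    Map<Path, Long> sizes = new HashMap<>();
    for (FileStatus e : entries) {
      long space = e.isDirectory()
          ? fs.getContentSummary(e.getPath()).getSpaceConsumed()
          : e.getLen();
      sizes.put(e.getPath(), space);
    }
    // Largest consumers first, mirroring sortByConsumedSpace in the patch.
    Arrays.sort(entries,
        Comparator.comparingLong((FileStatus e) -> sizes.get(e.getPath())).reversed());
    for (FileStatus e : entries) {
      System.out.println(sizes.get(e.getPath()) + "\t" + e.getPath());
    }
  }
}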


http://git-wip-us.apache.org/repos/asf/hbase/blob/998bd5f9/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
index 825feba..1e9a4fa 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
@@ -21,6 +21,17 @@ import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicBoolean;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -32,12 +43,6 @@ import org.apache.hadoop.hbase.Stoppable;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.ipc.RemoteException;
 
-import java.io.IOException;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.atomic.AtomicBoolean;
-
 /**
  * Abstract Cleaner that uses a chain of delegates to clean a directory of files
  * @param <T> Cleaner delegate class that is dynamically loaded from configuration
@@ -151,6 +156,46 @@ public abstract class CleanerChore<T extends FileCleanerDelegate> extends Schedu
   }
 
   /**
+   * Sort the given list in descending order of the space each element consumes
+   * @param dirs the list to sort; its elements should be directories (not files)
+   */
+  private void sortByConsumedSpace(List<FileStatus> dirs) {
+    if (dirs == null || dirs.size() < 2) {
+      // no need to sort an empty list or a single directory
+      return;
+    }
+    Collections.sort(dirs, new Comparator<FileStatus>() {
+      HashMap<FileStatus, Long> directorySpaces = new HashMap<FileStatus, Long>();
+
+      @Override
+      public int compare(FileStatus f1, FileStatus f2) {
+        long f1ConsumedSpace = getSpace(f1);
+        long f2ConsumedSpace = getSpace(f2);
+        return (f1ConsumedSpace > f2ConsumedSpace) ? -1
+            : (f1ConsumedSpace < f2ConsumedSpace ? 1 : 0);
+      }
+
+      private long getSpace(FileStatus f) {
+        Long cached = directorySpaces.get(f);
+        if (cached != null) {
+          return cached;
+        }
+        try {
+          long space =
+              f.isDirectory() ? fs.getContentSummary(f.getPath()).getSpaceConsumed() : f.getLen();
+          directorySpaces.put(f, space);
+          return space;
+        } catch (IOException e) {
+          if (LOG.isTraceEnabled()) {
+            LOG.trace("failed to get space consumed by path " + f.getPath(), e);
+          }
+          return -1;
+        }
+      }
+    });
+  }
+
+  /**
    * Loop over the given directory entries, and check whether they can be deleted.
   * If an entry is itself a directory, it will be recursively checked and deleted iff
   * all subentries are deleted (and no new subentries are added in the meantime)
@@ -164,16 +209,24 @@ public abstract class CleanerChore<T extends FileCleanerDelegate> extends Schedu
     }
     boolean allEntriesDeleted = true;
     List<FileStatus> files = Lists.newArrayListWithCapacity(entries.length);
+    List<FileStatus> dirs = new ArrayList<>();
     for (FileStatus child : entries) {
-      Path path = child.getPath();
       if (child.isDirectory()) {
+        dirs.add(child);
+      } else {
+        // collect all files to attempt to delete in one batch
+        files.add(child);
+      }
+    }
+    if (dirs.size() > 0) {
+      sortByConsumedSpace(dirs);
+      LOG.debug("Prepared to delete files in directories: " + dirs);
+      for (FileStatus child : dirs) {
+        Path path = child.getPath();
         // for each subdirectory delete it and all entries if possible
         if (!checkAndDeleteDirectory(path)) {
           allEntriesDeleted = false;
         }
-      } else {
-        // collect all files to attempt to delete in one batch
-        files.add(child);
       }
     }
     if (!checkAndDeleteFiles(files)) {