You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by li...@apache.org on 2017/05/24 08:42:29 UTC
hbase git commit: HBASE-18084 Improve CleanerChore to clean from
directory which consumes more disk space
Repository: hbase
Updated Branches:
refs/heads/master 80dd8bf51 -> 998bd5f90
HBASE-18084 Improve CleanerChore to clean from directory which consumes more disk space
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/998bd5f9
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/998bd5f9
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/998bd5f9
Branch: refs/heads/master
Commit: 998bd5f90e9be4a28a446045fdad38cc4b9b6b5d
Parents: 80dd8bf
Author: Yu Li <li...@apache.org>
Authored: Wed May 24 16:40:37 2017 +0800
Committer: Yu Li <li...@apache.org>
Committed: Wed May 24 16:41:04 2017 +0800
----------------------------------------------------------------------
.../hbase/master/cleaner/CleanerChore.java | 73 +++++++++++++++++---
1 file changed, 63 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/998bd5f9/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
index 825feba..1e9a4fa 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
@@ -21,6 +21,17 @@ import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicBoolean;
+
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@@ -32,12 +43,6 @@ import org.apache.hadoop.hbase.Stoppable;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.ipc.RemoteException;
-import java.io.IOException;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.atomic.AtomicBoolean;
-
/**
* Abstract Cleaner that uses a chain of delegates to clean a directory of files
* @param <T> Cleaner delegate class that is dynamically loaded from configuration
@@ -151,6 +156,46 @@ public abstract class CleanerChore<T extends FileCleanerDelegate> extends Schedu
}
/**
+   * Sort the given list in (descending) order of the space each element takes,
+   * so that the chore cleans the directory consuming the most disk space first.
+   * @param dirs the list to sort, element in it should be directory (not file)
+   */
+  private void sortByConsumedSpace(List<FileStatus> dirs) {
+    if (dirs == null || dirs.size() < 2) {
+      // no need to sort for empty or single directory
+      return;
+    }
+    Collections.sort(dirs, new Comparator<FileStatus>() {
+      // Cache computed sizes: getContentSummary does a recursive scan of the
+      // directory, and each element is compared O(n log n) times during sorting.
+      HashMap<FileStatus, Long> directorySpaces = new HashMap<FileStatus, Long>();
+
+      @Override
+      public int compare(FileStatus f1, FileStatus f2) {
+        // Descending order of consumed space.
+        return Long.compare(getSpace(f2), getSpace(f1));
+      }
+
+      private long getSpace(FileStatus f) {
+        Long cached = directorySpaces.get(f);
+        if (cached != null) {
+          return cached;
+        }
+        long space;
+        try {
+          space =
+            f.isDirectory() ? fs.getContentSummary(f.getPath()).getSpaceConsumed() : f.getLen();
+        } catch (IOException e) {
+          if (LOG.isTraceEnabled()) {
+            LOG.trace("failed to get space consumed by path " + f.getPath(), e);
+          }
+          // Unknown size sorts last in descending order.
+          space = -1;
+        }
+        // Cache the result even on failure: re-querying on every comparison is
+        // expensive, and an intermittently failing lookup could return different
+        // values across comparisons, violating the Comparator contract and making
+        // Collections.sort throw IllegalArgumentException.
+        directorySpaces.put(f, space);
+        return space;
+      }
+    });
+  }
+
+ /**
* Loop over the given directory entries, and check whether they can be deleted.
* If an entry is itself a directory it will be recursively checked and deleted itself iff
* all subentries are deleted (and no new subentries are added in the mean time)
@@ -164,16 +209,24 @@ public abstract class CleanerChore<T extends FileCleanerDelegate> extends Schedu
}
boolean allEntriesDeleted = true;
List<FileStatus> files = Lists.newArrayListWithCapacity(entries.length);
+ List<FileStatus> dirs = new ArrayList<>();
for (FileStatus child : entries) {
- Path path = child.getPath();
if (child.isDirectory()) {
+ dirs.add(child);
+ } else {
+ // collect all files to attempt to delete in one batch
+ files.add(child);
+ }
+ }
+ if (dirs.size() > 0) {
+ sortByConsumedSpace(dirs);
+ LOG.debug("Prepared to delete files in directories: " + dirs);
+ for (FileStatus child : dirs) {
+ Path path = child.getPath();
// for each subdirectory delete it and all entries if possible
if (!checkAndDeleteDirectory(path)) {
allEntriesDeleted = false;
}
- } else {
- // collect all files to attempt to delete in one batch
- files.add(child);
}
}
if (!checkAndDeleteFiles(files)) {