Posted to commits@hbase.apache.org by bu...@apache.org on 2016/11/03 21:19:36 UTC
[5/5] hbase git commit: HBASE-16957 Moved cleaners from master package to fs.legacy package and removed filesystem/ directory layout references from a few files in master package
Incompatible config changes:
* The default value of hbase.master.logcleaner.plugins (which users should always keep in the list when customizing it) changes from
org.apache.hadoop.hbase.master.cleaner.TimeToLiveLogCleaner to org.apache.hadoop.hbase.fs.legacy.cleaner.TimeToLiveLogCleaner
* The default value of hbase.master.hfilecleaner.plugins (which users should always keep in the list when customizing it) changes from
org.apache.hadoop.hbase.master.cleaner.TimeToLiveHFileCleaner to org.apache.hadoop.hbase.fs.legacy.cleaner.TimeToLiveHFileCleaner
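For example, a deployment that customizes the HFile cleaner list would update hbase-site.xml to reference the new package (com.example.MyHFileCleaner below is a hypothetical custom plugin, not part of this change):

    <property>
      <name>hbase.master.hfilecleaner.plugins</name>
      <value>org.apache.hadoop.hbase.fs.legacy.cleaner.TimeToLiveHFileCleaner,com.example.MyHFileCleaner</value>
    </property>

The same package rename applies to any customized hbase.master.logcleaner.plugins value.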
Signed-off-by: Sean Busbey <bu...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/53f4ec9e
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/53f4ec9e
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/53f4ec9e
Branch: refs/heads/hbase-14439
Commit: 53f4ec9e993d3cc11f89eb6c1295e4d0a6e1ce39
Parents: 983cad8
Author: Umesh Agashe <ua...@cloudera.com>
Authored: Fri Oct 21 14:28:17 2016 -0700
Committer: Sean Busbey <bu...@apache.org>
Committed: Thu Nov 3 16:13:44 2016 -0500
----------------------------------------------------------------------
.../src/main/resources/hbase-default.xml | 30 +-
.../example/LongTermArchivingHFileCleaner.java | 4 +-
.../apache/hadoop/hbase/fs/MasterStorage.java | 90 ++++-
.../hadoop/hbase/fs/legacy/LegacyLayout.java | 4 +
.../hbase/fs/legacy/LegacyMasterStorage.java | 113 +++++-
.../legacy/cleaner/BaseFileCleanerDelegate.java | 56 +++
.../cleaner/BaseHFileCleanerDelegate.java | 54 +++
.../legacy/cleaner/BaseLogCleanerDelegate.java | 57 +++
.../hbase/fs/legacy/cleaner/CleanerChore.java | 294 ++++++++++++++
.../fs/legacy/cleaner/FileCleanerDelegate.java | 47 +++
.../hbase/fs/legacy/cleaner/HFileCleaner.java | 72 ++++
.../fs/legacy/cleaner/HFileLinkCleaner.java | 113 ++++++
.../hbase/fs/legacy/cleaner/LogCleaner.java | 56 +++
.../legacy/cleaner/TimeToLiveHFileCleaner.java | 64 +++
.../fs/legacy/cleaner/TimeToLiveLogCleaner.java | 73 ++++
.../fs/legacy/snapshot/SnapshotFileCache.java | 390 ++++++++++++++++++
.../legacy/snapshot/SnapshotHFileCleaner.java | 126 ++++++
.../org/apache/hadoop/hbase/io/FileLink.java | 2 +-
.../hadoop/hbase/master/AssignmentManager.java | 44 +-
.../org/apache/hadoop/hbase/master/HMaster.java | 39 +-
.../hbase/master/MasterStatusServlet.java | 3 +-
.../master/cleaner/BaseFileCleanerDelegate.java | 56 ---
.../cleaner/BaseHFileCleanerDelegate.java | 54 ---
.../master/cleaner/BaseLogCleanerDelegate.java | 57 ---
.../hbase/master/cleaner/CleanerChore.java | 294 --------------
.../master/cleaner/FileCleanerDelegate.java | 47 ---
.../hbase/master/cleaner/HFileCleaner.java | 72 ----
.../hbase/master/cleaner/HFileLinkCleaner.java | 113 ------
.../hadoop/hbase/master/cleaner/LogCleaner.java | 56 ---
.../master/cleaner/TimeToLiveHFileCleaner.java | 64 ---
.../master/cleaner/TimeToLiveLogCleaner.java | 73 ----
.../master/snapshot/SnapshotFileCache.java | 389 ------------------
.../master/snapshot/SnapshotHFileCleaner.java | 126 ------
.../hbase/master/snapshot/SnapshotManager.java | 28 +-
.../hbase/protobuf/ServerProtobufUtil.java | 122 +-----
.../regionserver/StorefileRefresherChore.java | 2 +-
.../master/ReplicationHFileCleaner.java | 5 +-
.../master/ReplicationLogCleaner.java | 6 +-
.../replication/regionserver/Replication.java | 2 +-
.../org/apache/hadoop/hbase/util/FSUtils.java | 57 ---
.../resources/hbase-webapps/master/table.jsp | 4 +-
.../hadoop/hbase/HBaseTestingUtility.java | 14 +
.../hadoop/hbase/backup/TestHFileArchiving.java | 17 +-
.../TestZooKeeperTableArchiveClient.java | 4 +-
.../client/TestCloneSnapshotFromClient.java | 6 +-
.../client/TestRestoreSnapshotFromClient.java | 6 +-
.../fs/legacy/cleaner/TestCleanerChore.java | 319 +++++++++++++++
.../fs/legacy/cleaner/TestHFileCleaner.java | 263 ++++++++++++
.../fs/legacy/cleaner/TestHFileLinkCleaner.java | 201 +++++++++
.../fs/legacy/cleaner/TestLogsCleaner.java | 308 ++++++++++++++
.../legacy/cleaner/TestSnapshotFromMaster.java | 394 ++++++++++++++++++
.../legacy/snapshot/TestSnapshotFileCache.java | 288 +++++++++++++
.../snapshot/TestSnapshotHFileCleaner.java | 190 +++++++++
.../hbase/master/cleaner/TestCleanerChore.java | 319 ---------------
.../hbase/master/cleaner/TestHFileCleaner.java | 263 ------------
.../master/cleaner/TestHFileLinkCleaner.java | 201 ---------
.../hbase/master/cleaner/TestLogsCleaner.java | 309 --------------
.../cleaner/TestReplicationHFileCleaner.java | 341 ----------------
.../master/cleaner/TestSnapshotFromMaster.java | 404 -------------------
.../master/snapshot/TestSnapshotFileCache.java | 282 -------------
.../snapshot/TestSnapshotHFileCleaner.java | 190 ---------
.../master/snapshot/TestSnapshotManager.java | 5 +-
.../master/TestReplicationHFileCleaner.java | 341 ++++++++++++++++
src/main/asciidoc/_chapters/hbase-default.adoc | 4 +-
64 files changed, 3997 insertions(+), 4030 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-common/src/main/resources/hbase-default.xml
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/resources/hbase-default.xml b/hbase-common/src/main/resources/hbase-default.xml
index 4f769cb..8bb46f5 100644
--- a/hbase-common/src/main/resources/hbase-default.xml
+++ b/hbase-common/src/main/resources/hbase-default.xml
@@ -122,13 +122,13 @@ possible configurations would overwhelm and obscure the important.
</property>
<property>
<name>hbase.master.logcleaner.plugins</name>
- <value>org.apache.hadoop.hbase.master.cleaner.TimeToLiveLogCleaner</value>
- <description>A comma-separated list of BaseLogCleanerDelegate invoked by
- the LogsCleaner service. These WAL cleaners are called in order,
- so put the cleaner that prunes the most files in front. To
- implement your own BaseLogCleanerDelegate, just put it in HBase's classpath
- and add the fully qualified class name here. Always add the above
- default log cleaners in the list.</description>
+ <value>org.apache.hadoop.hbase.fs.legacy.cleaner.TimeToLiveLogCleaner</value>
+ <description>This property is used when 'hbase.storage.type' is set to 'legacy'.
+ A comma-separated list of BaseLogCleanerDelegate invoked by the LogsCleaner
+ service. These WAL cleaners are called in order, so put the cleaner that prunes
+ the most files in front. To implement your own BaseLogCleanerDelegate, just put
+ it in HBase's classpath and add the fully qualified class name here. Always add
+ the above default log cleaners in the list.</description>
</property>
<property>
<name>hbase.master.logcleaner.ttl</name>
@@ -138,14 +138,14 @@ possible configurations would overwhelm and obscure the important.
</property>
<property>
<name>hbase.master.hfilecleaner.plugins</name>
- <value>org.apache.hadoop.hbase.master.cleaner.TimeToLiveHFileCleaner</value>
- <description>A comma-separated list of BaseHFileCleanerDelegate invoked by
- the HFileCleaner service. These HFiles cleaners are called in order,
- so put the cleaner that prunes the most files in front. To
- implement your own BaseHFileCleanerDelegate, just put it in HBase's classpath
- and add the fully qualified class name here. Always add the above
- default log cleaners in the list as they will be overwritten in
- hbase-site.xml.</description>
+ <value>org.apache.hadoop.hbase.fs.legacy.cleaner.TimeToLiveHFileCleaner</value>
+ <description>This property is used when 'hbase.storage.type' is set to 'legacy'.
+ A comma-separated list of BaseHFileCleanerDelegate invoked by the HFileCleaner
+ service. These HFile cleaners are called in order, so put the cleaner that
+ prunes the most files in front. To implement your own BaseHFileCleanerDelegate,
+ just put it in HBase's classpath and add the fully qualified class name here.
+ Always include the above default hfile cleaners in the list, since setting
+ this property in hbase-site.xml overrides the default value entirely.</description>
</property>
<property>
<name>hbase.master.infoserver.redirect</name>
http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/example/LongTermArchivingHFileCleaner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/example/LongTermArchivingHFileCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/example/LongTermArchivingHFileCleaner.java
index 09a6659..ad2e8b4 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/example/LongTermArchivingHFileCleaner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/example/LongTermArchivingHFileCleaner.java
@@ -27,7 +27,7 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
-import org.apache.hadoop.hbase.master.cleaner.BaseHFileCleanerDelegate;
+import org.apache.hadoop.hbase.fs.legacy.cleaner.BaseHFileCleanerDelegate;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.zookeeper.KeeperException;
@@ -36,7 +36,7 @@ import org.apache.zookeeper.KeeperException;
* currently being archived.
* <p>
* This only works properly if the
- * {@link org.apache.hadoop.hbase.master.cleaner.TimeToLiveHFileCleaner}
+ * {@link org.apache.hadoop.hbase.fs.legacy.cleaner.TimeToLiveHFileCleaner}
* is also enabled (it always should be), since it may take a little time
* for the ZK notification to propagate, in which case we may accidentally
* delete some files.
http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/MasterStorage.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/MasterStorage.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/MasterStorage.java
index 0ccae4a..2f3b4a4 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/MasterStorage.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/MasterStorage.java
@@ -22,18 +22,22 @@ package org.apache.hadoop.hbase.fs;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.ClusterId;
import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.NamespaceDescriptor;
+import org.apache.hadoop.hbase.ScheduledChore;
+import org.apache.hadoop.hbase.Stoppable;
import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.fs.legacy.LegacyMasterStorage;
import org.apache.hadoop.hbase.fs.RegionStorage.StoreFileVisitor;
import org.apache.hadoop.hbase.fs.legacy.LegacyPathIdentifier;
@@ -62,6 +66,39 @@ public abstract class MasterStorage<IDENTIFIER extends StorageIdentifier> {
public FileSystem getFileSystem() { return fs; } // TODO: definitely remove
public IDENTIFIER getRootContainer() { return rootContainer; }
+ /**
+ * Get Chores that are required to be run from time to time for the underlying MasterStorage
+ * implementation. A few setup methods, e.g. {@link #enableSnapshots()}, may add their own
+ * chores. The returned list of chores and their configuration may vary depending on when
+ * this method is called relative to those setup methods. In general, get and schedule
+ * chores only after the storage has been set up by invoking those methods first.
+ *
+ * Please refer to the documentation of the specific implementation for more details.
+ *
+ * @param stopper the stopper
+ * @param params additional parameters made available to individual chores
+ * @return storage chores.
+ */
+ public Iterable<ScheduledChore> getChores(Stoppable stopper, Map<String, Object> params) {
+ return new ArrayList<>();
+ }
+
+ /**
+ * This method should be called to prepare the storage implementation for snapshots. The default
+ * implementation does nothing. MasterStorage subclasses need to override this method to
+ * provide specific preparatory steps.
+ */
+ public void enableSnapshots() {
+ return;
+ }
+
+ /**
+ * Returns true if MasterStorage is prepared for snapshots
+ */
+ public boolean isSnapshotsEnabled() {
+ return true;
+ }
+
// ==========================================================================
// PUBLIC Interfaces - Visitors
// ==========================================================================
@@ -178,6 +215,45 @@ public abstract class MasterStorage<IDENTIFIER extends StorageIdentifier> {
*/
public abstract void archiveTable(StorageContext ctx, TableName tableName) throws IOException;
+ /**
+ * Runs through all tables and checks how many stores for each table
+ * have more than one file in them. Checks -ROOT- and hbase:meta too. The total
+ * percentage across all tables is stored under the special key "-TOTAL-".
+ *
+ * @return A map from each table name to its fragmentation percentage.
+ *
+ * @throws IOException When scanning the directory fails.
+ */
+ public Map<String, Integer> getTableFragmentation() throws IOException {
+ final Map<String, Integer> frags = new HashMap<>();
+ int cfCountTotal = 0;
+ int cfFragTotal = 0;
+
+ for (TableName table: getTables()) {
+ int cfCount = 0;
+ int cfFrag = 0;
+ for (HRegionInfo hri: getRegions(table)) {
+ RegionStorage<IDENTIFIER> rs = getRegionStorage(hri);
+ final Collection<String> families = rs.getFamilies();
+ for (String family: families) {
+ cfCount++;
+ cfCountTotal++;
+ if (rs.getStoreFiles(family).size() > 1) {
+ cfFrag++;
+ cfFragTotal++;
+ }
+ }
+ }
+ // compute percentage per table and store in result list
+ frags.put(table.getNameAsString(),
+ cfCount == 0? 0: Math.round((float) cfFrag / cfCount * 100));
+ }
+ // set overall percentage for all tables
+ frags.put("-TOTAL-",
+ cfCountTotal == 0? 0: Math.round((float) cfFragTotal / cfCountTotal * 100));
+ return frags;
+ }
+
// ==========================================================================
// PUBLIC Methods - Table Region related
// ==========================================================================
@@ -232,6 +308,16 @@ public abstract class MasterStorage<IDENTIFIER extends StorageIdentifier> {
public abstract void archiveRegion(HRegionInfo regionInfo) throws IOException;
// ==========================================================================
+ // PUBLIC Methods - WAL
+ // ==========================================================================
+
+ /**
+ * Returns true if the given region server has non-empty WAL files
+ * @param serverName name of the region server whose WAL directories to check
+ */
+ public abstract boolean hasWALs(String serverName) throws IOException;
+
+ // ==========================================================================
// PUBLIC Methods - visitors
// ==========================================================================
public void visitStoreFiles(StoreFileVisitor visitor)
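The getChores()/enableSnapshots() contract above implies a call order on the master side: prepare the storage first, then fetch and schedule the chores. A minimal wiring sketch, assuming a ChoreService is used for scheduling (the helper method and its arguments are illustrative, not part of this patch):

    import java.util.Map;
    import org.apache.hadoop.hbase.ChoreService;
    import org.apache.hadoop.hbase.ScheduledChore;
    import org.apache.hadoop.hbase.Stoppable;
    import org.apache.hadoop.hbase.fs.MasterStorage;

    static void scheduleStorageChores(MasterStorage<?> storage, Stoppable stopper,
        Map<String, Object> params) {
      storage.enableSnapshots(); // setup first; may adjust cleaner plugin config
      ChoreService choreService = new ChoreService("master-storage");
      for (ScheduledChore chore : storage.getChores(stopper, params)) {
        choreService.scheduleChore(chore);
      }
    }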
http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/LegacyLayout.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/LegacyLayout.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/LegacyLayout.java
index ce29bb2..2906f91 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/LegacyLayout.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/LegacyLayout.java
@@ -124,4 +124,8 @@ public final class LegacyLayout {
public static Path getBulkDir(Path rootDir) {
return new Path(rootDir, HConstants.BULKLOAD_STAGING_DIR_NAME);
}
+
+ public static Path getOldLogDir(final Path rootDir) {
+ return new Path(rootDir, HConstants.HREGION_OLDLOGDIR_NAME);
+ }
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/LegacyMasterStorage.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/LegacyMasterStorage.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/LegacyMasterStorage.java
index b6443de..aa4de2c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/LegacyMasterStorage.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/LegacyMasterStorage.java
@@ -24,6 +24,9 @@ import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -34,8 +37,19 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
-import org.apache.hadoop.hbase.*;
+import org.apache.hadoop.hbase.ClusterId;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.NamespaceDescriptor;
+import org.apache.hadoop.hbase.ScheduledChore;
+import org.apache.hadoop.hbase.Stoppable;
+import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
+import org.apache.hadoop.hbase.fs.legacy.cleaner.HFileCleaner;
+import org.apache.hadoop.hbase.fs.legacy.cleaner.HFileLinkCleaner;
+import org.apache.hadoop.hbase.fs.legacy.cleaner.LogCleaner;
+import org.apache.hadoop.hbase.fs.legacy.snapshot.SnapshotHFileCleaner;
import org.apache.hadoop.hbase.mob.MobUtils;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
@@ -80,6 +94,8 @@ public class LegacyMasterStorage extends MasterStorage<LegacyPathIdentifier> {
private final boolean isSecurityEnabled;
+ public static final String SPLITTING_EXT = "-splitting";
+
public LegacyMasterStorage(Configuration conf, FileSystem fs, LegacyPathIdentifier rootDir) {
super(conf, fs, rootDir);
@@ -97,6 +113,54 @@ public class LegacyMasterStorage extends MasterStorage<LegacyPathIdentifier> {
this.isSecurityEnabled = "kerberos".equalsIgnoreCase(conf.get("hbase.security.authentication"));
}
+ @Override
+ public Iterable<ScheduledChore> getChores(Stoppable stopper, Map<String, Object> params) {
+ ArrayList<ScheduledChore> chores = (ArrayList<ScheduledChore>) super.getChores(stopper, params);
+
+ int cleanerInterval = getConfiguration().getInt("hbase.master.cleaner.interval", 60 * 1000);
+ // add log cleaner chore
+ chores.add(new LogCleaner(cleanerInterval, stopper, getConfiguration(), getFileSystem(),
+ LegacyLayout.getOldLogDir(getRootContainer().path)));
+ // add hfile archive cleaner chore
+ chores.add(new HFileCleaner(cleanerInterval, stopper, getConfiguration(), getFileSystem(),
+ LegacyLayout.getArchiveDir(getRootContainer().path), params));
+
+ return chores;
+ }
+
+ /**
+ * This method modifies the chore configuration for snapshots. Call it before
+ * instantiating and scheduling the chores returned by {@link #getChores(Stoppable, Map)}.
+ */
+ @Override
+ public void enableSnapshots() {
+ super.enableSnapshots();
+ if (!isSnapshotsEnabled()) {
+ // Extract cleaners from conf
+ Set<String> hfileCleaners = new HashSet<>();
+ String[] cleaners = getConfiguration().getStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS);
+ if (cleaners != null) Collections.addAll(hfileCleaners, cleaners);
+
+ // add snapshot related cleaners
+ hfileCleaners.add(SnapshotHFileCleaner.class.getName());
+ hfileCleaners.add(HFileLinkCleaner.class.getName());
+
+ // Set cleaners conf
+ getConfiguration().setStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS,
+ hfileCleaners.toArray(new String[hfileCleaners.size()]));
+ }
+ }
+
+ @Override
+ public boolean isSnapshotsEnabled() {
+ // Extract cleaners from conf
+ Set<String> hfileCleaners = new HashSet<>();
+ String[] cleaners = getConfiguration().getStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS);
+ if (cleaners != null) Collections.addAll(hfileCleaners, cleaners);
+ return hfileCleaners.contains(SnapshotHFileCleaner.class.getName()) &&
+ hfileCleaners.contains(HFileLinkCleaner.class.getName());
+ }
+
// ==========================================================================
// PUBLIC Methods - Namespace related
// ==========================================================================
@@ -268,6 +332,53 @@ public class LegacyMasterStorage extends MasterStorage<LegacyPathIdentifier> {
}
// ==========================================================================
+ // PUBLIC - WAL
+ // ==========================================================================
+ @Override
+ public boolean hasWALs(String serverName) throws IOException {
+ Path logDir = new Path(getRootContainer().path, new StringBuilder(
+ HConstants.HREGION_LOGDIR_NAME).append("/").append(serverName).toString());
+ Path splitDir = logDir.suffix(SPLITTING_EXT);
+
+ return checkWALs(logDir) || checkWALs(splitDir);
+ }
+
+
+ // ==========================================================================
+ // PRIVATE - WAL
+ // ==========================================================================
+
+ private boolean checkWALs(Path dir) throws IOException {
+ FileSystem fs = getFileSystem();
+
+ if (!fs.exists(dir)) {
+ LOG.debug(dir + " not found!");
+ return false;
+ } else if (!fs.getFileStatus(dir).isDirectory()) {
+ LOG.warn(dir + " is not a directory");
+ return false;
+ }
+
+ FileStatus[] files = FSUtils.listStatus(fs, dir);
+ if (files == null || files.length == 0) {
+ LOG.debug(dir + " has no files");
+ return false;
+ }
+
+ for (FileStatus dentry: files) {
+ if (dentry.isFile() && dentry.getLen() > 0) {
+ LOG.debug(dir + " has a non-empty file: " + dentry.getPath());
+ return true;
+ } else if (dentry.isDirectory() && checkWALs(dentry.getPath())) {
+ LOG.debug(dentry + " is a directory and has a non-empty file!");
+ return true;
+ }
+ }
+ LOG.debug("Found zero non-empty wal files for: " + dir);
+ return false;
+ }
+
+ // ==========================================================================
// PROTECTED Methods - Bootstrap
// ==========================================================================
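Note that enableSnapshots() here is purely a configuration change: it appends the snapshot-aware cleaners to hbase.master.hfilecleaner.plugins so that the HFileCleaner chore built by getChores() picks them up. A sketch of the observable effect (the LegacyPathIdentifier constructor argument is an assumption based on this branch):

    Configuration conf = HBaseConfiguration.create();
    FileSystem fs = FileSystem.get(conf);
    LegacyMasterStorage storage = new LegacyMasterStorage(conf, fs,
        new LegacyPathIdentifier(FSUtils.getRootDir(conf))); // ctor args assumed
    storage.enableSnapshots();
    // hbase.master.hfilecleaner.plugins now also names SnapshotHFileCleaner
    // and HFileLinkCleaner, so:
    assert storage.isSnapshotsEnabled();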
http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/BaseFileCleanerDelegate.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/BaseFileCleanerDelegate.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/BaseFileCleanerDelegate.java
new file mode 100644
index 0000000..6ab33a7
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/BaseFileCleanerDelegate.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.fs.legacy.cleaner;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.hbase.BaseConfigurable;
+
+import com.google.common.base.Predicate;
+import com.google.common.collect.Iterables;
+
+import java.util.Map;
+
+/**
+ * Base class for file cleaners which allows subclasses to implement a simple
+ * isFileDeletable method (which used to be the FileCleanerDelegate contract).
+ */
+public abstract class BaseFileCleanerDelegate extends BaseConfigurable
+implements FileCleanerDelegate {
+
+ @Override
+ public Iterable<FileStatus> getDeletableFiles(Iterable<FileStatus> files) {
+ return Iterables.filter(files, new Predicate<FileStatus>() {
+ @Override
+ public boolean apply(FileStatus file) {
+ return isFileDeletable(file);
+ }});
+ }
+
+ @Override
+ public void init(Map<String, Object> params) {
+ // subclass could override it if needed.
+ }
+
+ /**
+ * Should the master delete the file or keep it?
+ * @param fStat file status of the file to check
+ * @return <tt>true</tt> if the file is deletable, <tt>false</tt> if not
+ */
+ protected abstract boolean isFileDeletable(FileStatus fStat);
+
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/BaseHFileCleanerDelegate.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/BaseHFileCleanerDelegate.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/BaseHFileCleanerDelegate.java
new file mode 100644
index 0000000..a5018bd
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/BaseHFileCleanerDelegate.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.fs.legacy.cleaner;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+
+/**
+ * Base class for the hfile cleaning function inside the master. By default, only the
+ * {@link TimeToLiveHFileCleaner} is called.
+ * <p>
+ * If other effects are needed, implement your own LogCleanerDelegate and add it to the
+ * configuration "hbase.master.hfilecleaner.plugins", which is a comma-separated list of fully
+ * qualified class names. The <code>HFileCleaner</code> will build the cleaner chain in
+ * the order specified by the configuration.
+ * </p>
+ * <p>
+ * For subclasses, setConf will be called exactly <i>once</i> before using the cleaner.
+ * </p>
+ * <p>
+ * Since {@link BaseHFileCleanerDelegate HFileCleanerDelegates} are created in
+ * HFileCleaner by reflection, classes that implement this interface <b>must</b>
+ * provide a default constructor.
+ * </p>
+ */
+@InterfaceAudience.Private
+public abstract class BaseHFileCleanerDelegate extends BaseFileCleanerDelegate {
+
+ private boolean stopped = false;
+
+ @Override
+ public void stop(String why) {
+ this.stopped = true;
+ }
+
+ @Override
+ public boolean isStopped() {
+ return this.stopped;
+ }
+}
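As the javadoc above says, a plugin only needs a default constructor plus an isFileDeletable implementation. A hypothetical delegate under the new package layout (class name and size threshold are illustrative only):

    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.hbase.fs.legacy.cleaner.BaseHFileCleanerDelegate;

    public class SmallHFileCleaner extends BaseHFileCleanerDelegate {
      // Permit deletion only of archived hfiles smaller than 1 MB.
      @Override
      protected boolean isFileDeletable(FileStatus fStat) {
        return fStat.getLen() < 1024 * 1024;
      }
    }

To register it, append its fully qualified name to hbase.master.hfilecleaner.plugins after the default cleaners.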
http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/BaseLogCleanerDelegate.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/BaseLogCleanerDelegate.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/BaseLogCleanerDelegate.java
new file mode 100644
index 0000000..4d4e31f
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/BaseLogCleanerDelegate.java
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.fs.legacy.cleaner;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.fs.FileStatus;
+
+/**
+ * Base class for the log cleaning function inside the master. By default, two
+ * cleaners: <code>TimeToLiveLogCleaner</code> and
+ * <code>ReplicationLogCleaner</code> are called in order. So if other effects
+ * are needed, implement your own LogCleanerDelegate and add it to the
+ * configuration "hbase.master.logcleaner.plugins", which is a comma-separated
+ * list of fully qualified class names. LogsCleaner will add it to the chain.
+ * <p>
+ * HBase ships with LogsCleaner as the default implementation.
+ * <p>
+ * This interface extends Configurable, so setConf needs to be called once
+ * before using the cleaner. Since LogCleanerDelegates are created in
+ * LogsCleaner by reflection, classes that implement this interface should
+ * provide a default constructor.
+ */
+@InterfaceAudience.Private
+public abstract class BaseLogCleanerDelegate extends BaseFileCleanerDelegate {
+
+ @Override
+ public boolean isFileDeletable(FileStatus fStat) {
+ return isLogDeletable(fStat);
+ }
+
+ /**
+ * Should the master delete the log or keep it?
+ * <p>
+ * Implementing classes should override {@link #isFileDeletable(FileStatus)} instead.
+ * @param fStat file status of the file
+ * @return true if the log is deletable, false (default) if not
+ */
+ @Deprecated
+ public boolean isLogDeletable(FileStatus fStat) {
+ return false;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/CleanerChore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/CleanerChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/CleanerChore.java
new file mode 100644
index 0000000..ed23d96
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/CleanerChore.java
@@ -0,0 +1,294 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.fs.legacy.cleaner;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.ScheduledChore;
+import org.apache.hadoop.hbase.Stoppable;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.ipc.RemoteException;
+
+import java.io.IOException;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Abstract Cleaner that uses a chain of delegates to clean a directory of files
+ * @param <T> Cleaner delegate class that is dynamically loaded from configuration
+ */
+public abstract class CleanerChore<T extends FileCleanerDelegate> extends ScheduledChore {
+
+ private static final Log LOG = LogFactory.getLog(CleanerChore.class.getName());
+
+ private final FileSystem fs;
+ private final Path oldFileDir;
+ private final Configuration conf;
+ protected List<T> cleanersChain;
+ protected Map<String, Object> params;
+
+ public CleanerChore(String name, final int sleepPeriod, final Stoppable s, Configuration conf,
+ FileSystem fs, Path oldFileDir, String confKey) {
+ this(name, sleepPeriod, s, conf, fs, oldFileDir, confKey, null);
+ }
+
+ /**
+ * @param name name of the chore being run
+ * @param sleepPeriod the period of time to sleep between each run
+ * @param s the stopper
+ * @param conf configuration to use
+ * @param fs handle to the FS
+ * @param oldFileDir the path to the archived files
+ * @param confKey configuration key for the classes to instantiate
+ * @param params members could be used in cleaner
+ */
+ public CleanerChore(String name, final int sleepPeriod, final Stoppable s, Configuration conf,
+ FileSystem fs, Path oldFileDir, String confKey, Map<String, Object> params) {
+ super(name, s, sleepPeriod);
+ this.fs = fs;
+ this.oldFileDir = oldFileDir;
+ this.conf = conf;
+ this.params = params;
+ initCleanerChain(confKey);
+ }
+
+
+ /**
+ * Validate the file to see if it even belongs in the directory. If it is valid, then the file
+ * will go through the cleaner delegates, but otherwise the file is just deleted.
+ * @param file full {@link Path} of the file to be checked
+ * @return <tt>true</tt> if the file is valid, <tt>false</tt> otherwise
+ */
+ protected abstract boolean validate(Path file);
+
+ /**
+ * Instantiate and initialize all the file cleaners set in the configuration
+ * @param confKey key to get the file cleaner classes from the configuration
+ */
+ private void initCleanerChain(String confKey) {
+ this.cleanersChain = new LinkedList<T>();
+ String[] logCleaners = conf.getStrings(confKey);
+ if (logCleaners != null) {
+ for (String className : logCleaners) {
+ T logCleaner = newFileCleaner(className, conf);
+ if (logCleaner != null) {
+ LOG.debug("initialize cleaner=" + className);
+ this.cleanersChain.add(logCleaner);
+ }
+ }
+ }
+ }
+
+ /**
+ * A utility method to create new instances of LogCleanerDelegate based on the class name of the
+ * LogCleanerDelegate.
+ * @param className fully qualified class name of the LogCleanerDelegate
+ * @param conf configuration used to initialize the cleaner
+ * @return the new instance
+ */
+ private T newFileCleaner(String className, Configuration conf) {
+ try {
+ Class<? extends FileCleanerDelegate> c = Class.forName(className).asSubclass(
+ FileCleanerDelegate.class);
+ @SuppressWarnings("unchecked")
+ T cleaner = (T) c.newInstance();
+ cleaner.setConf(conf);
+ cleaner.init(this.params);
+ return cleaner;
+ } catch (Exception e) {
+ LOG.warn("Can NOT create CleanerDelegate: " + className, e);
+ // skipping if can't instantiate
+ return null;
+ }
+ }
+
+ @Override
+ protected void chore() {
+ try {
+ FileStatus[] files = FSUtils.listStatus(this.fs, this.oldFileDir);
+ checkAndDeleteEntries(files);
+ } catch (IOException e) {
+ e = e instanceof RemoteException ?
+ ((RemoteException)e).unwrapRemoteException() : e;
+ LOG.warn("Error while cleaning the logs", e);
+ }
+ }
+
+ /**
+ * Loop over the given directory entries, and check whether they can be deleted.
+ * If an entry is itself a directory it will be recursively checked and deleted iff
+ * all subentries are deleted (and no new subentries are added in the meantime)
+ *
+ * @param entries directory entries to check
+ * @return true if all entries were successfully deleted
+ */
+ private boolean checkAndDeleteEntries(FileStatus[] entries) {
+ if (entries == null) {
+ return true;
+ }
+ boolean allEntriesDeleted = true;
+ List<FileStatus> files = Lists.newArrayListWithCapacity(entries.length);
+ for (FileStatus child : entries) {
+ Path path = child.getPath();
+ if (child.isDirectory()) {
+ // for each subdirectory delete it and all entries if possible
+ if (!checkAndDeleteDirectory(path)) {
+ allEntriesDeleted = false;
+ }
+ } else {
+ // collect all files to attempt to delete in one batch
+ files.add(child);
+ }
+ }
+ if (!checkAndDeleteFiles(files)) {
+ allEntriesDeleted = false;
+ }
+ return allEntriesDeleted;
+ }
+
+ /**
+ * Attempt to delete a directory and all files under that directory. Each child file is passed
+ * through the delegates to see if it can be deleted. If the directory has no children when the
+ * cleaners have finished it is deleted.
+ * <p>
+ * If new children files are added between checks of the directory, the directory will <b>not</b>
+ * be deleted.
+ * @param dir directory to check
+ * @return <tt>true</tt> if the directory was deleted, <tt>false</tt> otherwise.
+ */
+ @VisibleForTesting boolean checkAndDeleteDirectory(Path dir) {
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Checking directory: " + dir);
+ }
+
+ try {
+ FileStatus[] children = FSUtils.listStatus(fs, dir);
+ boolean allChildrenDeleted = checkAndDeleteEntries(children);
+
+ // if the directory still has children, we can't delete it, so we are done
+ if (!allChildrenDeleted) return false;
+ } catch (IOException e) {
+ e = e instanceof RemoteException ?
+ ((RemoteException)e).unwrapRemoteException() : e;
+ LOG.warn("Error while listing directory: " + dir, e);
+ // couldn't list directory, so don't try to delete, and don't return success
+ return false;
+ }
+
+ // otherwise, all the children (that we know about) have been deleted, so we should try to
+ // delete this directory. However, don't do so recursively so we don't delete files that have
+ // been added since we last checked.
+ try {
+ return fs.delete(dir, false);
+ } catch (IOException e) {
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Couldn't delete directory: " + dir, e);
+ }
+ // couldn't delete w/o exception, so we can't return success.
+ return false;
+ }
+ }
+
+ /**
+ * Run the given files through each of the cleaners to see if they should be deleted,
+ * deleting them if necessary.
+ * @param files List of FileStatus for the files to check (and possibly delete)
+ * @return true iff successfully deleted all files
+ */
+ private boolean checkAndDeleteFiles(List<FileStatus> files) {
+ // first check to see if the path is valid
+ List<FileStatus> validFiles = Lists.newArrayListWithCapacity(files.size());
+ List<FileStatus> invalidFiles = Lists.newArrayList();
+ for (FileStatus file : files) {
+ if (validate(file.getPath())) {
+ validFiles.add(file);
+ } else {
+ LOG.warn("Found a wrongly formatted file: " + file.getPath() + " - will delete it.");
+ invalidFiles.add(file);
+ }
+ }
+
+ Iterable<FileStatus> deletableValidFiles = validFiles;
+ // check each of the cleaners for the valid files
+ for (T cleaner : cleanersChain) {
+ if (cleaner.isStopped() || this.getStopper().isStopped()) {
+ LOG.warn("A file cleaner" + this.getName() + " is stopped, won't delete any more files in:"
+ + this.oldFileDir);
+ return false;
+ }
+
+ Iterable<FileStatus> filteredFiles = cleaner.getDeletableFiles(deletableValidFiles);
+
+ // trace which cleaner is holding on to each file
+ if (LOG.isTraceEnabled()) {
+ ImmutableSet<FileStatus> filteredFileSet = ImmutableSet.copyOf(filteredFiles);
+ for (FileStatus file : deletableValidFiles) {
+ if (!filteredFileSet.contains(file)) {
+ LOG.trace(file.getPath() + " is not deletable according to:" + cleaner);
+ }
+ }
+ }
+
+ deletableValidFiles = filteredFiles;
+ }
+
+ Iterable<FileStatus> filesToDelete = Iterables.concat(invalidFiles, deletableValidFiles);
+ int deletedFileCount = 0;
+ for (FileStatus file : filesToDelete) {
+ Path filePath = file.getPath();
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Removing: " + filePath + " from archive");
+ }
+ try {
+ boolean success = this.fs.delete(filePath, false);
+ if (success) {
+ deletedFileCount++;
+ } else {
+ LOG.warn("Attempted to delete:" + filePath
+ + ", but couldn't. Run cleaner chain and attempt to delete on next pass.");
+ }
+ } catch (IOException e) {
+ e = e instanceof RemoteException ?
+ ((RemoteException)e).unwrapRemoteException() : e;
+ LOG.warn("Error while deleting: " + filePath, e);
+ }
+ }
+
+ return deletedFileCount == files.size();
+ }
+
+ @Override
+ public void cleanup() {
+ for (T lc : this.cleanersChain) {
+ try {
+ lc.stop("Exiting");
+ } catch (Throwable t) {
+ LOG.warn("Stopping", t);
+ }
+ }
+ }
+}
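The chain semantics in checkAndDeleteFiles() are worth calling out: each delegate can only veto deletions, so the surviving set is the intersection of every delegate's answer, while files that fail validate() bypass the chain and are deleted outright. A reduced sketch of that filtering loop (method name and parameters are illustrative):

    static Iterable<FileStatus> deletable(Iterable<FileStatus> candidates,
        List<BaseHFileCleanerDelegate> delegates) {
      Iterable<FileStatus> remaining = candidates;
      for (BaseHFileCleanerDelegate delegate : delegates) {
        remaining = delegate.getDeletableFiles(remaining); // may only shrink the set
      }
      return remaining; // survivors get deleted, non-recursively
    }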
http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/FileCleanerDelegate.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/FileCleanerDelegate.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/FileCleanerDelegate.java
new file mode 100644
index 0000000..0291640
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/FileCleanerDelegate.java
@@ -0,0 +1,47 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.fs.legacy.cleaner;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.hbase.Stoppable;
+
+import java.util.Map;
+
+/**
+ * General interface for cleaning files from a folder (generally an archive or
+ * backup folder). These are chained via the {@link CleanerChore} to determine
+ * if a given file should be deleted.
+ */
+@InterfaceAudience.Private
+public interface FileCleanerDelegate extends Configurable, Stoppable {
+
+ /**
+ * Determines which of the given files are safe to delete
+ * @param files files to check for deletion
+ * @return files that are ok to delete according to this cleaner
+ */
+ Iterable<FileStatus> getDeletableFiles(Iterable<FileStatus> files);
+
+
+ /**
+ * This method is used to pass runtime parameters into the delegate instance.
+ */
+ void init(Map<String, Object> params);
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/HFileCleaner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/HFileCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/HFileCleaner.java
new file mode 100644
index 0000000..7137432
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/HFileCleaner.java
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.fs.legacy.cleaner;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Stoppable;
+import org.apache.hadoop.hbase.io.HFileLink;
+import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
+/**
+ * This Chore, every time it runs, will clear the HFiles in the hfile archive
+ * folder that every HFile cleaner in the chain agrees are deletable.
+ */
+@InterfaceAudience.Private
+public class HFileCleaner extends CleanerChore<BaseHFileCleanerDelegate> {
+
+ public static final String MASTER_HFILE_CLEANER_PLUGINS = "hbase.master.hfilecleaner.plugins";
+
+ public HFileCleaner(final int period, final Stoppable stopper, Configuration conf, FileSystem fs,
+ Path directory) {
+ this(period, stopper, conf, fs, directory, null);
+ }
+
+ /**
+ * @param period the period of time to sleep between each run
+ * @param stopper the stopper
+ * @param conf configuration to use
+ * @param fs handle to the FS
+ * @param directory directory to be cleaned
+ * @param params params could be used in subclass of BaseHFileCleanerDelegate
+ */
+ public HFileCleaner(final int period, final Stoppable stopper, Configuration conf, FileSystem fs,
+ Path directory, Map<String, Object> params) {
+ super("HFileCleaner", period, stopper, conf, fs,
+ directory, MASTER_HFILE_CLEANER_PLUGINS, params);
+ }
+
+ @Override
+ protected boolean validate(Path file) {
+ if (HFileLink.isBackReferencesDir(file) || HFileLink.isBackReferencesDir(file.getParent())) {
+ return true;
+ }
+ return StoreFileInfo.validateStoreFileName(file.getName());
+ }
+
+ /**
+ * Exposed for TESTING!
+ */
+ public List<BaseHFileCleanerDelegate> getDelegatesForTesting() {
+ return this.cleanersChain;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/HFileLinkCleaner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/HFileLinkCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/HFileLinkCleaner.java
new file mode 100644
index 0000000..7c1f4d2
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/HFileLinkCleaner.java
@@ -0,0 +1,113 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.fs.legacy.cleaner;
+
+import java.io.IOException;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.io.HFileLink;
+import org.apache.hadoop.hbase.mob.MobUtils;
+import org.apache.hadoop.hbase.util.FSUtils;
+
+/**
+ * HFileLink cleaner that determines if a hfile should be deleted.
+ * HFiles can be deleted only if there are no links to them.
+ *
+ * When a HFileLink is created a back reference file is created in:
+ * /hbase/archive/table/region/cf/.links-hfile/ref-region.ref-table
+ * To check if the hfile can be deleted the back references folder must be empty.
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
+public class HFileLinkCleaner extends BaseHFileCleanerDelegate {
+ private static final Log LOG = LogFactory.getLog(HFileLinkCleaner.class);
+
+ private FileSystem fs = null;
+
+ @Override
+ public synchronized boolean isFileDeletable(FileStatus fStat) {
+ if (this.fs == null) return false;
+ Path filePath = fStat.getPath();
+ // HFile Link is always deletable
+ if (HFileLink.isHFileLink(filePath)) return true;
+
+ // If the file is inside a link references directory, it means that it is a back ref link.
+ // The back ref can be deleted only if the referenced file doesn't exist.
+ Path parentDir = filePath.getParent();
+ if (HFileLink.isBackReferencesDir(parentDir)) {
+ Path hfilePath = null;
+ try {
+ // Also check if the HFile is in the HBASE_TEMP_DIRECTORY; this is where the referenced
+ // file gets created when cloning a snapshot.
+ hfilePath = HFileLink.getHFileFromBackReference(
+ new Path(FSUtils.getRootDir(getConf()), HConstants.HBASE_TEMP_DIRECTORY), filePath);
+ if (fs.exists(hfilePath)) {
+ return false;
+ }
+ // check whether the HFileLink still exists in mob dir.
+ hfilePath = HFileLink.getHFileFromBackReference(MobUtils.getMobHome(getConf()), filePath);
+ if (fs.exists(hfilePath)) {
+ return false;
+ }
+ hfilePath = HFileLink.getHFileFromBackReference(FSUtils.getRootDir(getConf()), filePath);
+ return !fs.exists(hfilePath);
+ } catch (IOException e) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Couldn't verify if the referenced file still exists, keep it just in case: "
+ + hfilePath);
+ }
+ return false;
+ }
+ }
+
+ // HFile is deletable only if it has no links
+ Path backRefDir = null;
+ try {
+ backRefDir = HFileLink.getBackReferencesDir(parentDir, filePath.getName());
+ return FSUtils.listStatus(fs, backRefDir) == null;
+ } catch (IOException e) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Couldn't get the references, not deleting file, just in case. filePath="
+ + filePath + ", backRefDir=" + backRefDir);
+ }
+ return false;
+ }
+ }
+
+ @Override
+ public synchronized void setConf(Configuration conf) {
+ super.setConf(conf);
+
+ // setup filesystem
+ try {
+ this.fs = FileSystem.get(this.getConf());
+ } catch (IOException e) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Couldn't instantiate the file system, not deleting file, just in case. "
+ + FileSystem.FS_DEFAULT_NAME_KEY + "="
+ + getConf().get(FileSystem.FS_DEFAULT_NAME_KEY, FileSystem.DEFAULT_FS));
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/LogCleaner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/LogCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/LogCleaner.java
new file mode 100644
index 0000000..430c482
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/LogCleaner.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.fs.legacy.cleaner;
+
+import static org.apache.hadoop.hbase.HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Stoppable;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
+
+/**
+ * This Chore, every time it runs, will attempt to delete the WALs in the old logs folder. The WAL
+ * is only deleted if none of the cleaner delegates says otherwise.
+ * @see BaseLogCleanerDelegate
+ */
+@InterfaceAudience.Private
+public class LogCleaner extends CleanerChore<BaseLogCleanerDelegate> {
+ private static final Log LOG = LogFactory.getLog(LogCleaner.class.getName());
+
+ /**
+ * @param p the period of time to sleep between each run
+ * @param s the stopper
+ * @param conf configuration to use
+ * @param fs handle to the FS
+ * @param oldLogDir the path to the archived logs
+ */
+ public LogCleaner(final int p, final Stoppable s, Configuration conf, FileSystem fs,
+ Path oldLogDir) {
+ super("LogsCleaner", p, s, conf, fs, oldLogDir, HBASE_MASTER_LOGCLEANER_PLUGINS);
+ }
+
+ @Override
+ protected boolean validate(Path file) {
+ return AbstractFSWALProvider.validateWALFilename(file.getName());
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/TimeToLiveHFileCleaner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/TimeToLiveHFileCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/TimeToLiveHFileCleaner.java
new file mode 100644
index 0000000..177df38
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/TimeToLiveHFileCleaner.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.fs.legacy.cleaner;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+
+/**
+ * HFile cleaner that uses the timestamp of the hfile to determine if it should be deleted. By
+ * default an hfile is allowed to live for {@value #DEFAULT_TTL} ms (5 minutes) after it is
+ * archived.
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
+public class TimeToLiveHFileCleaner extends BaseHFileCleanerDelegate {
+
+ private static final Log LOG = LogFactory.getLog(TimeToLiveHFileCleaner.class.getName());
+ public static final String TTL_CONF_KEY = "hbase.master.hfilecleaner.ttl";
+ // default ttl = 5 minutes
+ public static final long DEFAULT_TTL = 60000 * 5;
+ // Configured time an hfile can be kept after it was moved to the archive
+ private long ttl;
+
+ @Override
+ public void setConf(Configuration conf) {
+ this.ttl = conf.getLong(TTL_CONF_KEY, DEFAULT_TTL);
+ super.setConf(conf);
+ }
+
+ @Override
+ public boolean isFileDeletable(FileStatus fStat) {
+ long currentTime = EnvironmentEdgeManager.currentTime();
+ long time = fStat.getModificationTime();
+ long life = currentTime - time;
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("HFile life:" + life + ", ttl:" + ttl + ", current:" + currentTime + ", from: "
+ + time);
+ }
+ if (life < 0) {
+ LOG.warn("Found a hfile (" + fStat.getPath() + ") newer than current time (" + currentTime
+ + " < " + time + "), probably a clock skew");
+ return false;
+ }
+ return life > ttl;
+ }
+}
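
To make the TTL check concrete, a hedged sketch of the behavior (the paths are made up, and the
public Hadoop FileStatus constructor is used to fake modification times):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.fs.legacy.cleaner.TimeToLiveHFileCleaner;

    public class TtlHFileCleanerSketch {
      public static void main(String[] args) {
        Configuration conf = HBaseConfiguration.create();
        conf.setLong(TimeToLiveHFileCleaner.TTL_CONF_KEY, 60000L); // 1 minute, not the default
        TimeToLiveHFileCleaner cleaner = new TimeToLiveHFileCleaner();
        cleaner.setConf(conf);

        long now = System.currentTimeMillis();
        FileStatus oldHFile = new FileStatus(0, false, 1, 0, now - 120000L,
            new Path("/hbase/archive/old-hfile"));
        FileStatus freshHFile = new FileStatus(0, false, 1, 0, now - 10000L,
            new Path("/hbase/archive/fresh-hfile"));

        System.out.println(cleaner.isFileDeletable(oldHFile));   // true: life 120s > ttl 60s
        System.out.println(cleaner.isFileDeletable(freshHFile)); // false: still within the TTL
      }
    }
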
http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/TimeToLiveLogCleaner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/TimeToLiveLogCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/TimeToLiveLogCleaner.java
new file mode 100644
index 0000000..4aad961
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/TimeToLiveLogCleaner.java
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.fs.legacy.cleaner;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+
+/**
+ * Log cleaner that uses the timestamp of the WAL to determine if it should be deleted. By
+ * default a WAL is allowed to live for 10 minutes after it is closed.
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
+public class TimeToLiveLogCleaner extends BaseLogCleanerDelegate {
+ private static final Log LOG = LogFactory.getLog(TimeToLiveLogCleaner.class.getName());
+ // Configured time a log can be kept after it was closed
+ private long ttl;
+ private boolean stopped = false;
+
+ @Override
+ public boolean isLogDeletable(FileStatus fStat) {
+ long currentTime = EnvironmentEdgeManager.currentTime();
+ long time = fStat.getModificationTime();
+ long life = currentTime - time;
+
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Log life:" + life + ", ttl:" + ttl + ", current:" + currentTime + ", from: "
+ + time);
+ }
+ if (life < 0) {
+ LOG.warn("Found a log (" + fStat.getPath() + ") newer than current time (" + currentTime
+ + " < " + time + "), probably a clock skew");
+ return false;
+ }
+ return life > ttl;
+ }
+
+ @Override
+ public void setConf(Configuration conf) {
+ super.setConf(conf);
+ this.ttl = conf.getLong("hbase.master.logcleaner.ttl", 600000);
+ }
+
+ @Override
+ public void stop(String why) {
+ this.stopped = true;
+ }
+
+ @Override
+ public boolean isStopped() {
+ return this.stopped;
+ }
+}
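
The clock-skew guard above is easy to miss; a hedged sketch (the path and class name are
illustrative) showing that a WAL whose modification time is in the future is never deleted:

    package org.apache.hadoop.hbase.fs.legacy.cleaner;

    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.HBaseConfiguration;

    public class SkewSketch {
      public static void main(String[] args) {
        TimeToLiveLogCleaner cleaner = new TimeToLiveLogCleaner();
        cleaner.setConf(HBaseConfiguration.create()); // 10-minute default TTL
        long now = System.currentTimeMillis();
        // mtime one minute in the future: life < 0, so the warn branch keeps the WAL
        FileStatus skewed = new FileStatus(0, false, 1, 0, now + 60000L,
            new Path("/hbase/oldWALs/skewed-wal"));
        System.out.println(cleaner.isLogDeletable(skewed)); // false
      }
    }
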
http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/snapshot/SnapshotFileCache.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/snapshot/SnapshotFileCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/snapshot/SnapshotFileCache.java
new file mode 100644
index 0000000..f9f1c67
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/snapshot/SnapshotFileCache.java
@@ -0,0 +1,390 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.fs.legacy.snapshot;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Timer;
+import java.util.TimerTask;
+import java.util.concurrent.locks.ReentrantLock;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Stoppable;
+import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
+import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException;
+import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
+import org.apache.hadoop.hbase.util.FSUtils;
+
+/**
+ * Intelligently keep track of all the files for all the snapshots.
+ * <p>
+ * A cache of files is kept to avoid querying the {@link FileSystem} frequently. If there is a cache
+ * miss the directory modification time is used to ensure that we don't rescan directories that we
+ * already have in cache. We only check the modification times of the snapshot directories
+ * (/hbase/.snapshot/[snapshot_name]) to determine if the files need to be loaded into the cache.
+ * <p>
+ * New snapshots will be added to the cache and deleted snapshots will be removed when we refresh
+ * the cache. If the files underneath a snapshot directory are changed, but not the snapshot itself,
+ * we will ignore updates to that snapshot's files.
+ * <p>
+ * This is sufficient because each snapshot has its own directory and is added via an atomic rename
+ * <i>once</i>, when the snapshot is created. We don't need to worry about the data in a snapshot
+ * changing underneath us while it is in use.
+ * <p>
+ * Further, the cache is periodically refreshed to ensure that files in snapshots that were deleted
+ * are also removed from the cache.
+ * <p>
+ * A {@link SnapshotFileCache.SnapshotFileInspector} must be passed when creating <tt>this</tt> to
+ * allow extraction of files under the /hbase/.snapshot/[snapshot name] directory, for each
+ * snapshot. This allows you to cache only a subset of files, for instance all the logs in the
+ * .logs directory or all the files under all the regions.
+ * <p>
+ * <tt>this</tt> also considers all running snapshots (those under /hbase/.snapshot/.tmp) as valid
+ * snapshots and will attempt to cache files from those snapshots as well.
+ * <p>
+ * Queries about a given file are thread-safe with respect to multiple queries and cache refreshes.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+public class SnapshotFileCache implements Stoppable {
+ interface SnapshotFileInspector {
+ /**
+ * Returns a collection of file names needed by the snapshot.
+ * @param snapshotDir {@link Path} to the snapshot directory to scan.
+ * @return the collection of file names needed by the snapshot.
+ */
+ Collection<String> filesUnderSnapshot(final Path snapshotDir) throws IOException;
+ }
+
+ private static final Log LOG = LogFactory.getLog(SnapshotFileCache.class);
+ private volatile boolean stop = false;
+ private final FileSystem fs;
+ private final SnapshotFileInspector fileInspector;
+ private final Path snapshotDir;
+ private final Set<String> cache = new HashSet<String>();
+ /**
+ * This is a helper map of information about the snapshot directories so we don't need to rescan
+ * them if they haven't changed since the last time we looked.
+ */
+ private final Map<String, SnapshotDirectoryInfo> snapshots =
+ new HashMap<String, SnapshotDirectoryInfo>();
+ private final Timer refreshTimer;
+
+ private long lastModifiedTime = Long.MIN_VALUE;
+
+ /**
+ * Create a snapshot file cache for all snapshots under the specified [root]/.snapshot on the
+ * filesystem.
+ * <p>
+ * Immediately loads the file cache.
+ * @param conf to extract the configured {@link FileSystem} where the snapshots are stored and
+ * hbase root directory
+ * @param cacheRefreshPeriod frequency (ms) with which the cache should be refreshed
+ * @param refreshThreadName name of the cache refresh thread
+ * @param inspectSnapshotFiles Filter to apply to each snapshot to extract the files.
+ * @throws IOException if the {@link FileSystem} or root directory cannot be loaded
+ */
+ public SnapshotFileCache(Configuration conf, long cacheRefreshPeriod, String refreshThreadName,
+ SnapshotFileInspector inspectSnapshotFiles) throws IOException {
+ this(FSUtils.getCurrentFileSystem(conf), FSUtils.getRootDir(conf), 0, cacheRefreshPeriod,
+ refreshThreadName, inspectSnapshotFiles);
+ }
+
+ /**
+ * Create a snapshot file cache for all snapshots under the specified [root]/.snapshot on the
+ * filesystem
+ * @param fs {@link FileSystem} where the snapshots are stored
+ * @param rootDir hbase root directory
+ * @param cacheRefreshPeriod period (ms) with which the cache should be refreshed
+ * @param cacheRefreshDelay amount of time to wait for the cache to be refreshed
+ * @param refreshThreadName name of the cache refresh thread
+ * @param inspectSnapshotFiles Filter to apply to each snapshot to extract the files.
+ */
+ public SnapshotFileCache(FileSystem fs, Path rootDir, long cacheRefreshPeriod,
+ long cacheRefreshDelay, String refreshThreadName, SnapshotFileInspector inspectSnapshotFiles) {
+ this.fs = fs;
+ this.fileInspector = inspectSnapshotFiles;
+ this.snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(rootDir);
+ // periodically refresh the file cache to make sure we aren't superfluously saving files.
+ this.refreshTimer = new Timer(refreshThreadName, true);
+ this.refreshTimer.scheduleAtFixedRate(new RefreshCacheTask(), cacheRefreshDelay,
+ cacheRefreshPeriod);
+ }
+
+ /**
+ * Trigger a cache refresh, even if it is before the next scheduled refresh. Does not affect
+ * pending cache refreshes.
+ * <p>
+ * Blocks until the cache is refreshed.
+ * <p>
+ * Exposed for TESTING.
+ */
+ public void triggerCacheRefreshForTesting() {
+ try {
+ SnapshotFileCache.this.refreshCache();
+ } catch (IOException e) {
+ LOG.warn("Failed to refresh snapshot hfile cache!", e);
+ }
+ LOG.debug("Current cache:" + cache);
+ }
+
+ /**
+ * Check to see if any of the passed file names is contained in any of the snapshots.
+ * First checks an in-memory cache of the files to keep. If a file is not in the cache, the cache
+ * is refreshed and then checked again for that file. This ensures that we never report as
+ * unreferenced a file that a completed snapshot still references.
+ * <p>
+ * Note this may lead to periodic false positives for a file being referenced. For instance,
+ * suppose a file belongs to a snapshot and gets loaded into the cache, and at some later point
+ * that snapshot is deleted. If the cache has not been refreshed by then, it will still consider
+ * the file referenced and withhold it from the result, even though the snapshot is gone (a false
+ * positive). The cache is periodically refreshed, even when there are no requests, to ensure
+ * such stale entries are removed eventually. Conversely, a file that was never part of any
+ * snapshot will never be in the cache and is always reported as unreferenced.
+ * @param files files to check; NOTE: relies on the files having been loaded from HDFS before
+ * this method is called (NOT LAZY)
+ * @return <tt>unReferencedFiles</tt> the collection of files that do not have snapshot references
+ * @throws IOException if there is an unexpected error reaching the filesystem.
+ */
+ // XXX this is inefficient to synchronize on the method, when what we really need to guard against
+ // is an illegal access to the cache. Really we could do a mutex-guarded pointer swap on the
+ // cache, but that seems overkill at the moment and isn't necessarily a bottleneck.
+ public synchronized Iterable<FileStatus> getUnreferencedFiles(Iterable<FileStatus> files,
+ final SnapshotManager snapshotManager)
+ throws IOException {
+ List<FileStatus> unReferencedFiles = Lists.newArrayList();
+ List<String> snapshotsInProgress = null;
+ boolean refreshed = false;
+ for (FileStatus file : files) {
+ String fileName = file.getPath().getName();
+ if (!refreshed && !cache.contains(fileName)) {
+ refreshCache();
+ refreshed = true;
+ }
+ if (cache.contains(fileName)) {
+ continue;
+ }
+ if (snapshotsInProgress == null) {
+ snapshotsInProgress = getSnapshotsInProgress(snapshotManager);
+ }
+ if (snapshotsInProgress.contains(fileName)) {
+ continue;
+ }
+ unReferencedFiles.add(file);
+ }
+ return unReferencedFiles;
+ }
+
+ private synchronized void refreshCache() throws IOException {
+ long lastTimestamp = Long.MAX_VALUE;
+ boolean hasChanges = false;
+
+ // get the status of the snapshots directory and check if it has changed
+ try {
+ FileStatus dirStatus = fs.getFileStatus(snapshotDir);
+ lastTimestamp = dirStatus.getModificationTime();
+ hasChanges |= (lastTimestamp >= lastModifiedTime);
+ } catch (FileNotFoundException e) {
+ if (this.cache.size() > 0) {
+ LOG.error("Snapshot directory: " + snapshotDir + " doesn't exist");
+ }
+ return;
+ }
+
+ // get the status of the snapshots temporary directory and check if it has changes
+ // The top-level directory timestamp is not updated, so we have to check the inner-level.
+ try {
+ Path snapshotTmpDir = new Path(snapshotDir, SnapshotDescriptionUtils.SNAPSHOT_TMP_DIR_NAME);
+ FileStatus tempDirStatus = fs.getFileStatus(snapshotTmpDir);
+ lastTimestamp = Math.min(lastTimestamp, tempDirStatus.getModificationTime());
+ hasChanges |= (lastTimestamp >= lastModifiedTime);
+ if (!hasChanges) {
+ FileStatus[] tmpSnapshots = FSUtils.listStatus(fs, snapshotTmpDir);
+ if (tmpSnapshots != null) {
+ for (FileStatus dirStatus: tmpSnapshots) {
+ lastTimestamp = Math.min(lastTimestamp, dirStatus.getModificationTime());
+ }
+ hasChanges |= (lastTimestamp >= lastModifiedTime);
+ }
+ }
+ } catch (FileNotFoundException e) {
+ // Nothing to do if the tmp dir doesn't exist
+ }
+
+ // if the snapshot directory wasn't modified since we last checked, we are done
+ if (!hasChanges) {
+ return;
+ }
+
+ // directory was modified, so we need to reload our cache
+ // there could be a slight race here where we miss the cache, check the directory modification
+ // time, then someone updates the directory, causing us to not scan the directory again.
+ // However, snapshot directories are only created once, so this isn't an issue.
+
+ // 1. update the modified time
+ this.lastModifiedTime = lastTimestamp;
+
+ // 2. clear the cache
+ this.cache.clear();
+ Map<String, SnapshotDirectoryInfo> known = new HashMap<String, SnapshotDirectoryInfo>();
+
+ // 3. check each of the snapshot directories
+ FileStatus[] snapshots = FSUtils.listStatus(fs, snapshotDir);
+ if (snapshots == null) {
+ // remove all the remembered snapshots because we don't have any left
+ if (LOG.isDebugEnabled() && this.snapshots.size() > 0) {
+ LOG.debug("No snapshots on-disk, cache empty");
+ }
+ this.snapshots.clear();
+ return;
+ }
+
+ // 3.1 iterate through the on-disk snapshots
+ for (FileStatus snapshot : snapshots) {
+ String name = snapshot.getPath().getName();
+ // it's not the tmp dir,
+ if (!name.equals(SnapshotDescriptionUtils.SNAPSHOT_TMP_DIR_NAME)) {
+ SnapshotDirectoryInfo files = this.snapshots.remove(name);
+ // 3.1.1 if we don't know about the snapshot or it has been modified, we need to update the
+ // files. The latter can happen when a snapshot is created, deleted, and then re-created with
+ // the same name: we must refresh the cache with the new snapshot's information, even though
+ // the name is unchanged, because the files it references have probably changed.
+ if (files == null || files.hasBeenModified(snapshot.getModificationTime())) {
+ // get all files for the snapshot and create a new info
+ Collection<String> storedFiles = fileInspector.filesUnderSnapshot(snapshot.getPath());
+ files = new SnapshotDirectoryInfo(snapshot.getModificationTime(), storedFiles);
+ }
+ // 3.2 add all the files to cache
+ this.cache.addAll(files.getFiles());
+ known.put(name, files);
+ }
+ }
+
+ // 4. set the snapshots we are tracking
+ this.snapshots.clear();
+ this.snapshots.putAll(known);
+ }
+
+ @VisibleForTesting List<String> getSnapshotsInProgress(
+ final SnapshotManager snapshotManager) throws IOException {
+ List<String> snapshotInProgress = Lists.newArrayList();
+ // only add those files to the cache, but not to the known snapshots
+ Path snapshotTmpDir = new Path(snapshotDir, SnapshotDescriptionUtils.SNAPSHOT_TMP_DIR_NAME);
+ FileStatus[] running = FSUtils.listStatus(fs, snapshotTmpDir);
+ if (running != null) {
+ for (FileStatus run : running) {
+ ReentrantLock lock = null;
+ if (snapshotManager != null) {
+ lock = snapshotManager.getLocks().acquireLock(run.getPath().getName());
+ }
+ try {
+ snapshotInProgress.addAll(fileInspector.filesUnderSnapshot(run.getPath()));
+ } catch (CorruptedSnapshotException e) {
+ // See HBASE-16464
+ if (e.getCause() instanceof FileNotFoundException) {
+ // If the snapshot is corrupt, delete it
+ fs.delete(run.getPath(), true);
+ LOG.warn("Deleted corrupted snapshot " + run.getPath() + " due to exception:", e.getCause());
+ } else {
+ throw e;
+ }
+ } finally {
+ if (lock != null) {
+ lock.unlock();
+ }
+ }
+ }
+ }
+ return snapshotInProgress;
+ }
+
+ /**
+ * Simple helper task that just periodically attempts to refresh the cache
+ */
+ public class RefreshCacheTask extends TimerTask {
+ @Override
+ public void run() {
+ try {
+ SnapshotFileCache.this.refreshCache();
+ } catch (IOException e) {
+ LOG.warn("Failed to refresh snapshot hfile cache!", e);
+ }
+ }
+ }
+
+ @Override
+ public void stop(String why) {
+ if (!this.stop) {
+ this.stop = true;
+ this.refreshTimer.cancel();
+ }
+ }
+
+ @Override
+ public boolean isStopped() {
+ return this.stop;
+ }
+
+ /**
+ * Information about a snapshot directory
+ */
+ private static class SnapshotDirectoryInfo {
+ long lastModified;
+ Collection<String> files;
+
+ public SnapshotDirectoryInfo(long mtime, Collection<String> files) {
+ this.lastModified = mtime;
+ this.files = files;
+ }
+
+ /**
+ * @return the hfiles in the snapshot when <tt>this</tt> was made.
+ */
+ public Collection<String> getFiles() {
+ return this.files;
+ }
+
+ /**
+ * Check if the snapshot directory has been modified
+ * @param mtime current modification time of the directory
+ * @return <tt>true</tt> if the modification time of the directory is newer than when
+ * <tt>this</tt> was created
+ */
+ public boolean hasBeenModified(long mtime) {
+ return this.lastModified < mtime;
+ }
+ }
+}
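
To make the lookup contract concrete, a hedged standalone sketch of driving the cache (it must
live in the same package because SnapshotFileInspector is package-private; the "archive" path,
refresh period, and class name are illustrative, and passing a null SnapshotManager skips the
per-snapshot locking):

    package org.apache.hadoop.hbase.fs.legacy.snapshot;

    import java.io.IOException;
    import java.util.Arrays;
    import java.util.Collection;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil;
    import org.apache.hadoop.hbase.util.FSUtils;

    public class SnapshotCacheSketch {
      public static void main(String[] args) throws IOException {
        final Configuration conf = HBaseConfiguration.create();
        final FileSystem fs = FSUtils.getCurrentFileSystem(conf);
        Path rootDir = FSUtils.getRootDir(conf);

        // Refresh every 5 minutes; the first refresh fires after the same delay.
        SnapshotFileCache cache = new SnapshotFileCache(fs, rootDir, 300000L, 300000L,
            "snapshot-cache-refresher", new SnapshotFileCache.SnapshotFileInspector() {
              public Collection<String> filesUnderSnapshot(final Path snapshotDir)
                  throws IOException {
                return SnapshotReferenceUtil.getHFileNames(conf, fs, snapshotDir);
              }
            });

        // Archived files that no snapshot references are candidates for deletion.
        FileStatus[] archived = fs.listStatus(new Path(rootDir, "archive"));
        for (FileStatus unref : cache.getUnreferencedFiles(Arrays.asList(archived), null)) {
          System.out.println("unreferenced: " + unref.getPath());
        }
        cache.stop("done");
      }
    }
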
http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/snapshot/SnapshotHFileCleaner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/snapshot/SnapshotHFileCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/snapshot/SnapshotHFileCleaner.java
new file mode 100644
index 0000000..89704f0
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/snapshot/SnapshotHFileCleaner.java
@@ -0,0 +1,126 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.fs.legacy.snapshot;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.fs.legacy.cleaner.BaseHFileCleanerDelegate;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.MasterServices;
+import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException;
+import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil;
+import org.apache.hadoop.hbase.util.FSUtils;
+
+/**
+ * Implementation of a file cleaner that checks if an hfile is still used by snapshots of HBase
+ * tables.
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
+@InterfaceStability.Evolving
+public class SnapshotHFileCleaner extends BaseHFileCleanerDelegate {
+ private static final Log LOG = LogFactory.getLog(SnapshotHFileCleaner.class);
+
+ /**
+ * Conf key for the frequency to attempt to refresh the cache of hfiles currently used in
+ * snapshots (ms)
+ */
+ public static final String HFILE_CACHE_REFRESH_PERIOD_CONF_KEY =
+ "hbase.master.hfilecleaner.plugins.snapshot.period";
+
+ /** Refresh cache, by default, every 5 minutes */
+ private static final long DEFAULT_HFILE_CACHE_REFRESH_PERIOD = 300000;
+
+ /** File cache for HFiles in the completed and currently running snapshots */
+ private SnapshotFileCache cache;
+
+ private MasterServices master;
+
+ @Override
+ public synchronized Iterable<FileStatus> getDeletableFiles(Iterable<FileStatus> files) {
+ try {
+ if (master != null) return cache.getUnreferencedFiles(files, master.getSnapshotManager());
+ } catch (CorruptedSnapshotException cse) {
+ LOG.debug("Corrupted in-progress snapshot file exception, ignored ", cse);
+ } catch (IOException e) {
+ LOG.error("Exception while checking if files were valid, keeping them just in case.", e);
+ }
+ return Collections.emptyList();
+ }
+
+ @Override
+ public void init(Map<String, Object> params) {
+ if (params != null && params.containsKey(HMaster.MASTER)) {
+ this.master = (MasterServices) params.get(HMaster.MASTER);
+ }
+ }
+
+ @Override
+ protected boolean isFileDeletable(FileStatus fStat) {
+ return false;
+ }
+
+ @Override
+ public void setConf(final Configuration conf) {
+ super.setConf(conf);
+ try {
+ long cacheRefreshPeriod = conf.getLong(HFILE_CACHE_REFRESH_PERIOD_CONF_KEY,
+ DEFAULT_HFILE_CACHE_REFRESH_PERIOD);
+ final FileSystem fs = FSUtils.getCurrentFileSystem(conf);
+ Path rootDir = FSUtils.getRootDir(conf);
+ cache = new SnapshotFileCache(fs, rootDir, cacheRefreshPeriod, cacheRefreshPeriod,
+ "snapshot-hfile-cleaner-cache-refresher", new SnapshotFileCache.SnapshotFileInspector() {
+ public Collection<String> filesUnderSnapshot(final Path snapshotDir)
+ throws IOException {
+ return SnapshotReferenceUtil.getHFileNames(conf, fs, snapshotDir);
+ }
+ });
+ } catch (IOException e) {
+ LOG.error("Failed to create cleaner util", e);
+ }
+ }
+
+ @Override
+ public void stop(String why) {
+ this.cache.stop(why);
+ }
+
+ @Override
+ public boolean isStopped() {
+ return this.cache.isStopped();
+ }
+
+ /**
+ * Exposed for Testing!
+ * @return the cache of all hfiles
+ */
+ public SnapshotFileCache getFileCacheForTesting() {
+ return this.cache;
+ }
+}
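
Since this cleaner is inert unless it is in the hfile cleaner chain, a hedged sketch of the
relevant settings (class name and values illustrative):

    package org.apache.hadoop.hbase.fs.legacy.snapshot;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;

    public class CleanerConfigSketch {
      public static void main(String[] args) {
        Configuration conf = HBaseConfiguration.create();
        // Register the snapshot-aware cleaner alongside the relocated TTL cleaner.
        conf.set("hbase.master.hfilecleaner.plugins",
            "org.apache.hadoop.hbase.fs.legacy.cleaner.TimeToLiveHFileCleaner,"
                + "org.apache.hadoop.hbase.fs.legacy.snapshot.SnapshotHFileCleaner");
        // Refresh the snapshot file cache every minute instead of the 5-minute default.
        conf.setLong(SnapshotHFileCleaner.HFILE_CACHE_REFRESH_PERIOD_CONF_KEY, 60000L);
        System.out.println(conf.get("hbase.master.hfilecleaner.plugins"));
      }
    }
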
http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/io/FileLink.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/FileLink.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/FileLink.java
index 3caf67f..d3acb05 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/FileLink.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/FileLink.java
@@ -58,7 +58,7 @@ import org.apache.hadoop.hbase.util.FSUtils;
* {@link HFileLink} is a more concrete implementation of the {@code FileLink}.
*
* <p><b>Back-references:</b>
- * To help the {@link org.apache.hadoop.hbase.master.cleaner.CleanerChore} to keep track of
+ * To help the {@link org.apache.hadoop.hbase.fs.legacy.cleaner.CleanerChore} to keep track of
* the links to a particular file, during the {@code FileLink} creation, a new file is placed
* inside a back-reference directory. There's one back-reference directory for each file that
* has links, and in the directory there's one file per link.