Posted to commits@hbase.apache.org by bu...@apache.org on 2016/11/03 21:19:36 UTC

[5/5] hbase git commit: HBASE-16957 Moved cleaners from master package to fs.legacy package and removed filesystem/ directory layout references from a few files in master package

HBASE-16957 Moved cleaners from master package to fs.legacy package and removed filesystem/ directory layout references from a few files in master package

    Incompatible config changes:

    * The default value of hbase.master.logcleaner.plugins (which users should always include
      when customizing the list) changes from org.apache.hadoop.hbase.master.cleaner.TimeToLiveLogCleaner
      to org.apache.hadoop.hbase.fs.legacy.cleaner.TimeToLiveLogCleaner
    * The default value of hbase.master.hfilecleaner.plugins (which users should always include
      when customizing the list) changes from org.apache.hadoop.hbase.master.cleaner.TimeToLiveHFileCleaner
      to org.apache.hadoop.hbase.fs.legacy.cleaner.TimeToLiveHFileCleaner
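
    For deployments that build their configuration programmatically (for example embedded or test
    setups), a minimal sketch of the migration follows; org.example.MyCustomHFileCleaner is a
    hypothetical user-supplied plugin, the other class names come from this change. Most
    deployments make the equivalent edit to these two properties in hbase-site.xml instead.

        import org.apache.hadoop.conf.Configuration;
        import org.apache.hadoop.hbase.HBaseConfiguration;

        public class CleanerPluginMigration {
          public static Configuration migrated() {
            Configuration conf = HBaseConfiguration.create();
            // Re-list the default cleaners under their new fs.legacy package names, then append
            // any custom plugins, since setting these properties replaces the defaults.
            conf.setStrings("hbase.master.logcleaner.plugins",
                "org.apache.hadoop.hbase.fs.legacy.cleaner.TimeToLiveLogCleaner");
            conf.setStrings("hbase.master.hfilecleaner.plugins",
                "org.apache.hadoop.hbase.fs.legacy.cleaner.TimeToLiveHFileCleaner",
                "org.example.MyCustomHFileCleaner");   // hypothetical custom plugin
            return conf;
          }
        }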

Signed-off-by: Sean Busbey <bu...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/53f4ec9e
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/53f4ec9e
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/53f4ec9e

Branch: refs/heads/hbase-14439
Commit: 53f4ec9e993d3cc11f89eb6c1295e4d0a6e1ce39
Parents: 983cad8
Author: Umesh Agashe <ua...@cloudera.com>
Authored: Fri Oct 21 14:28:17 2016 -0700
Committer: Sean Busbey <bu...@apache.org>
Committed: Thu Nov 3 16:13:44 2016 -0500

----------------------------------------------------------------------
 .../src/main/resources/hbase-default.xml        |  30 +-
 .../example/LongTermArchivingHFileCleaner.java  |   4 +-
 .../apache/hadoop/hbase/fs/MasterStorage.java   |  90 ++++-
 .../hadoop/hbase/fs/legacy/LegacyLayout.java    |   4 +
 .../hbase/fs/legacy/LegacyMasterStorage.java    | 113 +++++-
 .../legacy/cleaner/BaseFileCleanerDelegate.java |  56 +++
 .../cleaner/BaseHFileCleanerDelegate.java       |  54 +++
 .../legacy/cleaner/BaseLogCleanerDelegate.java  |  57 +++
 .../hbase/fs/legacy/cleaner/CleanerChore.java   | 294 ++++++++++++++
 .../fs/legacy/cleaner/FileCleanerDelegate.java  |  47 +++
 .../hbase/fs/legacy/cleaner/HFileCleaner.java   |  72 ++++
 .../fs/legacy/cleaner/HFileLinkCleaner.java     | 113 ++++++
 .../hbase/fs/legacy/cleaner/LogCleaner.java     |  56 +++
 .../legacy/cleaner/TimeToLiveHFileCleaner.java  |  64 +++
 .../fs/legacy/cleaner/TimeToLiveLogCleaner.java |  73 ++++
 .../fs/legacy/snapshot/SnapshotFileCache.java   | 390 ++++++++++++++++++
 .../legacy/snapshot/SnapshotHFileCleaner.java   | 126 ++++++
 .../org/apache/hadoop/hbase/io/FileLink.java    |   2 +-
 .../hadoop/hbase/master/AssignmentManager.java  |  44 +-
 .../org/apache/hadoop/hbase/master/HMaster.java |  39 +-
 .../hbase/master/MasterStatusServlet.java       |   3 +-
 .../master/cleaner/BaseFileCleanerDelegate.java |  56 ---
 .../cleaner/BaseHFileCleanerDelegate.java       |  54 ---
 .../master/cleaner/BaseLogCleanerDelegate.java  |  57 ---
 .../hbase/master/cleaner/CleanerChore.java      | 294 --------------
 .../master/cleaner/FileCleanerDelegate.java     |  47 ---
 .../hbase/master/cleaner/HFileCleaner.java      |  72 ----
 .../hbase/master/cleaner/HFileLinkCleaner.java  | 113 ------
 .../hadoop/hbase/master/cleaner/LogCleaner.java |  56 ---
 .../master/cleaner/TimeToLiveHFileCleaner.java  |  64 ---
 .../master/cleaner/TimeToLiveLogCleaner.java    |  73 ----
 .../master/snapshot/SnapshotFileCache.java      | 389 ------------------
 .../master/snapshot/SnapshotHFileCleaner.java   | 126 ------
 .../hbase/master/snapshot/SnapshotManager.java  |  28 +-
 .../hbase/protobuf/ServerProtobufUtil.java      | 122 +-----
 .../regionserver/StorefileRefresherChore.java   |   2 +-
 .../master/ReplicationHFileCleaner.java         |   5 +-
 .../master/ReplicationLogCleaner.java           |   6 +-
 .../replication/regionserver/Replication.java   |   2 +-
 .../org/apache/hadoop/hbase/util/FSUtils.java   |  57 ---
 .../resources/hbase-webapps/master/table.jsp    |   4 +-
 .../hadoop/hbase/HBaseTestingUtility.java       |  14 +
 .../hadoop/hbase/backup/TestHFileArchiving.java |  17 +-
 .../TestZooKeeperTableArchiveClient.java        |   4 +-
 .../client/TestCloneSnapshotFromClient.java     |   6 +-
 .../client/TestRestoreSnapshotFromClient.java   |   6 +-
 .../fs/legacy/cleaner/TestCleanerChore.java     | 319 +++++++++++++++
 .../fs/legacy/cleaner/TestHFileCleaner.java     | 263 ++++++++++++
 .../fs/legacy/cleaner/TestHFileLinkCleaner.java | 201 +++++++++
 .../fs/legacy/cleaner/TestLogsCleaner.java      | 308 ++++++++++++++
 .../legacy/cleaner/TestSnapshotFromMaster.java  | 394 ++++++++++++++++++
 .../legacy/snapshot/TestSnapshotFileCache.java  | 288 +++++++++++++
 .../snapshot/TestSnapshotHFileCleaner.java      | 190 +++++++++
 .../hbase/master/cleaner/TestCleanerChore.java  | 319 ---------------
 .../hbase/master/cleaner/TestHFileCleaner.java  | 263 ------------
 .../master/cleaner/TestHFileLinkCleaner.java    | 201 ---------
 .../hbase/master/cleaner/TestLogsCleaner.java   | 309 --------------
 .../cleaner/TestReplicationHFileCleaner.java    | 341 ----------------
 .../master/cleaner/TestSnapshotFromMaster.java  | 404 -------------------
 .../master/snapshot/TestSnapshotFileCache.java  | 282 -------------
 .../snapshot/TestSnapshotHFileCleaner.java      | 190 ---------
 .../master/snapshot/TestSnapshotManager.java    |   5 +-
 .../master/TestReplicationHFileCleaner.java     | 341 ++++++++++++++++
 src/main/asciidoc/_chapters/hbase-default.adoc  |   4 +-
 64 files changed, 3997 insertions(+), 4030 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-common/src/main/resources/hbase-default.xml
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/resources/hbase-default.xml b/hbase-common/src/main/resources/hbase-default.xml
index 4f769cb..8bb46f5 100644
--- a/hbase-common/src/main/resources/hbase-default.xml
+++ b/hbase-common/src/main/resources/hbase-default.xml
@@ -122,13 +122,13 @@ possible configurations would overwhelm and obscure the important.
   </property>
   <property>
     <name>hbase.master.logcleaner.plugins</name>
-    <value>org.apache.hadoop.hbase.master.cleaner.TimeToLiveLogCleaner</value>
-    <description>A comma-separated list of BaseLogCleanerDelegate invoked by
-    the LogsCleaner service. These WAL cleaners are called in order,
-    so put the cleaner that prunes the most files in front. To
-    implement your own BaseLogCleanerDelegate, just put it in HBase's classpath
-    and add the fully qualified class name here. Always add the above
-    default log cleaners in the list.</description>
+    <value>org.apache.hadoop.hbase.fs.legacy.cleaner.TimeToLiveLogCleaner</value>
+    <description>This property is used when 'hbase.storage.type' is set to 'legacy'.
+    A comma-separated list of BaseLogCleanerDelegate invoked by the LogsCleaner
+    service. These WAL cleaners are called in order, so put the cleaner that prunes
+    the most files in front. To implement your own BaseLogCleanerDelegate, just put
+    it in HBase's classpath and add the fully qualified class name here. Always add
+    the above default log cleaners in the list.</description>
   </property>
   <property>
     <name>hbase.master.logcleaner.ttl</name>
@@ -138,14 +138,14 @@ possible configurations would overwhelm and obscure the important.
   </property>
   <property>
     <name>hbase.master.hfilecleaner.plugins</name>
-    <value>org.apache.hadoop.hbase.master.cleaner.TimeToLiveHFileCleaner</value>
-    <description>A comma-separated list of BaseHFileCleanerDelegate invoked by
-    the HFileCleaner service. These HFiles cleaners are called in order,
-    so put the cleaner that prunes the most files in front. To
-    implement your own BaseHFileCleanerDelegate, just put it in HBase's classpath
-    and add the fully qualified class name here. Always add the above
-    default log cleaners in the list as they will be overwritten in
-    hbase-site.xml.</description>
+    <value>org.apache.hadoop.hbase.fs.legacy.cleaner.TimeToLiveHFileCleaner</value>
+    <description>This property is used when 'hbase.storage.type' is set to 'legacy'.
+    A comma-separated list of BaseHFileCleanerDelegate classes invoked by the HFileCleaner
+    service. These HFile cleaners are called in order, so put the cleaner that
+    prunes the most files in front. To implement your own BaseHFileCleanerDelegate,
+    just put it in HBase's classpath and add the fully qualified class name here.
+    Always add the above default hfile cleaners in the list, because setting this
+    property in hbase-site.xml replaces the default value.</description>
   </property>
   <property>
     <name>hbase.master.infoserver.redirect</name>

http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/example/LongTermArchivingHFileCleaner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/example/LongTermArchivingHFileCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/example/LongTermArchivingHFileCleaner.java
index 09a6659..ad2e8b4 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/example/LongTermArchivingHFileCleaner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/example/LongTermArchivingHFileCleaner.java
@@ -27,7 +27,7 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
-import org.apache.hadoop.hbase.master.cleaner.BaseHFileCleanerDelegate;
+import org.apache.hadoop.hbase.fs.legacy.cleaner.BaseHFileCleanerDelegate;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.zookeeper.KeeperException;
 
@@ -36,7 +36,7 @@ import org.apache.zookeeper.KeeperException;
  * currently being archived.
  * <p>
  * This only works properly if the 
- * {@link org.apache.hadoop.hbase.master.cleaner.TimeToLiveHFileCleaner}
+ * {@link org.apache.hadoop.hbase.fs.legacy.cleaner.TimeToLiveHFileCleaner}
  *  is also enabled (it always should be), since it may take a little time
  *  for the ZK notification to propagate, in which case we may accidentally
  *  delete some files.

http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/MasterStorage.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/MasterStorage.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/MasterStorage.java
index 0ccae4a..2f3b4a4 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/MasterStorage.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/MasterStorage.java
@@ -22,18 +22,22 @@ package org.apache.hadoop.hbase.fs;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.ClusterId;
 import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.NamespaceDescriptor;
 import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.NamespaceDescriptor;
+import org.apache.hadoop.hbase.ScheduledChore;
+import org.apache.hadoop.hbase.Stoppable;
 import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.fs.legacy.LegacyMasterStorage;
 import org.apache.hadoop.hbase.fs.RegionStorage.StoreFileVisitor;
 import org.apache.hadoop.hbase.fs.legacy.LegacyPathIdentifier;
@@ -62,6 +66,39 @@ public abstract class MasterStorage<IDENTIFIER extends StorageIdentifier> {
   public FileSystem getFileSystem() { return fs; }  // TODO: definitely remove
   public IDENTIFIER getRootContainer() { return rootContainer; }
 
+  /**
+   * Get the chores that need to run periodically for the underlying MasterStorage
+   * implementation. A few setup methods, e.g. {@link #enableSnapshots()}, may have their own
+   * chores. The returned chores and their configuration may vary depending on when this method
+   * is called relative to those setup methods; in general, call the setup methods first, then
+   * fetch and schedule the chores.
+   *
+   * Please refer to the documentation of the specific method implementation for more details.
+   *
+   * @param stopper the stopper
+   * @param params parameters made available to the chores
+   * @return storage chores
+   */
+  public Iterable<ScheduledChore> getChores(Stoppable stopper, Map<String, Object> params) {
+    return new ArrayList<>();
+  }
+
+  /**
+   * Prepares the storage implementation for snapshots. The default implementation does
+   * nothing; MasterStorage subclasses need to override this method to provide their
+   * specific preparatory steps.
+   */
+  public void enableSnapshots() {
+    return;
+  }
+
+  /**
+   * Returns true if MasterStorage is prepared for snapshots
+   */
+  public boolean isSnapshotsEnabled() {
+    return true;
+  }
+
   // ==========================================================================
   //  PUBLIC Interfaces - Visitors
   // ==========================================================================
@@ -178,6 +215,45 @@ public abstract class MasterStorage<IDENTIFIER extends StorageIdentifier> {
    */
   public abstract void archiveTable(StorageContext ctx, TableName tableName) throws IOException;
 
+  /**
+   * Runs through all tables and checks how many stores for each table
+   * have more than one file in them. Checks -ROOT- and hbase:meta too. The total
+   * percentage across all tables is stored under the special key "-TOTAL-".
+   *
+   * @return A map from table name to fragmentation percentage.
+   *
+   * @throws IOException When scanning the directory fails.
+   */
+  public Map<String, Integer> getTableFragmentation() throws IOException {
+    final Map<String, Integer> frags = new HashMap<>();
+    int cfCountTotal = 0;
+    int cfFragTotal = 0;
+
+    for (TableName table: getTables()) {
+      int cfCount = 0;
+      int cfFrag = 0;
+      for (HRegionInfo hri: getRegions(table)) {
+        RegionStorage<IDENTIFIER> rs = getRegionStorage(hri);
+        final Collection<String> families = rs.getFamilies();
+        for (String family: families) {
+          cfCount++;
+          cfCountTotal++;
+          if (rs.getStoreFiles(family).size() > 1) {
+            cfFrag++;
+            cfFragTotal++;
+          }
+        }
+      }
+      // compute percentage per table and store in result list
+      frags.put(table.getNameAsString(),
+          cfCount == 0? 0: Math.round((float) cfFrag / cfCount * 100));
+    }
+    // set overall percentage for all tables
+    frags.put("-TOTAL-",
+        cfCountTotal == 0? 0: Math.round((float) cfFragTotal / cfCountTotal * 100));
+    return frags;
+  }
+
   // ==========================================================================
   //  PUBLIC Methods - Table Region related
   // ==========================================================================
@@ -232,6 +308,16 @@ public abstract class MasterStorage<IDENTIFIER extends StorageIdentifier> {
   public abstract void archiveRegion(HRegionInfo regionInfo) throws IOException;
 
   // ==========================================================================
+  // PUBLIC Methods - WAL
+  // ==========================================================================
+
+  /**
+   * Returns true if the given region server has non-empty WAL files
+   * @param serverName name of the region server to check
+   */
+  public abstract boolean hasWALs(String serverName) throws IOException;
+
+  // ==========================================================================
   //  PUBLIC Methods - visitors
   // ==========================================================================
   public void visitStoreFiles(StoreFileVisitor visitor)
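
The getChores()/enableSnapshots() contract above implies an ordering: prepare the storage first,
then fetch and schedule the chores. A minimal sketch of how a master-side caller might wire this
up, assuming the ChoreService from existing HBase code; the helper class itself is hypothetical
and not part of this commit:

    import java.util.Map;

    import org.apache.hadoop.hbase.ChoreService;
    import org.apache.hadoop.hbase.ScheduledChore;
    import org.apache.hadoop.hbase.Stoppable;
    import org.apache.hadoop.hbase.fs.MasterStorage;

    public final class StorageChoreWiring {
      private StorageChoreWiring() {}

      /**
       * Hypothetical helper: enable snapshots first so the cleaner plugin configuration is
       * adjusted, then fetch the storage chores and hand them to the master's ChoreService.
       */
      public static void scheduleStorageChores(MasterStorage<?> storage, ChoreService choreService,
          Stoppable stopper, Map<String, Object> params) {
        storage.enableSnapshots();
        for (ScheduledChore chore : storage.getChores(stopper, params)) {
          choreService.scheduleChore(chore);
        }
      }
    }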

http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/LegacyLayout.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/LegacyLayout.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/LegacyLayout.java
index ce29bb2..2906f91 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/LegacyLayout.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/LegacyLayout.java
@@ -124,4 +124,8 @@ public final class LegacyLayout {
   public static Path getBulkDir(Path rootDir) {
     return new Path(rootDir, HConstants.BULKLOAD_STAGING_DIR_NAME);
   }
+
+  public static Path getOldLogDir(final Path rootDir) {
+    return new Path(rootDir, HConstants.HREGION_OLDLOGDIR_NAME);
+  }
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/LegacyMasterStorage.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/LegacyMasterStorage.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/LegacyMasterStorage.java
index b6443de..aa4de2c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/LegacyMasterStorage.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/LegacyMasterStorage.java
@@ -24,6 +24,9 @@ import java.io.FileNotFoundException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -34,8 +37,19 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.fs.permission.FsPermission;
-import org.apache.hadoop.hbase.*;
+import org.apache.hadoop.hbase.ClusterId;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.NamespaceDescriptor;
+import org.apache.hadoop.hbase.ScheduledChore;
+import org.apache.hadoop.hbase.Stoppable;
+import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.exceptions.DeserializationException;
+import org.apache.hadoop.hbase.fs.legacy.cleaner.HFileCleaner;
+import org.apache.hadoop.hbase.fs.legacy.cleaner.HFileLinkCleaner;
+import org.apache.hadoop.hbase.fs.legacy.cleaner.LogCleaner;
+import org.apache.hadoop.hbase.fs.legacy.snapshot.SnapshotHFileCleaner;
 import org.apache.hadoop.hbase.mob.MobUtils;
 import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
@@ -80,6 +94,8 @@ public class LegacyMasterStorage extends MasterStorage<LegacyPathIdentifier> {
 
   private final boolean isSecurityEnabled;
 
+  public static final String SPLITTING_EXT = "-splitting";
+
   public LegacyMasterStorage(Configuration conf, FileSystem fs, LegacyPathIdentifier rootDir) {
     super(conf, fs, rootDir);
 
@@ -97,6 +113,54 @@ public class LegacyMasterStorage extends MasterStorage<LegacyPathIdentifier> {
     this.isSecurityEnabled = "kerberos".equalsIgnoreCase(conf.get("hbase.security.authentication"));
   }
 
+  @Override
+  public Iterable<ScheduledChore> getChores(Stoppable stopper, Map<String, Object> params) {
+    ArrayList<ScheduledChore> chores = (ArrayList<ScheduledChore>) super.getChores(stopper, params);
+
+    int cleanerInterval = getConfiguration().getInt("hbase.master.cleaner.interval", 60 * 1000);
+    // add log cleaner chore
+    chores.add(new LogCleaner(cleanerInterval, stopper, getConfiguration(), getFileSystem(),
+        LegacyLayout.getOldLogDir(getRootContainer().path)));
+    // add hfile archive cleaner chore
+    chores.add(new HFileCleaner(cleanerInterval, stopper, getConfiguration(), getFileSystem(),
+        LegacyLayout.getArchiveDir(getRootContainer().path), params));
+
+    return chores;
+  }
+
+  /**
+   * This method modifies the chore configuration for snapshots. Call this method before
+   * instantiating and scheduling the chores with {@link #getChores(Stoppable, Map)}.
+   */
+  @Override
+  public void enableSnapshots() {
+    super.enableSnapshots();
+    if (!isSnapshotsEnabled()) {
+      // Extract cleaners from conf
+      Set<String> hfileCleaners = new HashSet<>();
+      String[] cleaners = getConfiguration().getStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS);
+      if (cleaners != null) Collections.addAll(hfileCleaners, cleaners);
+
+      // add snapshot related cleaners
+      hfileCleaners.add(SnapshotHFileCleaner.class.getName());
+      hfileCleaners.add(HFileLinkCleaner.class.getName());
+
+      // Set cleaners conf
+      getConfiguration().setStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS,
+          hfileCleaners.toArray(new String[hfileCleaners.size()]));
+    }
+  }
+
+  @Override
+  public boolean isSnapshotsEnabled() {
+    // Extract cleaners from conf
+    Set<String> hfileCleaners = new HashSet<>();
+    String[] cleaners = getConfiguration().getStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS);
+    if (cleaners != null) Collections.addAll(hfileCleaners, cleaners);
+    return hfileCleaners.contains(SnapshotHFileCleaner.class.getName()) &&
+        hfileCleaners.contains(HFileLinkCleaner.class.getName());
+  }
+
   // ==========================================================================
   //  PUBLIC Methods - Namespace related
   // ==========================================================================
@@ -268,6 +332,53 @@ public class LegacyMasterStorage extends MasterStorage<LegacyPathIdentifier> {
   }
 
   // ==========================================================================
+  // PUBLIC - WAL
+  // ==========================================================================
+  @Override
+  public boolean hasWALs(String serverName) throws IOException {
+    Path logDir = new Path(getRootContainer().path, new StringBuilder(
+        HConstants.HREGION_LOGDIR_NAME).append("/").append(serverName).toString());
+    Path splitDir = logDir.suffix(SPLITTING_EXT);
+
+    return checkWALs(logDir) || checkWALs(splitDir);
+  }
+
+
+  // ==========================================================================
+  // PRIVATE - WAL
+  // ==========================================================================
+
+  private boolean checkWALs(Path dir) throws IOException {
+    FileSystem fs = getFileSystem();
+
+    if (!fs.exists(dir)) {
+      LOG.debug(dir + " not found!");
+      return false;
+    } else if (!fs.getFileStatus(dir).isDirectory()) {
+      LOG.warn(dir + " is not a directory");
+      return false;
+    }
+
+    FileStatus[] files = FSUtils.listStatus(fs, dir);
+    if (files == null || files.length == 0) {
+      LOG.debug(dir + " has no files");
+      return false;
+    }
+
+    for (FileStatus dentry: files) {
+      if (dentry.isFile() && dentry.getLen() > 0) {
+        LOG.debug(dir + " has a non-empty file: " + dentry.getPath());
+        return true;
+      } else if (dentry.isDirectory() && checkWALs(dentry.getPath())) {
+        LOG.debug(dentry + " is a directory and has a non-empty file!");
+        return true;
+      }
+    }
+    LOG.debug("Found zero non-empty wal files for: " + dir);
+    return false;
+  }
+
+  // ==========================================================================
   //  PROTECTED Methods - Bootstrap
   // ==========================================================================
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/BaseFileCleanerDelegate.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/BaseFileCleanerDelegate.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/BaseFileCleanerDelegate.java
new file mode 100644
index 0000000..6ab33a7
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/BaseFileCleanerDelegate.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.fs.legacy.cleaner;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.hbase.BaseConfigurable;
+
+import com.google.common.base.Predicate;
+import com.google.common.collect.Iterables;
+
+import java.util.Map;
+
+/**
+ * Base class for file cleaners which allows subclasses to implement a simple
+ * isFileDeletable method (which used to be the FileCleanerDelegate contract).
+ */
+public abstract class BaseFileCleanerDelegate extends BaseConfigurable
+implements FileCleanerDelegate {
+
+  @Override
+  public Iterable<FileStatus> getDeletableFiles(Iterable<FileStatus> files) {
+    return Iterables.filter(files, new Predicate<FileStatus>() {
+      @Override
+      public boolean apply(FileStatus file) {
+        return isFileDeletable(file);
+      }});
+  }
+
+  @Override
+  public void init(Map<String, Object> params) {
+    // subclass could override it if needed.
+  }
+
+  /**
+   * Should the master delete the file or keep it?
+   * @param fStat file status of the file to check
+   * @return <tt>true</tt> if the file is deletable, <tt>false</tt> if not
+   */
+  protected abstract boolean isFileDeletable(FileStatus fStat);
+
+}

http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/BaseHFileCleanerDelegate.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/BaseHFileCleanerDelegate.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/BaseHFileCleanerDelegate.java
new file mode 100644
index 0000000..a5018bd
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/BaseHFileCleanerDelegate.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.fs.legacy.cleaner;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+
+/**
+ * Base class for the hfile cleaning function inside the master. By default, only the
+ * {@link TimeToLiveHFileCleaner} is called.
+ * <p>
+ * If other effects are needed, implement your own BaseHFileCleanerDelegate and add it to the
+ * configuration "hbase.master.hfilecleaner.plugins", which is a comma-separated list of fully
+ * qualified class names. The <code>HFileCleaner</code> will build the cleaner chain in
+ * the order specified by the configuration.
+ * </p>
+ * <p>
+ * For subclasses, setConf will be called exactly <i>once</i> before using the cleaner.
+ * </p>
+ * <p>
+ * Since {@link BaseHFileCleanerDelegate HFileCleanerDelegates} are created in
+ * HFileCleaner by reflection, classes that extend this class <b>must</b>
+ * provide a default constructor.
+ * </p>
+ */
+@InterfaceAudience.Private
+public abstract class BaseHFileCleanerDelegate extends BaseFileCleanerDelegate {
+
+  private boolean stopped = false;
+
+  @Override
+  public void stop(String why) {
+    this.stopped = true;
+  }
+
+  @Override
+  public boolean isStopped() {
+    return this.stopped;
+  }
+}
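
As the hbase.master.hfilecleaner.plugins description above notes, custom cleaners extend this
class and are registered by fully qualified name. A minimal sketch of a user-supplied delegate
(purely illustrative, not part of this commit) that refuses to delete archived hfiles younger
than one day:

    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.hbase.fs.legacy.cleaner.BaseHFileCleanerDelegate;
    import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;

    /**
     * Hypothetical custom plugin: archived hfiles younger than one day are never deletable.
     * Register it by appending its fully qualified name to hbase.master.hfilecleaner.plugins.
     */
    public class KeepOneDayHFileCleaner extends BaseHFileCleanerDelegate {
      private static final long ONE_DAY_MS = 24L * 60 * 60 * 1000;

      @Override
      protected boolean isFileDeletable(FileStatus fStat) {
        long age = EnvironmentEdgeManager.currentTime() - fStat.getModificationTime();
        return age > ONE_DAY_MS;
      }
    }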

http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/BaseLogCleanerDelegate.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/BaseLogCleanerDelegate.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/BaseLogCleanerDelegate.java
new file mode 100644
index 0000000..4d4e31f
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/BaseLogCleanerDelegate.java
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.fs.legacy.cleaner;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.fs.FileStatus;
+
+/**
+ * Base class for the log cleaning function inside the master. By default, two
+ * cleaners, <code>TimeToLiveLogCleaner</code> and
+ * <code>ReplicationLogCleaner</code>, are called in order. If other effects
+ * are needed, implement your own BaseLogCleanerDelegate and add it to the
+ * configuration "hbase.master.logcleaner.plugins", which is a comma-separated
+ * list of fully qualified class names. The LogCleaner chore will add it to the chain.
+ * <p>
+ * HBase ships with LogCleaner as the default cleaning chore implementation.
+ * <p>
+ * This class extends Configurable, so setConf needs to be called once
+ * before using the cleaner. Since BaseLogCleanerDelegates are created by
+ * the LogCleaner chore via reflection, classes that extend this class should
+ * provide a default constructor.
+ */
+@InterfaceAudience.Private
+public abstract class BaseLogCleanerDelegate extends BaseFileCleanerDelegate {
+
+  @Override
+  public boolean isFileDeletable(FileStatus fStat) {
+    return isLogDeletable(fStat);
+  }
+
+  /**
+   * Should the master delete the log or keep it?
+   * <p>
+   * Implementing classes should override {@link #isFileDeletable(FileStatus)} instead.
+   * @param fStat file status of the file
+   * @return true if the log is deletable, false (default) if not
+   */
+  @Deprecated
+  public boolean isLogDeletable(FileStatus fStat) {
+    return false;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/CleanerChore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/CleanerChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/CleanerChore.java
new file mode 100644
index 0000000..ed23d96
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/CleanerChore.java
@@ -0,0 +1,294 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.fs.legacy.cleaner;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.ScheduledChore;
+import org.apache.hadoop.hbase.Stoppable;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.ipc.RemoteException;
+
+import java.io.IOException;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Abstract Cleaner that uses a chain of delegates to clean a directory of files
+ * @param <T> Cleaner delegate class that is dynamically loaded from configuration
+ */
+public abstract class CleanerChore<T extends FileCleanerDelegate> extends ScheduledChore {
+
+  private static final Log LOG = LogFactory.getLog(CleanerChore.class.getName());
+
+  private final FileSystem fs;
+  private final Path oldFileDir;
+  private final Configuration conf;
+  protected List<T> cleanersChain;
+  protected Map<String, Object> params;
+
+  public CleanerChore(String name, final int sleepPeriod, final Stoppable s, Configuration conf,
+                      FileSystem fs, Path oldFileDir, String confKey) {
+    this(name, sleepPeriod, s, conf, fs, oldFileDir, confKey, null);
+  }
+
+  /**
+   * @param name name of the chore being run
+   * @param sleepPeriod the period of time to sleep between each run
+   * @param s the stopper
+   * @param conf configuration to use
+   * @param fs handle to the FS
+   * @param oldFileDir the path to the archived files
+   * @param confKey configuration key for the classes to instantiate
+   * @param params optional parameters made available to the cleaner delegates
+   */
+  public CleanerChore(String name, final int sleepPeriod, final Stoppable s, Configuration conf,
+      FileSystem fs, Path oldFileDir, String confKey, Map<String, Object> params) {
+    super(name, s, sleepPeriod);
+    this.fs = fs;
+    this.oldFileDir = oldFileDir;
+    this.conf = conf;
+    this.params = params;
+    initCleanerChain(confKey);
+  }
+
+
+  /**
+   * Validate the file to see if it even belongs in the directory. If it is valid, then the file
+   * will go through the cleaner delegates, but otherwise the file is just deleted.
+   * @param file full {@link Path} of the file to be checked
+   * @return <tt>true</tt> if the file is valid, <tt>false</tt> otherwise
+   */
+  protected abstract boolean validate(Path file);
+
+  /**
+   * Instantiate and initialize all the file cleaners set in the configuration
+   * @param confKey key to get the file cleaner classes from the configuration
+   */
+  private void initCleanerChain(String confKey) {
+    this.cleanersChain = new LinkedList<T>();
+    String[] logCleaners = conf.getStrings(confKey);
+    if (logCleaners != null) {
+      for (String className : logCleaners) {
+        T logCleaner = newFileCleaner(className, conf);
+        if (logCleaner != null) {
+          LOG.debug("initialize cleaner=" + className);
+          this.cleanersChain.add(logCleaner);
+        }
+      }
+    }
+  }
+
+  /**
+   * A utility method to create a new instance of a cleaner delegate from its fully
+   * qualified class name.
+   * @param className fully qualified class name of the cleaner delegate
+   * @param conf configuration used to initialize the new instance
+   * @return the new instance
+   */
+  private T newFileCleaner(String className, Configuration conf) {
+    try {
+      Class<? extends FileCleanerDelegate> c = Class.forName(className).asSubclass(
+        FileCleanerDelegate.class);
+      @SuppressWarnings("unchecked")
+      T cleaner = (T) c.newInstance();
+      cleaner.setConf(conf);
+      cleaner.init(this.params);
+      return cleaner;
+    } catch (Exception e) {
+      LOG.warn("Can NOT create CleanerDelegate: " + className, e);
+      // skipping if can't instantiate
+      return null;
+    }
+  }
+
+  @Override
+  protected void chore() {
+    try {
+      FileStatus[] files = FSUtils.listStatus(this.fs, this.oldFileDir);
+      checkAndDeleteEntries(files);
+    } catch (IOException e) {
+      e = e instanceof RemoteException ?
+              ((RemoteException)e).unwrapRemoteException() : e;
+      LOG.warn("Error while cleaning the logs", e);
+    }
+  }
+
+  /**
+   * Loop over the given directory entries, and check whether they can be deleted.
+   * If an entry is itself a directory, it is recursively checked and is deleted iff
+   * all of its subentries were deleted (and no new subentries were added in the meantime).
+   *
+   * @param entries directory entries to check
+   * @return true if all entries were successfully deleted
+   */
+  private boolean checkAndDeleteEntries(FileStatus[] entries) {
+    if (entries == null) {
+      return true;
+    }
+    boolean allEntriesDeleted = true;
+    List<FileStatus> files = Lists.newArrayListWithCapacity(entries.length);
+    for (FileStatus child : entries) {
+      Path path = child.getPath();
+      if (child.isDirectory()) {
+        // for each subdirectory delete it and all entries if possible
+        if (!checkAndDeleteDirectory(path)) {
+          allEntriesDeleted = false;
+        }
+      } else {
+        // collect all files to attempt to delete in one batch
+        files.add(child);
+      }
+    }
+    if (!checkAndDeleteFiles(files)) {
+      allEntriesDeleted = false;
+    }
+    return allEntriesDeleted;
+  }
+
+  /**
+   * Attempt to delete a directory and all files under that directory. Each child file is passed
+   * through the delegates to see if it can be deleted. If the directory has no children when the
+   * cleaners have finished it is deleted.
+   * <p>
+   * If new children files are added between checks of the directory, the directory will <b>not</b>
+   * be deleted.
+   * @param dir directory to check
+   * @return <tt>true</tt> if the directory was deleted, <tt>false</tt> otherwise.
+   */
+  @VisibleForTesting boolean checkAndDeleteDirectory(Path dir) {
+    if (LOG.isTraceEnabled()) {
+      LOG.trace("Checking directory: " + dir);
+    }
+
+    try {
+      FileStatus[] children = FSUtils.listStatus(fs, dir);
+      boolean allChildrenDeleted = checkAndDeleteEntries(children);
+
+      // if the directory still has children, we can't delete it, so we are done
+      if (!allChildrenDeleted) return false;
+    } catch (IOException e) {
+      e = e instanceof RemoteException ?
+              ((RemoteException)e).unwrapRemoteException() : e;
+      LOG.warn("Error while listing directory: " + dir, e);
+      // couldn't list directory, so don't try to delete, and don't return success
+      return false;
+    }
+
+    // otherwise, all the children (that we know about) have been deleted, so we should try to
+    // delete this directory. However, don't do so recursively so we don't delete files that have
+    // been added since we last checked.
+    try {
+      return fs.delete(dir, false);
+    } catch (IOException e) {
+      if (LOG.isTraceEnabled()) {
+        LOG.trace("Couldn't delete directory: " + dir, e);
+      }
+      // couldn't delete w/o exception, so we can't return success.
+      return false;
+    }
+  }
+
+  /**
+   * Run the given files through each of the cleaners to see if they should be deleted, deleting
+   * them if so.
+   * @param files List of FileStatus for the files to check (and possibly delete)
+   * @return true iff successfully deleted all files
+   */
+  private boolean checkAndDeleteFiles(List<FileStatus> files) {
+    // first check to see if the path is valid
+    List<FileStatus> validFiles = Lists.newArrayListWithCapacity(files.size());
+    List<FileStatus> invalidFiles = Lists.newArrayList();
+    for (FileStatus file : files) {
+      if (validate(file.getPath())) {
+        validFiles.add(file);
+      } else {
+        LOG.warn("Found a wrongly formatted file: " + file.getPath() + " - will delete it.");
+        invalidFiles.add(file);
+      }
+    }
+
+    Iterable<FileStatus> deletableValidFiles = validFiles;
+    // check each of the cleaners for the valid files
+    for (T cleaner : cleanersChain) {
+      if (cleaner.isStopped() || this.getStopper().isStopped()) {
+        LOG.warn("A file cleaner" + this.getName() + " is stopped, won't delete any more files in:"
+            + this.oldFileDir);
+        return false;
+      }
+
+      Iterable<FileStatus> filteredFiles = cleaner.getDeletableFiles(deletableValidFiles);
+
+      // trace which cleaner is holding on to each file
+      if (LOG.isTraceEnabled()) {
+        ImmutableSet<FileStatus> filteredFileSet = ImmutableSet.copyOf(filteredFiles);
+        for (FileStatus file : deletableValidFiles) {
+          if (!filteredFileSet.contains(file)) {
+            LOG.trace(file.getPath() + " is not deletable according to:" + cleaner);
+          }
+        }
+      }
+
+      deletableValidFiles = filteredFiles;
+    }
+
+    Iterable<FileStatus> filesToDelete = Iterables.concat(invalidFiles, deletableValidFiles);
+    int deletedFileCount = 0;
+    for (FileStatus file : filesToDelete) {
+      Path filePath = file.getPath();
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Removing: " + filePath + " from archive");
+      }
+      try {
+        boolean success = this.fs.delete(filePath, false);
+        if (success) {
+          deletedFileCount++;
+        } else {
+          LOG.warn("Attempted to delete:" + filePath
+              + ", but couldn't. Run cleaner chain and attempt to delete on next pass.");
+        }
+      } catch (IOException e) {
+        e = e instanceof RemoteException ?
+                  ((RemoteException)e).unwrapRemoteException() : e;
+        LOG.warn("Error while deleting: " + filePath, e);
+      }
+    }
+
+    return deletedFileCount == files.size();
+  }
+
+  @Override
+  public void cleanup() {
+    for (T lc : this.cleanersChain) {
+      try {
+        lc.stop("Exiting");
+      } catch (Throwable t) {
+        LOG.warn("Stopping", t);
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/FileCleanerDelegate.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/FileCleanerDelegate.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/FileCleanerDelegate.java
new file mode 100644
index 0000000..0291640
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/FileCleanerDelegate.java
@@ -0,0 +1,47 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.fs.legacy.cleaner;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.hbase.Stoppable;
+
+import java.util.Map;
+
+/**
+ * General interface for cleaning files from a folder (generally an archive or
+ * backup folder). These are chained via the {@link CleanerChore} to determine
+ * if a given file should be deleted.
+ */
+@InterfaceAudience.Private
+public interface FileCleanerDelegate extends Configurable, Stoppable {
+
+  /**
+   * Determines which of the given files are safe to delete
+   * @param files files to check for deletion
+   * @return files that are ok to delete according to this cleaner
+   */
+  Iterable<FileStatus> getDeletableFiles(Iterable<FileStatus> files);
+
+
+  /**
+   * Used to pass additional parameters to the cleaner implementation.
+   */
+  void init(Map<String, Object> params);
+}

http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/HFileCleaner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/HFileCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/HFileCleaner.java
new file mode 100644
index 0000000..7137432
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/HFileCleaner.java
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.fs.legacy.cleaner;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Stoppable;
+import org.apache.hadoop.hbase.io.HFileLink;
+import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
+/**
+ * This Chore, every time it runs, will clear the HFiles in the hfile archive
+ * folder that are deletable for each HFile cleaner in the chain.
+ */
+@InterfaceAudience.Private
+public class HFileCleaner extends CleanerChore<BaseHFileCleanerDelegate> {
+
+  public static final String MASTER_HFILE_CLEANER_PLUGINS = "hbase.master.hfilecleaner.plugins";
+
+  public HFileCleaner(final int period, final Stoppable stopper, Configuration conf, FileSystem fs,
+      Path directory) {
+    this(period, stopper, conf, fs, directory, null);
+  }
+
+  /**
+   * @param period the period of time to sleep between each run
+   * @param stopper the stopper
+   * @param conf configuration to use
+   * @param fs handle to the FS
+   * @param directory directory to be cleaned
+   * @param params optional parameters made available to BaseHFileCleanerDelegate subclasses
+   */
+  public HFileCleaner(final int period, final Stoppable stopper, Configuration conf, FileSystem fs,
+                      Path directory, Map<String, Object> params) {
+    super("HFileCleaner", period, stopper, conf, fs,
+      directory, MASTER_HFILE_CLEANER_PLUGINS, params);
+  }
+
+  @Override
+  protected boolean validate(Path file) {
+    if (HFileLink.isBackReferencesDir(file) || HFileLink.isBackReferencesDir(file.getParent())) {
+      return true;
+    }
+    return StoreFileInfo.validateStoreFileName(file.getName());
+  }
+
+  /**
+   * Exposed for TESTING!
+   */
+  public List<BaseHFileCleanerDelegate> getDelegatesForTesting() {
+    return this.cleanersChain;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/HFileLinkCleaner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/HFileLinkCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/HFileLinkCleaner.java
new file mode 100644
index 0000000..7c1f4d2
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/HFileLinkCleaner.java
@@ -0,0 +1,113 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.fs.legacy.cleaner;
+
+import java.io.IOException;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.io.HFileLink;
+import org.apache.hadoop.hbase.mob.MobUtils;
+import org.apache.hadoop.hbase.util.FSUtils;
+
+/**
+ * HFileLink cleaner that determines if an hfile should be deleted.
+ * HFiles can be deleted only if there are no links to them.
+ *
+ * When an HFileLink is created, a back reference file is created in:
+ *      /hbase/archive/table/region/cf/.links-hfile/ref-region.ref-table
+ * To check if the hfile can be deleted, the back references folder must be empty.
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
+public class HFileLinkCleaner extends BaseHFileCleanerDelegate {
+  private static final Log LOG = LogFactory.getLog(HFileLinkCleaner.class);
+
+  private FileSystem fs = null;
+
+  @Override
+  public synchronized boolean isFileDeletable(FileStatus fStat) {
+    if (this.fs == null) return false;
+    Path filePath = fStat.getPath();
+    // HFile Link is always deletable
+    if (HFileLink.isHFileLink(filePath)) return true;
+
+    // If the file is inside a link references directory, it is a back reference link.
+    // The back reference can be deleted only if the referenced file doesn't exist.
+    Path parentDir = filePath.getParent();
+    if (HFileLink.isBackReferencesDir(parentDir)) {
+      Path hfilePath = null;
+      try {
+        // Also check if the HFile is in the HBASE_TEMP_DIRECTORY; this is where the referenced
+        // file gets created when cloning a snapshot.
+        hfilePath = HFileLink.getHFileFromBackReference(
+            new Path(FSUtils.getRootDir(getConf()), HConstants.HBASE_TEMP_DIRECTORY), filePath);
+        if (fs.exists(hfilePath)) {
+          return false;
+        }
+        // check whether the HFileLink still exists in mob dir.
+        hfilePath = HFileLink.getHFileFromBackReference(MobUtils.getMobHome(getConf()), filePath);
+        if (fs.exists(hfilePath)) {
+          return false;
+        }
+        hfilePath = HFileLink.getHFileFromBackReference(FSUtils.getRootDir(getConf()), filePath);
+        return !fs.exists(hfilePath);
+      } catch (IOException e) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Couldn't verify if the referenced file still exists, keep it just in case: "
+              + hfilePath);
+        }
+        return false;
+      }
+    }
+
+    // HFile is deletable only if has no links
+    Path backRefDir = null;
+    try {
+      backRefDir = HFileLink.getBackReferencesDir(parentDir, filePath.getName());
+      return FSUtils.listStatus(fs, backRefDir) == null;
+    } catch (IOException e) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Couldn't get the references, not deleting file, just in case. filePath="
+            + filePath + ", backRefDir=" + backRefDir);
+      }
+      return false;
+    }
+  }
+
+  @Override
+  public synchronized void setConf(Configuration conf) {
+    super.setConf(conf);
+
+    // setup filesystem
+    try {
+      this.fs = FileSystem.get(this.getConf());
+    } catch (IOException e) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Couldn't instantiate the file system, not deleting file, just in case. "
+            + FileSystem.FS_DEFAULT_NAME_KEY + "="
+            + getConf().get(FileSystem.FS_DEFAULT_NAME_KEY, FileSystem.DEFAULT_FS));
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/LogCleaner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/LogCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/LogCleaner.java
new file mode 100644
index 0000000..430c482
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/LogCleaner.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.fs.legacy.cleaner;
+
+import static org.apache.hadoop.hbase.HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Stoppable;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
+
+/**
+ * This Chore, every time it runs, will attempt to delete the WALs in the old logs folder. The WAL
+ * is only deleted if none of the cleaner delegates says otherwise.
+ * @see BaseLogCleanerDelegate
+ */
+@InterfaceAudience.Private
+public class LogCleaner extends CleanerChore<BaseLogCleanerDelegate> {
+  private static final Log LOG = LogFactory.getLog(LogCleaner.class.getName());
+
+  /**
+   * @param p the period of time to sleep between each run
+   * @param s the stopper
+   * @param conf configuration to use
+   * @param fs handle to the FS
+   * @param oldLogDir the path to the archived logs
+   */
+  public LogCleaner(final int p, final Stoppable s, Configuration conf, FileSystem fs,
+      Path oldLogDir) {
+    super("LogsCleaner", p, s, conf, fs, oldLogDir, HBASE_MASTER_LOGCLEANER_PLUGINS);
+  }
+
+  @Override
+  protected boolean validate(Path file) {
+    return AbstractFSWALProvider.validateWALFilename(file.getName());
+  }
+}
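
For readers new to the cleaner plugin model: the chore above deletes an archived WAL only when every
configured BaseLogCleanerDelegate approves. The sketch below is illustrative only and not part of this
patch; it assumes the delegate contract shown here (isLogDeletable plus the Stoppable methods), and the
class name and one-hour threshold are made up.

package org.apache.hadoop.hbase.fs.legacy.cleaner;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;

/** Illustrative only: vetoes deletion of any archived WAL modified within the last hour. */
public class KeepRecentWALsCleaner extends BaseLogCleanerDelegate {
  private static final long ONE_HOUR_MS = 60 * 60 * 1000L;
  private volatile boolean stopped = false;

  @Override
  public boolean isLogDeletable(FileStatus fStat) {
    long age = EnvironmentEdgeManager.currentTime() - fStat.getModificationTime();
    // returning false keeps the WAL; the chore deletes a file only when every delegate agrees
    return age > ONE_HOUR_MS;
  }

  @Override
  public void stop(String why) {
    this.stopped = true;
  }

  @Override
  public boolean isStopped() {
    return this.stopped;
  }
}

A delegate like this would be added to the classes named by HBASE_MASTER_LOGCLEANER_PLUGINS so the
chore instantiates it alongside the time-to-live cleaner.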

http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/TimeToLiveHFileCleaner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/TimeToLiveHFileCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/TimeToLiveHFileCleaner.java
new file mode 100644
index 0000000..177df38
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/TimeToLiveHFileCleaner.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.fs.legacy.cleaner;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+
+/**
+ * HFile cleaner that uses the timestamp of the hfile to determine if it should be deleted. By
+ * default an hfile is allowed to live for {@value #DEFAULT_TTL} ms in the archive.
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
+public class TimeToLiveHFileCleaner extends BaseHFileCleanerDelegate {
+
+  private static final Log LOG = LogFactory.getLog(TimeToLiveHFileCleaner.class.getName());
+  public static final String TTL_CONF_KEY = "hbase.master.hfilecleaner.ttl";
+  // default ttl = 5 minutes
+  public static final long DEFAULT_TTL = 60000 * 5;
+  // Configured time a hfile can be kept after it was moved to the archive
+  private long ttl;
+
+  @Override
+  public void setConf(Configuration conf) {
+    this.ttl = conf.getLong(TTL_CONF_KEY, DEFAULT_TTL);
+    super.setConf(conf);
+  }
+
+  @Override
+  public boolean isFileDeletable(FileStatus fStat) {
+    long currentTime = EnvironmentEdgeManager.currentTime();
+    long time = fStat.getModificationTime();
+    long life = currentTime - time;
+    if (LOG.isTraceEnabled()) {
+      LOG.trace("HFile life:" + life + ", ttl:" + ttl + ", current:" + currentTime + ", from: "
+          + time);
+    }
+    if (life < 0) {
+      LOG.warn("Found a hfile (" + fStat.getPath() + ") newer than current time (" + currentTime
+          + " < " + time + "), probably a clock skew");
+      return false;
+    }
+    return life > ttl;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/TimeToLiveLogCleaner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/TimeToLiveLogCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/TimeToLiveLogCleaner.java
new file mode 100644
index 0000000..4aad961
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/cleaner/TimeToLiveLogCleaner.java
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.fs.legacy.cleaner;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+
+/**
+ * Log cleaner that uses the timestamp of the WAL to determine if it should
+ * be deleted. By default a WAL is allowed to live for 10 minutes.
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
+public class TimeToLiveLogCleaner extends BaseLogCleanerDelegate {
+  private static final Log LOG = LogFactory.getLog(TimeToLiveLogCleaner.class.getName());
+  // Configured time a log can be kept after it was closed
+  private long ttl;
+  private boolean stopped = false;
+
+  @Override
+  public boolean isLogDeletable(FileStatus fStat) {
+    long currentTime = EnvironmentEdgeManager.currentTime();
+    long time = fStat.getModificationTime();
+    long life = currentTime - time;
+
+    if (LOG.isTraceEnabled()) {
+      LOG.trace("Log life:" + life + ", ttl:" + ttl + ", current:" + currentTime + ", from: "
+          + time);
+    }
+    if (life < 0) {
+      LOG.warn("Found a log (" + fStat.getPath() + ") newer than current time (" + currentTime
+          + " < " + time + "), probably a clock skew");
+      return false;
+    }
+    return life > ttl;
+  }
+
+  @Override
+  public void setConf(Configuration conf) {
+    super.setConf(conf);
+    this.ttl = conf.getLong("hbase.master.logcleaner.ttl", 600000);
+  }
+
+
+  @Override
+  public void stop(String why) {
+    this.stopped = true;
+  }
+
+  @Override
+  public boolean isStopped() {
+    return this.stopped;
+  }
+}
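
Both time-to-live cleaners above read their thresholds from the Configuration handed to setConf. As a
minimal sketch of tuning them programmatically (the values below are arbitrary examples; Configuration
and HBaseConfiguration are the standard Hadoop/HBase classes), a fragment like this could run wherever
the master's Configuration is assembled:

Configuration conf = HBaseConfiguration.create();
// keep archived hfiles for 30 minutes instead of the 5 minute default
conf.setLong(TimeToLiveHFileCleaner.TTL_CONF_KEY, 30 * 60 * 1000L);
// keep archived WALs for 1 hour instead of the 10 minute default
conf.setLong("hbase.master.logcleaner.ttl", 60 * 60 * 1000L);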

http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/snapshot/SnapshotFileCache.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/snapshot/SnapshotFileCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/snapshot/SnapshotFileCache.java
new file mode 100644
index 0000000..f9f1c67
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/snapshot/SnapshotFileCache.java
@@ -0,0 +1,390 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.fs.legacy.snapshot;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Timer;
+import java.util.TimerTask;
+import java.util.concurrent.locks.ReentrantLock;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Stoppable;
+import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
+import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException;
+import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
+import org.apache.hadoop.hbase.util.FSUtils;
+
+/**
+ * Intelligently keep track of all the files for all the snapshots.
+ * <p>
+ * A cache of files is kept to avoid querying the {@link FileSystem} frequently. If there is a cache
+ * miss the directory modification time is used to ensure that we don't rescan directories that we
+ * already have in cache. We only check the modification times of the snapshot directories
+ * (/hbase/.snapshot/[snapshot_name]) to determine if the files need to be loaded into the cache.
+ * <p>
+ * New snapshots will be added to the cache and deleted snapshots will be removed when we refresh
+ * the cache. If the files underneath a snapshot directory are changed, but not the snapshot itself,
+ * we will ignore updates to that snapshot's files.
+ * <p>
+ * This is sufficient because each snapshot has its own directory and is added via an atomic rename
+ * <i>once</i>, when the snapshot is created. We don't need to worry about the files under a
+ * completed snapshot changing out from under us.
+ * <p>
+ * Further, the cache is periodically refreshed to ensure that files in snapshots that were deleted
+ * are also removed from the cache.
+ * <p>
+ * A {@link SnapshotFileCache.SnapshotFileInspector} must be passed when creating <tt>this</tt> to
+ * allow extraction of files under the /hbase/.snapshot/[snapshot name] directory for each snapshot.
+ * This allows you to cache only a subset of each snapshot's files, for instance all the logs in the
+ * .logs directory or all the files under all the regions.
+ * <p>
+ * <tt>this</tt> also considers all running snapshots (those under /hbase/.snapshot/.tmp) as valid
+ * snapshots and will attempt to cache files from those snapshots as well.
+ * <p>
+ * Queries about a given file are thread-safe with respect to multiple queries and cache refreshes.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+public class SnapshotFileCache implements Stoppable {
+  interface SnapshotFileInspector {
+    /**
+     * Returns a collection of file names needed by the snapshot.
+     * @param snapshotDir {@link Path} to the snapshot directory to scan.
+     * @return the collection of file names needed by the snapshot.
+     */
+    Collection<String> filesUnderSnapshot(final Path snapshotDir) throws IOException;
+  }
+
+  private static final Log LOG = LogFactory.getLog(SnapshotFileCache.class);
+  private volatile boolean stop = false;
+  private final FileSystem fs;
+  private final SnapshotFileInspector fileInspector;
+  private final Path snapshotDir;
+  private final Set<String> cache = new HashSet<String>();
+  /**
+   * This is a helper map of information about the snapshot directories so we don't need to rescan
+   * them if they haven't changed since the last time we looked.
+   */
+  private final Map<String, SnapshotDirectoryInfo> snapshots =
+      new HashMap<String, SnapshotDirectoryInfo>();
+  private final Timer refreshTimer;
+
+  private long lastModifiedTime = Long.MIN_VALUE;
+
+  /**
+   * Create a snapshot file cache for all snapshots under the specified [root]/.snapshot on the
+   * filesystem.
+   * <p>
+   * Immediately loads the file cache.
+   * @param conf to extract the configured {@link FileSystem} where the snapshots are stored and
+   *          hbase root directory
+   * @param cacheRefreshPeriod frequency (ms) with which the cache should be refreshed
+   * @param refreshThreadName name of the cache refresh thread
+   * @param inspectSnapshotFiles Filter to apply to each snapshot to extract the files.
+   * @throws IOException if the {@link FileSystem} or root directory cannot be loaded
+   */
+  public SnapshotFileCache(Configuration conf, long cacheRefreshPeriod, String refreshThreadName,
+      SnapshotFileInspector inspectSnapshotFiles) throws IOException {
+    this(FSUtils.getCurrentFileSystem(conf), FSUtils.getRootDir(conf), 0, cacheRefreshPeriod,
+        refreshThreadName, inspectSnapshotFiles);
+  }
+
+  /**
+   * Create a snapshot file cache for all snapshots under the specified [root]/.snapshot on the
+   * filesystem
+   * @param fs {@link FileSystem} where the snapshots are stored
+   * @param rootDir hbase root directory
+   * @param cacheRefreshPeriod period (ms) with which the cache should be refreshed
+   * @param cacheRefreshDelay amount of time (ms) to wait before the first cache refresh
+   * @param refreshThreadName name of the cache refresh thread
+   * @param inspectSnapshotFiles Filter to apply to each snapshot to extract the files.
+   */
+  public SnapshotFileCache(FileSystem fs, Path rootDir, long cacheRefreshPeriod,
+      long cacheRefreshDelay, String refreshThreadName, SnapshotFileInspector inspectSnapshotFiles) {
+    this.fs = fs;
+    this.fileInspector = inspectSnapshotFiles;
+    this.snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(rootDir);
+    // periodically refresh the file cache to make sure we aren't superfluously saving files.
+    this.refreshTimer = new Timer(refreshThreadName, true);
+    this.refreshTimer.scheduleAtFixedRate(new RefreshCacheTask(), cacheRefreshDelay,
+      cacheRefreshPeriod);
+  }
+
+  /**
+   * Trigger a cache refresh, even if it's before the next scheduled refresh. Does not affect pending
+   * cache refreshes.
+   * <p>
+   * Blocks until the cache is refreshed.
+   * <p>
+   * Exposed for TESTING.
+   */
+  public void triggerCacheRefreshForTesting() {
+    try {
+      SnapshotFileCache.this.refreshCache();
+    } catch (IOException e) {
+      LOG.warn("Failed to refresh snapshot hfile cache!", e);
+    }
+    LOG.debug("Current cache:" + cache);
+  }
+
+  /**
+   * Check to see if any of the passed file names is contained in any of the snapshots.
+   * First checks an in-memory cache of the files to keep. If a file is not in the cache, the cache
+   * is refreshed and then checked again for that file.
+   * This ensures that we never report a file still referenced by a snapshot as unreferenced.
+   * <p>
+   * Note this may lead to periodic false positives for a file being considered referenced. The
+   * cache is periodically refreshed, even if there are no requests, so that these false positives
+   * are eventually removed. For instance, suppose a file in a snapshot gets loaded into the cache
+   * and at some later point that snapshot is deleted. If the cache has not been refreshed by then,
+   * the cache will still consider the file referenced and keep it, even though the snapshot is
+   * gone (false positive). However, a file that was never referenced by any snapshot will never be
+   * found in the cache and is always reported as unreferenced.
+   * @param files files to check; NOTE: relies on the files having been loaded from HDFS before
+   *              this method is called (NOT LAZY)
+   * @return <tt>unReferencedFiles</tt> the collection of files that do not have snapshot references
+   * @throws IOException if there is an unexpected error reaching the filesystem.
+   */
+  // XXX it is inefficient to synchronize on the method, when what we really need to guard against
+  // is concurrent access to the cache. Really we could do a mutex-guarded pointer swap on the
+  // cache, but that seems overkill at the moment and isn't necessarily a bottleneck.
+  public synchronized Iterable<FileStatus> getUnreferencedFiles(Iterable<FileStatus> files,
+      final SnapshotManager snapshotManager)
+      throws IOException {
+    List<FileStatus> unReferencedFiles = Lists.newArrayList();
+    List<String> snapshotsInProgress = null;
+    boolean refreshed = false;
+    for (FileStatus file : files) {
+      String fileName = file.getPath().getName();
+      if (!refreshed && !cache.contains(fileName)) {
+        refreshCache();
+        refreshed = true;
+      }
+      if (cache.contains(fileName)) {
+        continue;
+      }
+      if (snapshotsInProgress == null) {
+        snapshotsInProgress = getSnapshotsInProgress(snapshotManager);
+      }
+      if (snapshotsInProgress.contains(fileName)) {
+        continue;
+      }
+      unReferencedFiles.add(file);
+    }
+    return unReferencedFiles;
+  }
+
+  private synchronized void refreshCache() throws IOException {
+    long lastTimestamp = Long.MAX_VALUE;
+    boolean hasChanges = false;
+
+    // get the status of the snapshots directory and check if it has changed
+    try {
+      FileStatus dirStatus = fs.getFileStatus(snapshotDir);
+      lastTimestamp = dirStatus.getModificationTime();
+      hasChanges |= (lastTimestamp >= lastModifiedTime);
+    } catch (FileNotFoundException e) {
+      if (this.cache.size() > 0) {
+        LOG.error("Snapshot directory: " + snapshotDir + " doesn't exist");
+      }
+      return;
+    }
+
+    // get the status of the snapshots temporary directory and check if it has changed.
+    // The top-level directory timestamp is not updated, so we have to check the inner level.
+    try {
+      Path snapshotTmpDir = new Path(snapshotDir, SnapshotDescriptionUtils.SNAPSHOT_TMP_DIR_NAME);
+      FileStatus tempDirStatus = fs.getFileStatus(snapshotTmpDir);
+      lastTimestamp = Math.min(lastTimestamp, tempDirStatus.getModificationTime());
+      hasChanges |= (lastTimestamp >= lastModifiedTime);
+      if (!hasChanges) {
+        FileStatus[] tmpSnapshots = FSUtils.listStatus(fs, snapshotDir);
+        if (tmpSnapshots != null) {
+          for (FileStatus dirStatus: tmpSnapshots) {
+            lastTimestamp = Math.min(lastTimestamp, dirStatus.getModificationTime());
+          }
+          hasChanges |= (lastTimestamp >= lastModifiedTime);
+        }
+      }
+    } catch (FileNotFoundException e) {
+      // Nothing to do if the tmp dir doesn't exist
+    }
+
+    // if the snapshot directory wasn't modified since we last checked, we are done
+    if (!hasChanges) {
+      return;
+    }
+
+    // directory was modified, so we need to reload our cache
+    // there could be a slight race here where we miss the cache, check the directory modification
+    // time, then someone updates the directory, causing us to not scan the directory again.
+    // However, snapshot directories are only created once, so this isn't an issue.
+
+    // 1. update the modified time
+    this.lastModifiedTime = lastTimestamp;
+
+    // 2. clear the cache
+    this.cache.clear();
+    Map<String, SnapshotDirectoryInfo> known = new HashMap<String, SnapshotDirectoryInfo>();
+
+    // 3. check each of the snapshot directories
+    FileStatus[] snapshots = FSUtils.listStatus(fs, snapshotDir);
+    if (snapshots == null) {
+      // remove all the remembered snapshots because we don't have any left
+      if (LOG.isDebugEnabled() && this.snapshots.size() > 0) {
+        LOG.debug("No snapshots on-disk, cache empty");
+      }
+      this.snapshots.clear();
+      return;
+    }
+
+    // 3.1 iterate through the on-disk snapshots
+    for (FileStatus snapshot : snapshots) {
+      String name = snapshot.getPath().getName();
+      // it's not the tmp dir,
+      if (!name.equals(SnapshotDescriptionUtils.SNAPSHOT_TMP_DIR_NAME)) {
+        SnapshotDirectoryInfo files = this.snapshots.remove(name);
+        // 3.1.1 if we don't know about the snapshot, or it has been modified, we need to update the
+        // files. The latter can occur when a snapshot is created, deleted, and then a new snapshot
+        // with the same name is created. We need to update the cache with the information from the
+        // new snapshot, even though it has the same name, because the referenced files have
+        // probably changed.
+        if (files == null || files.hasBeenModified(snapshot.getModificationTime())) {
+          // get all files for the snapshot and create a new info
+          Collection<String> storedFiles = fileInspector.filesUnderSnapshot(snapshot.getPath());
+          files = new SnapshotDirectoryInfo(snapshot.getModificationTime(), storedFiles);
+        }
+        // 3.2 add all the files to cache
+        this.cache.addAll(files.getFiles());
+        known.put(name, files);
+      }
+    }
+
+    // 4. set the snapshots we are tracking
+    this.snapshots.clear();
+    this.snapshots.putAll(known);
+  }
+
+  @VisibleForTesting List<String> getSnapshotsInProgress(
+    final SnapshotManager snapshotManager) throws IOException {
+    List<String> snapshotInProgress = Lists.newArrayList();
+    // only add those files to the cache, but not to the known snapshots
+    Path snapshotTmpDir = new Path(snapshotDir, SnapshotDescriptionUtils.SNAPSHOT_TMP_DIR_NAME);
+    FileStatus[] running = FSUtils.listStatus(fs, snapshotTmpDir);
+    if (running != null) {
+      for (FileStatus run : running) {
+        ReentrantLock lock = null;
+        if (snapshotManager != null) {
+          lock = snapshotManager.getLocks().acquireLock(run.getPath().getName());
+        }
+        try {
+          snapshotInProgress.addAll(fileInspector.filesUnderSnapshot(run.getPath()));
+        } catch (CorruptedSnapshotException e) {
+          // See HBASE-16464
+          if (e.getCause() instanceof FileNotFoundException) {
+            // If the snapshot is corrupt, we will delete it
+            fs.delete(run.getPath(), true);
+            LOG.warn("delete the " + run.getPath() + " due to exception:", e.getCause());
+          } else {
+            throw e;
+          }
+        } finally {
+          if (lock != null) {
+            lock.unlock();
+          }
+        }
+      }
+    }
+    return snapshotInProgress;
+  }
+
+  /**
+   * Simple helper task that just periodically attempts to refresh the cache
+   */
+  public class RefreshCacheTask extends TimerTask {
+    @Override
+    public void run() {
+      try {
+        SnapshotFileCache.this.refreshCache();
+      } catch (IOException e) {
+        LOG.warn("Failed to refresh snapshot hfile cache!", e);
+      }
+    }
+  }
+
+  @Override
+  public void stop(String why) {
+    if (!this.stop) {
+      this.stop = true;
+      this.refreshTimer.cancel();
+    }
+
+  }
+
+  @Override
+  public boolean isStopped() {
+    return this.stop;
+  }
+
+  /**
+   * Information about a snapshot directory
+   */
+  private static class SnapshotDirectoryInfo {
+    long lastModified;
+    Collection<String> files;
+
+    public SnapshotDirectoryInfo(long mtime, Collection<String> files) {
+      this.lastModified = mtime;
+      this.files = files;
+    }
+
+    /**
+     * @return the hfiles in the snapshot when <tt>this</tt> was made.
+     */
+    public Collection<String> getFiles() {
+      return this.files;
+    }
+
+    /**
+     * Check if the snapshot directory has been modified
+     * @param mtime current modification time of the directory
+     * @return <tt>true</tt> if the modification time of the directory is newer than the time when
+     *         <tt>this</tt> was created
+     */
+    public boolean hasBeenModified(long mtime) {
+      return this.lastModified < mtime;
+    }
+  }
+}
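
To make the wiring above concrete, here is a hedged usage sketch that is not part of this patch. The
SnapshotFileInspector interface is package-private, so the example lives in the same
org.apache.hadoop.hbase.fs.legacy.snapshot package; the class name, refresh period, and archive path
are hypothetical.

package org.apache.hadoop.hbase.fs.legacy.snapshot;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil;
import org.apache.hadoop.hbase.util.FSUtils;

public class SnapshotFileCacheExample {
  public static void main(String[] args) throws IOException {
    final Configuration conf = HBaseConfiguration.create();
    final FileSystem fs = FSUtils.getCurrentFileSystem(conf);

    // refresh the cache every five minutes, tracking the hfiles referenced by each snapshot
    SnapshotFileCache cache = new SnapshotFileCache(conf, 5 * 60 * 1000L,
        "example-snapshot-cache-refresher", new SnapshotFileCache.SnapshotFileInspector() {
          public Collection<String> filesUnderSnapshot(final Path snapshotDir) throws IOException {
            return SnapshotReferenceUtil.getHFileNames(conf, fs, snapshotDir);
          }
        });

    // candidates would normally be the contents of the archive directory; the path is illustrative
    FileStatus[] candidates = fs.listStatus(new Path("/hbase/archive"));
    for (FileStatus unreferenced : cache.getUnreferencedFiles(Arrays.asList(candidates), null)) {
      // no completed or in-progress snapshot references this file, so a cleaner may remove it
      System.out.println("unreferenced: " + unreferenced.getPath());
    }

    cache.stop("example done");
  }
}

The SnapshotHFileCleaner below does essentially this, but driven by the cleaner chore rather than a
standalone main.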

http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/snapshot/SnapshotHFileCleaner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/snapshot/SnapshotHFileCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/snapshot/SnapshotHFileCleaner.java
new file mode 100644
index 0000000..89704f0
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/fs/legacy/snapshot/SnapshotHFileCleaner.java
@@ -0,0 +1,126 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.fs.legacy.snapshot;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.fs.legacy.cleaner.BaseHFileCleanerDelegate;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.MasterServices;
+import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException;
+import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil;
+import org.apache.hadoop.hbase.util.FSUtils;
+
+/**
+ * Implementation of a file cleaner that checks if an hfile is still used by snapshots of HBase
+ * tables.
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
+@InterfaceStability.Evolving
+public class SnapshotHFileCleaner extends BaseHFileCleanerDelegate {
+  private static final Log LOG = LogFactory.getLog(SnapshotHFileCleaner.class);
+
+  /**
+   * Conf key for the frequency to attempt to refresh the cache of hfiles currently used in
+   * snapshots (ms)
+   */
+  public static final String HFILE_CACHE_REFRESH_PERIOD_CONF_KEY =
+      "hbase.master.hfilecleaner.plugins.snapshot.period";
+
+  /** Refresh cache, by default, every 5 minutes */
+  private static final long DEFAULT_HFILE_CACHE_REFRESH_PERIOD = 300000;
+
+  /** File cache for HFiles in the completed and currently running snapshots */
+  private SnapshotFileCache cache;
+
+  private MasterServices master;
+
+  @Override
+  public synchronized Iterable<FileStatus> getDeletableFiles(Iterable<FileStatus> files) {
+    try {
+      if (master != null) return cache.getUnreferencedFiles(files, master.getSnapshotManager());
+    } catch (CorruptedSnapshotException cse) {
+      LOG.debug("Corrupted in-progress snapshot file exception, ignored ", cse);
+    } catch (IOException e) {
+      LOG.error("Exception while checking if files were valid, keeping them just in case.", e);
+    }
+    return Collections.emptyList();
+  }
+
+  @Override
+  public void init(Map<String, Object> params) {
+    if (params != null && params.containsKey(HMaster.MASTER)) {
+      this.master = (MasterServices) params.get(HMaster.MASTER);
+    }
+  }
+
+  @Override
+  protected boolean isFileDeletable(FileStatus fStat) {
+    return false;
+  }
+
+  public void setConf(final Configuration conf) {
+    super.setConf(conf);
+    try {
+      long cacheRefreshPeriod = conf.getLong(HFILE_CACHE_REFRESH_PERIOD_CONF_KEY,
+        DEFAULT_HFILE_CACHE_REFRESH_PERIOD);
+      final FileSystem fs = FSUtils.getCurrentFileSystem(conf);
+      Path rootDir = FSUtils.getRootDir(conf);
+      cache = new SnapshotFileCache(fs, rootDir, cacheRefreshPeriod, cacheRefreshPeriod,
+          "snapshot-hfile-cleaner-cache-refresher", new SnapshotFileCache.SnapshotFileInspector() {
+            public Collection<String> filesUnderSnapshot(final Path snapshotDir)
+                throws IOException {
+              return SnapshotReferenceUtil.getHFileNames(conf, fs, snapshotDir);
+            }
+          });
+    } catch (IOException e) {
+      LOG.error("Failed to create cleaner util", e);
+    }
+  }
+
+
+  @Override
+  public void stop(String why) {
+    this.cache.stop(why);
+  }
+
+  @Override
+  public boolean isStopped() {
+    return this.cache.isStopped();
+  }
+
+  /**
+   * Exposed for Testing!
+   * @return the cache of all hfiles
+   */
+  public SnapshotFileCache getFileCacheForTesting() {
+    return this.cache;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hbase/blob/53f4ec9e/hbase-server/src/main/java/org/apache/hadoop/hbase/io/FileLink.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/FileLink.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/FileLink.java
index 3caf67f..d3acb05 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/FileLink.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/FileLink.java
@@ -58,7 +58,7 @@ import org.apache.hadoop.hbase.util.FSUtils;
  * {@link HFileLink} is a more concrete implementation of the {@code FileLink}.
  *
  * <p><b>Back-references:</b>
- * To help the {@link org.apache.hadoop.hbase.master.cleaner.CleanerChore} to keep track of
+ * To help the {@link org.apache.hadoop.hbase.fs.legacy.cleaner.CleanerChore} to keep track of
  * the links to a particular file, during the {@code FileLink} creation, a new file is placed
  * inside a back-reference directory. There's one back-reference directory for each file that
  * has links, and in the directory there's one file per link.