Posted to commits@hbase.apache.org by la...@apache.org on 2012/11/01 21:54:07 UTC

svn commit: r1404762 [1/2] - in /hbase/branches/0.94/src: main/java/org/apache/hadoop/hbase/ main/java/org/apache/hadoop/hbase/backup/ main/java/org/apache/hadoop/hbase/master/ main/java/org/apache/hadoop/hbase/master/cleaner/ main/java/org/apache/hado...

Author: larsh
Date: Thu Nov  1 20:54:05 2012
New Revision: 1404762

URL: http://svn.apache.org/viewvc?rev=1404762&view=rev
Log:
HBASE-6796 Backport HBASE-5547, Don't delete HFiles in backup mode. (Jesse Yates)

Added:
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/BaseConfigurable.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/backup/
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseHFileCleanerDelegate.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseLogCleanerDelegate.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileCleaner.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/LogCleaner.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/TimeToLiveHFileCleaner.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/TimeToLiveLogCleaner.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/util/HFileArchiveUtil.java
    hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/backup/
    hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/backup/TestHFileArchiving.java
    hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/master/cleaner/
    hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestCleanerChore.java
    hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestHFileCleaner.java
    hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestLogsCleaner.java
    hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/regionserver/CheckedArchivingHFileCleaner.java
    hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/util/HFileArchiveTestingUtil.java
    hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/util/TestHFileArchiveUtil.java
Removed:
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/LogCleaner.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/LogCleanerDelegate.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/TimeToLiveLogCleaner.java
    hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/master/TestLogsCleaner.java
Modified:
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/Chore.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/HConstants.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/replication/master/ReplicationLogCleaner.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java
    hbase/branches/0.94/src/main/resources/hbase-default.xml
    hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
    hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java
    hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/util/TestFSTableDescriptors.java

Added: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/BaseConfigurable.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/BaseConfigurable.java?rev=1404762&view=auto
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/BaseConfigurable.java (added)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/BaseConfigurable.java Thu Nov  1 20:54:05 2012
@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * HBase version of Hadoop's Configured class. Unlike Configured, it does not
+ * initialize the configuration in its constructor; the configuration is set
+ * only when {@link #setConf(Configuration)} is explicitly called, and read
+ * back via {@link #getConf()}.
+ */
+public class BaseConfigurable implements Configurable {
+
+  private Configuration conf;
+
+  @Override
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+  }
+
+  @Override
+  public Configuration getConf() {
+    return this.conf;
+  }
+}
\ No newline at end of file
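
For illustration, a minimal subclass might look like the sketch below (the class
name and configuration key are hypothetical; it relies only on the
setConf()/getConf() contract above, with setConf invoked exactly once before the
object is used):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.BaseConfigurable;

    // Hypothetical delegate: reads its settings when the configuration is
    // injected via setConf(), not at construction time -- which is exactly
    // the behavior BaseConfigurable exists to enable.
    public class ExampleCleanerSetting extends BaseConfigurable {
      private long ttl;

      @Override
      public void setConf(Configuration conf) {
        super.setConf(conf);
        this.ttl = conf.getLong("example.ttl.ms", 60000L); // assumed key
      }

      public long getTtl() {
        return ttl;
      }
    }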

Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/Chore.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/Chore.java?rev=1404762&r1=1404761&r2=1404762&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/Chore.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/Chore.java Thu Nov  1 20:54:05 2012
@@ -78,6 +78,7 @@ public abstract class Chore extends HasT
      LOG.fatal(getName() + " error", t);
     } finally {
       LOG.info(getName() + " exiting");
+      cleanup();
     }
   }
 
@@ -110,4 +111,11 @@ public abstract class Chore extends HasT
   protected void sleep() {
     this.sleeper.sleep();
   }
+
+  /**
+   * Called when the chore has completed, allowing subclasses to clean up any
+   * extra resources
+   */
+  protected void cleanup() {
+  }
 }
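
The new cleanup() hook runs exactly once, from the finally block above, when the
chore's thread exits. A minimal sketch of a subclass using it (hypothetical
names; the (String, int, Stoppable) constructor is the same one CleanerChore
below passes through to):

    import org.apache.hadoop.hbase.Chore;
    import org.apache.hadoop.hbase.Stoppable;

    // Hypothetical chore that releases a resource when its thread exits.
    public class ExampleChore extends Chore {
      public ExampleChore(int periodMs, Stoppable stopper) {
        super("ExampleChore", periodMs, stopper);
      }

      @Override
      protected void chore() {
        // periodic work goes here, once every periodMs milliseconds
      }

      @Override
      protected void cleanup() {
        // runs once, from the finally block, when the chore exits
      }
    }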

Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/HConstants.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/HConstants.java?rev=1404762&r1=1404761&r2=1404762&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/HConstants.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/HConstants.java Thu Nov  1 20:54:05 2012
@@ -590,12 +590,6 @@ public final class HConstants {
     */
   public static final float HBASE_CLUSTER_MINIMUM_MEMORY_THRESHOLD = 0.2f;
 
-  public static final List<String> HBASE_NON_USER_TABLE_DIRS = new ArrayList<String>(
-      Arrays.asList(new String[]{ HREGION_LOGDIR_NAME, HREGION_OLDLOGDIR_NAME,
-          CORRUPT_DIR_NAME, Bytes.toString(META_TABLE_NAME),
-          Bytes.toString(ROOT_TABLE_NAME), SPLIT_LOGDIR_NAME,
-          HBCK_SIDELINEDIR_NAME }));
-
   public static final Pattern CP_HTD_ATTR_KEY_PATTERN = Pattern.compile
       ("^coprocessor\\$([0-9]+)$", Pattern.CASE_INSENSITIVE);
   public static final Pattern CP_HTD_ATTR_VALUE_PATTERN =
@@ -666,6 +660,14 @@ public final class HConstants {
    * The actual value is irrelevant because this is always compared by reference.
    */
   public static final byte [] NO_NEXT_INDEXED_KEY = Bytes.toBytes("NO_NEXT_INDEXED_KEY");
+  
+  /** Directory under /hbase where archived hfiles are stored */
+  public static final String HFILE_ARCHIVE_DIRECTORY = ".archive";
+
+  public static final List<String> HBASE_NON_USER_TABLE_DIRS = new ArrayList<String>(
+      Arrays.asList(new String[] { HREGION_LOGDIR_NAME, HREGION_OLDLOGDIR_NAME, CORRUPT_DIR_NAME,
+          Bytes.toString(META_TABLE_NAME), Bytes.toString(ROOT_TABLE_NAME), SPLIT_LOGDIR_NAME,
+          HBCK_SIDELINEDIR_NAME, HFILE_ARCHIVE_DIRECTORY }));
 
   private HConstants() {
     // Can't be instantiated with this ctor.
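
The HConstants change does two things: archived hfiles get a well-known
directory name (".archive", directly under the hbase root directory), and that
directory is added to HBASE_NON_USER_TABLE_DIRS so it is not mistaken for a
user table. A sketch of resolving the archive root (assuming, as
HFileArchiveUtil.getArchivePath(conf) elsewhere in this patch implies, that it
lives directly under the root dir):

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.HConstants;
    import org.apache.hadoop.hbase.util.FSUtils;

    public class ArchivePathSketch {
      // e.g. hdfs://nn/hbase/.archive when the root dir is hdfs://nn/hbase
      public static Path archiveRoot(Configuration conf) throws IOException {
        Path rootDir = FSUtils.getRootDir(conf);
        return new Path(rootDir, HConstants.HFILE_ARCHIVE_DIRECTORY);
      }
    }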

Added: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java?rev=1404762&view=auto
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java (added)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java Thu Nov  1 20:54:05 2012
@@ -0,0 +1,625 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.backup;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.StoreFile;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.HFileArchiveUtil;
+import org.apache.hadoop.io.MultipleIOException;
+
+import com.google.common.base.Function;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Collections2;
+import com.google.common.collect.Lists;
+
+/**
+ * Utility class to handle the removal of HFiles (or the respective {@link StoreFile StoreFiles})
+ * for an HRegion from the {@link FileSystem}. The hfiles will be archived or deleted, depending on
+ * the state of the system.
+ */
+public class HFileArchiver {
+  private static final Log LOG = LogFactory.getLog(HFileArchiver.class);
+  private static final String SEPARATOR = ".";
+
+  private HFileArchiver() {
+    // hidden ctor since this is just a util
+  }
+
+  /**
+   * Cleans up all the files for an HRegion by archiving the HFiles to the
+   * archive directory
+   * @param fs the file system object
+   * @param info HRegionInfo for region to be deleted
+   * @throws IOException
+   */
+  public static void archiveRegion(FileSystem fs, HRegionInfo info)
+      throws IOException {
+    Path rootDir = FSUtils.getRootDir(fs.getConf());
+    archiveRegion(fs, rootDir, HTableDescriptor.getTableDir(rootDir, info.getTableName()),
+      HRegion.getRegionDir(rootDir, info));
+  }
+
+
+  /**
+   * Remove an entire region from the table directory via archiving the region's hfiles.
+   * @param fs {@link FileSystem} from which to remove the region
+   * @param rootdir {@link Path} to the root directory where hbase files are stored (for building
+   *          the archive path)
+   * @param tableDir {@link Path} to where the table is being stored (for building the archive path)
+   * @param regionDir {@link Path} to where a region is being stored (for building the archive path)
+   * @return <tt>true</tt> if the region was successfully deleted. <tt>false</tt> if the filesystem
+   *         operations could not complete.
+   * @throws IOException if the request cannot be completed
+   */
+  public static boolean archiveRegion(FileSystem fs, Path rootdir, Path tableDir, Path regionDir)
+      throws IOException {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("ARCHIVING region " + regionDir.toString());
+    }
+
+    // make sure we can actually archive: both the table and region
+    // directories are needed to build the archive path
+    if (tableDir == null || regionDir == null) {
+      LOG.error("No archive directory could be found because tabledir (" + tableDir
+          + ") or regiondir (" + regionDir + "was null. Deleting files instead.");
+      deleteRegionWithoutArchiving(fs, regionDir);
+      // we should have archived, but failed to. Doesn't matter if we deleted
+      // the archived files correctly or not.
+      return false;
+    }
+
+    // make sure the regiondir lives under the tabledir
+    Preconditions.checkArgument(regionDir.toString().startsWith(tableDir.toString()));
+    Path regionArchiveDir = HFileArchiveUtil.getRegionArchiveDir(fs.getConf(), tableDir, regionDir);
+
+    LOG.debug("Have an archive directory, preparing to move files");
+    FileStatusConverter getAsFile = new FileStatusConverter(fs);
+    // now attempt to archive the store files
+
+    // build collection of just the store directories to archive
+    Collection<File> toArchive = new ArrayList<File>();
+    final PathFilter dirFilter = new FSUtils.DirFilter(fs);
+    PathFilter nonHidden = new PathFilter() {
+      @Override
+      public boolean accept(Path file) {
+        return dirFilter.accept(file) && !file.getName().startsWith(".");
+      }
+    };
+    FileStatus[] storeDirs = FSUtils.listStatus(fs, regionDir, nonHidden);
+    // if there are no files, we can just delete the directory and return
+    if (storeDirs == null) {
+      LOG.debug("Region directory (" + regionDir + ") was empty, just deleting and returning!");
+      return deleteRegionWithoutArchiving(fs, regionDir);
+    }
+
+    // convert the files in the region to a File
+    toArchive.addAll(Lists.transform(Arrays.asList(storeDirs), getAsFile));
+    LOG.debug("Archiving:" + toArchive);
+    boolean success = false;
+    try {
+      success = resolveAndArchive(fs, regionArchiveDir, toArchive);
+    } catch (IOException e) {
+      LOG.error("Failed to archive: " + toArchive, e);
+      success = false;
+    }
+
+    // if that was successful, then we delete the region
+    if (success) {
+      LOG.debug("Successfully resolved and archived, now can just delete region.");
+      return deleteRegionWithoutArchiving(fs, regionDir);
+    }
+
+    throw new IOException("Received error when attempting to archive files (" + toArchive
+        + "), cannot delete region directory.");
+  }
+
+  /**
+   * Remove the store files, either by archiving them or outright deletion
+   * @param fs the filesystem where the store files live
+   * @param parent Parent region hosting the store files
+   * @param conf {@link Configuration} to examine to determine the archive directory
+   * @param family the family hosting the store files
+   * @param compactedFiles files to be disposed of. No further reading of these files should be
+   *          attempted; otherwise likely to cause an {@link IOException}
+   * @throws IOException if the files could not be correctly disposed.
+   */
+  public static void archiveStoreFiles(FileSystem fs, HRegion parent,
+      Configuration conf, byte[] family, Collection<StoreFile> compactedFiles) throws IOException {
+
+    // sometimes in testing we don't have a filesystem handle, so check for that
+    if (fs == null) {
+      LOG.warn("Passed filesystem is null, so just deleting the files without archiving for region:"
+          + Bytes.toString(parent.getRegionName()) + ", family:" + Bytes.toString(family));
+      deleteStoreFilesWithoutArchiving(compactedFiles);
+      return;
+    }
+
+    // short circuit if we don't have any files to delete
+    if (compactedFiles.size() == 0) {
+      LOG.debug("No store files to dispose, done!");
+      return;
+    }
+
+    // build the archive path
+    if (parent == null || family == null) throw new IOException(
+        "Need to have a parent region and a family to archive from.");
+
+    Path storeArchiveDir = HFileArchiveUtil.getStoreArchivePath(conf, parent, family);
+
+    // make sure the archive directory exists; if we can't create it, we can't archive
+    if (!fs.mkdirs(storeArchiveDir)) {
+      throw new IOException("Could not make archive directory (" + storeArchiveDir + ") for store:"
+          + Bytes.toString(family));
+    }
+
+    // otherwise we attempt to archive the store files
+    LOG.debug("Archiving compacted store files.");
+
+    // wrap the storefile into a File
+    StoreToFile getStorePath = new StoreToFile(fs);
+    Collection<File> storeFiles = Collections2.transform(compactedFiles, getStorePath);
+
+    // do the actual archive
+    if (!resolveAndArchive(fs, storeArchiveDir, storeFiles)) {
+      throw new IOException("Failed to archive/delete all the files for region:"
+          + Bytes.toString(parent.getRegionName()) + ", family:" + Bytes.toString(family)
+          + " into " + storeArchiveDir + "Something is probably arwy on the filesystem.");
+    }
+  }
+
+  /**
+   * Archive the given files and resolve any conflicts with existing files via appending the time
+   * archiving started (so all conflicts in the same group have the same timestamp appended).
+   * <p>
+   * If any of the passed files to archive are directories, archives all the files under that
+   * directory. The archive directory structure for the children is the base archive directory
+   * plus the directory's own name, and is built recursively as long as the passed files are
+   * themselves directories.
+   * @param fs {@link FileSystem} on which to archive the files
+   * @param baseArchiveDir base archive directory to archive the given files
+   * @param toArchive files to be archived
+   * @return <tt>true</tt> on success, <tt>false</tt> otherwise
+   * @throws IOException on unexpected failure
+   */
+  private static boolean resolveAndArchive(FileSystem fs, Path baseArchiveDir,
+      Collection<File> toArchive) throws IOException {
+    LOG.debug("Starting to archive files:" + toArchive);
+    long start = EnvironmentEdgeManager.currentTimeMillis();
+    List<File> failures = resolveAndArchive(fs, baseArchiveDir, toArchive, start);
+
+    // clean out the failures by just deleting them
+    if (failures.size() > 0) {
+      try {
+        LOG.error("Failed to complete archive, deleting extra store files.");
+        deleteFilesWithoutArchiving(failures);
+      } catch (IOException e) {
+        LOG.warn("Failed to delete store file(s) when archiving failed", e);
+      }
+      return false;
+    }
+    return true;
+  }
+
+  /**
+   * Resolve any conflict with an existing archive file via timestamp-append
+   * renaming of the existing file and then archive the passed in files.
+   * @param fs {@link FileSystem} on which to archive the files
+   * @param baseArchiveDir base archive directory to store the files. If any of
+   *          the files to archive are directories, will append the name of the
+   *          directory to the base archive directory name, creating a parallel
+   *          structure.
+   * @param toArchive files/directories that need to be archived
+   * @param start time the archiving started - used for resolving archive
+   *          conflicts.
+   * @return the list of files that failed to archive.
+   * @throws IOException if an unexpected file operation exception occurred
+   */
+  private static List<File> resolveAndArchive(FileSystem fs, Path baseArchiveDir,
+      Collection<File> toArchive, long start) throws IOException {
+    // short circuit if no files to move
+    if (toArchive.size() == 0) return Collections.emptyList();
+
+    LOG.debug("moving files to the archive directory: " + baseArchiveDir);
+
+    // make sure the archive directory exists
+    if (!fs.exists(baseArchiveDir)) {
+      if (!fs.mkdirs(baseArchiveDir)) {
+        throw new IOException("Failed to create the archive directory:" + baseArchiveDir
+            + ", quitting archive attempt.");
+      }
+      LOG.debug("Created archive directory:" + baseArchiveDir);
+    }
+
+    List<File> failures = new ArrayList<File>();
+    String startTime = Long.toString(start);
+    for (File file : toArchive) {
+      // if it's a file, archive it
+      try {
+        LOG.debug("Archiving:" + file);
+        if (file.isFile()) {
+          // attempt to archive the file
+          if (!resolveAndArchiveFile(baseArchiveDir, file, startTime)) {
+            LOG.warn("Couldn't archive " + file + " into backup directory: " + baseArchiveDir);
+            failures.add(file);
+          }
+        } else {
+          // otherwise it's a directory and we need to archive all files
+          LOG.debug(file + " is a directory, archiving children files");
+          // so we add the directory name to the one base archive
+          Path parentArchiveDir = new Path(baseArchiveDir, file.getName());
+          // and then get all the files from that directory and attempt to
+          // archive those too
+          Collection<File> children = file.getChildren();
+          failures.addAll(resolveAndArchive(fs, parentArchiveDir, children, start));
+        }
+      } catch (IOException e) {
+        LOG.warn("Failed to archive file: " + file, e);
+        failures.add(file);
+      }
+    }
+    return failures;
+  }
+
+  /**
+   * Attempt to archive the passed in file to the archive directory.
+   * <p>
+   * If the same file already exists in the archive, it is moved to a timestamped directory under
+   * the archive directory and the new file is put in its place.
+   * @param archiveDir {@link Path} to the directory that stores the archives of the hfiles
+   * @param currentFile {@link Path} to the original HFile that will be archived
+   * @param archiveStartTime time the archiving started, to resolve naming conflicts
+   * @return <tt>true</tt> if the file is successfully archived. <tt>false</tt> if there was a
+   *         problem, but the operation still completed.
+   * @throws IOException on failure to complete {@link FileSystem} operations.
+   */
+  private static boolean resolveAndArchiveFile(Path archiveDir, File currentFile,
+      String archiveStartTime) throws IOException {
+    // build path as it should be in the archive
+    String filename = currentFile.getName();
+    Path archiveFile = new Path(archiveDir, filename);
+    FileSystem fs = currentFile.getFileSystem();
+
+    // if the file already exists in the archive, move that one to a timestamped backup. This is a
+    // really, really unlikely situation, where we get the same name for the existing file, but
+    // it is handled just for that one-in-a-trillion chance.
+    if (fs.exists(archiveFile)) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("File:" + archiveFile + " already exists in archive, moving to "
+            + "timestamped backup and overwriting current.");
+      }
+
+      // move the archive file to the stamped backup
+      Path backedupArchiveFile = new Path(archiveDir, filename + SEPARATOR + archiveStartTime);
+      if (!fs.rename(archiveFile, backedupArchiveFile)) {
+        LOG.error("Could not rename archive file to backup: " + backedupArchiveFile
+            + ", deleting existing file in favor of newer.");
+        // try to delete the existing file, if we can't rename it
+        if (!fs.delete(archiveFile, false)) {
+          throw new IOException("Couldn't delete existing archive file (" + archiveFile
+              + ") or rename it to the backup file (" + backedupArchiveFile
+              + ")to make room for similarly named file.");
+        }
+      }
+      LOG.debug("Backed up archive file from: " + archiveFile);
+    }
+
+    LOG.debug("No existing file in archive for:" + archiveFile + ", free to archive original file.");
+
+    // at this point, we should have a free spot for the archive file
+    if (!currentFile.moveAndClose(archiveFile)) {
+      LOG.error("Failed to archive file:" + currentFile);
+      return false;
+    } else if (LOG.isDebugEnabled()) {
+      LOG.debug("Finished archiving file from: " + currentFile + ", to: " + archiveFile);
+    }
+    return true;
+  }
+
+  /**
+   * Simple delete of regular files from the {@link FileSystem}.
+   * <p>
+   * This method is a more generic implementation than the other deleteXXX
+   * methods in this class, allowing more code reuse at the cost of a few
+   * more short-lived objects (which should have minimal impact on the jvm).
+   * @param files {@link Collection} of files to be deleted
+   * @throws IOException if a file cannot be deleted. All files will be
+   *           attempted before throwing the exception, rather than failing
+   *           at the first file.
+   */
+  private static void deleteFilesWithoutArchiving(Collection<File> files) throws IOException {
+    List<IOException> errors = new ArrayList<IOException>(0);
+    for (File file : files) {
+      try {
+        LOG.debug("Deleting region file:" + file);
+        file.delete();
+      } catch (IOException e) {
+        LOG.error("Failed to delete file:" + file);
+        errors.add(e);
+      }
+    }
+    if (errors.size() > 0) {
+      throw MultipleIOException.createIOException(errors);
+    }
+  }
+
+  /**
+   * Without regard for backup, delete a region. Should be used with caution.
+   * @param fs FileSystem from which to delete the region
+   * @param regionDir {@link Path} to the region to be deleted.
+   * @return <tt>true</tt> on successful deletion, <tt>false</tt> otherwise
+   * @throws IOException on filesystem operation failure
+   */
+  private static boolean deleteRegionWithoutArchiving(FileSystem fs, Path regionDir)
+      throws IOException {
+    if (fs.delete(regionDir, true)) {
+      LOG.debug("Deleted all region files in: " + regionDir);
+      return true;
+    }
+    LOG.debug("Failed to delete region directory:" + regionDir);
+    return false;
+  }
+
+  /**
+   * Just do a simple delete of the given store files
+   * <p>
+   * A best effort is made to delete each of the files, rather than bailing on the first failure.
+   * <p>
+   * This method is preferable to {@link #deleteFilesWithoutArchiving(Collection)} since it consumes
+   * fewer resources, but is limited in terms of usefulness
+   * @param compactedFiles store files to delete from the file system.
+   * @throws IOException if a file cannot be deleted. All files will be attempted before throwing
+   *           the exception, rather than failing at the first file.
+   */
+  private static void deleteStoreFilesWithoutArchiving(Collection<StoreFile> compactedFiles)
+      throws IOException {
+    LOG.debug("Deleting store files without archiving.");
+    List<IOException> errors = new ArrayList<IOException>(0);
+    for (StoreFile hsf : compactedFiles) {
+      try {
+        hsf.deleteReader();
+      } catch (IOException e) {
+        LOG.error("Failed to delete store file:" + hsf.getPath());
+        errors.add(e);
+      }
+    }
+    if (errors.size() > 0) {
+      throw MultipleIOException.createIOException(errors);
+    }
+  }
+
+  /**
+   * Adapt a type to match the {@link File} interface, which is used internally for handling
+   * archival/removal of files
+   * @param <T> type to adapt to the {@link File} interface
+   */
+  private static abstract class FileConverter<T> implements Function<T, File> {
+    protected final FileSystem fs;
+
+    public FileConverter(FileSystem fs) {
+      this.fs = fs;
+    }
+  }
+
+  /**
+   * Convert a FileStatus to something we can manage in the archiving
+   */
+  private static class FileStatusConverter extends FileConverter<FileStatus> {
+    public FileStatusConverter(FileSystem fs) {
+      super(fs);
+    }
+
+    @Override
+    public File apply(FileStatus input) {
+      return new FileablePath(fs, input.getPath());
+    }
+  }
+
+  /**
+   * Convert the {@link StoreFile} into something we can manage in the archive
+   * methods
+   */
+  private static class StoreToFile extends FileConverter<StoreFile> {
+    public StoreToFile(FileSystem fs) {
+      super(fs);
+    }
+
+    @Override
+    public File apply(StoreFile input) {
+      return new FileableStoreFile(fs, input);
+    }
+  }
+
+  /**
+   * Wrapper to handle file operations uniformly
+   */
+  private static abstract class File {
+    protected final FileSystem fs;
+
+    public File(FileSystem fs) {
+      this.fs = fs;
+    }
+
+    /**
+     * Delete the file
+     * @throws IOException on failure
+     */
+    abstract void delete() throws IOException;
+
+    /**
+     * Check to see if this is a file or a directory
+     * @return <tt>true</tt> if it is a file, <tt>false</tt> otherwise
+     * @throws IOException on {@link FileSystem} connection error
+     */
+    abstract boolean isFile() throws IOException;
+
+    /**
+     * @return if this is a directory, returns all the children in the
+     *         directory, otherwise returns an empty list
+     * @throws IOException
+     */
+    abstract Collection<File> getChildren() throws IOException;
+
+    /**
+     * close any outside readers of the file
+     * @throws IOException
+     */
+    abstract void close() throws IOException;
+
+    /**
+     * @return the name of the file (not the full fs path, just the individual
+     *         file name)
+     */
+    abstract String getName();
+
+    /**
+     * @return the path to this file
+     */
+    abstract Path getPath();
+
+    /**
+     * Move the file to the given destination, closing any open readers first
+     * @param dest destination to move the file to
+     * @return <tt>true</tt> on success
+     * @throws IOException on failure to close or rename the file
+     */
+    public boolean moveAndClose(Path dest) throws IOException {
+      this.close();
+      Path p = this.getPath();
+      return fs.rename(p, dest);
+    }
+
+    /**
+     * @return the {@link FileSystem} on which this file resides
+     */
+    public FileSystem getFileSystem() {
+      return this.fs;
+    }
+
+    @Override
+    public String toString() {
+      return this.getClass() + ", file:" + getPath().toString();
+    }
+  }
+
+  /**
+   * A {@link File} that wraps a simple {@link Path} on a {@link FileSystem}.
+   */
+  private static class FileablePath extends File {
+    private final Path file;
+    private final FileStatusConverter getAsFile;
+
+    public FileablePath(FileSystem fs, Path file) {
+      super(fs);
+      this.file = file;
+      this.getAsFile = new FileStatusConverter(fs);
+    }
+
+    @Override
+    public void delete() throws IOException {
+      if (!fs.delete(file, true)) throw new IOException("Failed to delete:" + this.file);
+    }
+
+    @Override
+    public String getName() {
+      return file.getName();
+    }
+
+    @Override
+    public Collection<File> getChildren() throws IOException {
+      if (fs.isFile(file)) return Collections.emptyList();
+      return Collections2.transform(Arrays.asList(fs.listStatus(file)), getAsFile);
+    }
+
+    @Override
+    public boolean isFile() throws IOException {
+      return fs.isFile(file);
+    }
+
+    @Override
+    public void close() throws IOException {
+      // NOOP - files are implicitly closed on removal
+    }
+
+    @Override
+    Path getPath() {
+      return file;
+    }
+  }
+
+  /**
+   * {@link File} adapter for a {@link StoreFile} living on a {@link FileSystem}.
+   */
+  private static class FileableStoreFile extends File {
+    StoreFile file;
+
+    public FileableStoreFile(FileSystem fs, StoreFile store) {
+      super(fs);
+      this.file = store;
+    }
+
+    @Override
+    public void delete() throws IOException {
+      file.deleteReader();
+    }
+
+    @Override
+    public String getName() {
+      return file.getPath().getName();
+    }
+
+    @Override
+    public boolean isFile() {
+      return true;
+    }
+
+    @Override
+    public Collection<File> getChildren() throws IOException {
+      // storefiles don't have children
+      return Collections.emptyList();
+    }
+
+    @Override
+    public void close() throws IOException {
+      file.closeReader(true);
+    }
+
+    @Override
+    Path getPath() {
+      return file.getPath();
+    }
+  }
+}
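
The public surface of the new class is small: the two archiveRegion overloads
and archiveStoreFiles; everything else is conflict resolution and the File
adapters. A hedged usage sketch, mirroring what the CatalogJanitor and
MasterFileSystem changes below do (class and method names here are
illustrative):

    import java.io.IOException;

    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.hbase.HRegionInfo;
    import org.apache.hadoop.hbase.backup.HFileArchiver;

    public class ArchiveRegionSketch {
      // Archive, rather than delete, all the files of a region: the hfiles
      // are moved under the .archive directory and then the emptied region
      // directory is removed. Name collisions in the archive are resolved by
      // renaming the older file with a timestamp suffix.
      public static void dropRegion(FileSystem fs, HRegionInfo info)
          throws IOException {
        HFileArchiver.archiveRegion(fs, info);
      }
    }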

Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java?rev=1404762&r1=1404761&r2=1404762&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java Thu Nov  1 20:54:05 2012
@@ -39,11 +39,10 @@ import org.apache.hadoop.hbase.HConstant
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.Server;
-import org.apache.hadoop.hbase.TableExistsException;
+import org.apache.hadoop.hbase.backup.HFileArchiver;
 import org.apache.hadoop.hbase.catalog.MetaEditor;
 import org.apache.hadoop.hbase.catalog.MetaReader;
 import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.Store;
 import org.apache.hadoop.hbase.regionserver.StoreFile;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -51,7 +50,6 @@ import org.apache.hadoop.hbase.util.FSUt
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.util.Writables;
 
-
 /**
  * A janitor for the catalog tables.  Scans the <code>.META.</code> catalog
  * table on a period looking for unused regions to garbage collect.
@@ -237,7 +235,7 @@ class CatalogJanitor extends Chore {
     if (hasNoReferences(a) && hasNoReferences(b)) {
       LOG.debug("Deleting region " + parent.getRegionNameAsString() +
         " because daughter splits no longer hold references");
-	  // wipe out daughter references from parent region
+      // wipe out daughter references from parent region in meta
       removeDaughtersFromParent(parent);
 
       // This latter regionOffline should not be necessary but is done for now
@@ -248,8 +246,7 @@ class CatalogJanitor extends Chore {
         this.services.getAssignmentManager().regionOffline(parent);
       }
       FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
-      Path rootdir = this.services.getMasterFileSystem().getRootDir();
-      HRegion.deleteRegion(fs, rootdir, parent);
+      HFileArchiver.archiveRegion(fs, parent);
       MetaEditor.deleteRegion(this.server.getCatalogTracker(), parent);
       result = true;
     }

Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1404762&r1=1404761&r2=1404762&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Thu Nov  1 20:54:05 2012
@@ -44,6 +44,7 @@ import javax.management.ObjectName;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.Chore;
 import org.apache.hadoop.hbase.ClusterStatus;
 import org.apache.hadoop.hbase.HColumnDescriptor;
@@ -75,6 +76,8 @@ import org.apache.hadoop.hbase.ipc.HMast
 import org.apache.hadoop.hbase.ipc.HMasterRegionInterface;
 import org.apache.hadoop.hbase.ipc.ProtocolSignature;
 import org.apache.hadoop.hbase.ipc.RpcServer;
+import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
+import org.apache.hadoop.hbase.master.cleaner.LogCleaner;
 import org.apache.hadoop.hbase.master.handler.CreateTableHandler;
 import org.apache.hadoop.hbase.master.handler.DeleteTableHandler;
 import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
@@ -93,6 +96,7 @@ import org.apache.hadoop.hbase.replicati
 import org.apache.hadoop.hbase.security.User;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.FSTableDescriptors;
+import org.apache.hadoop.hbase.util.HFileArchiveUtil;
 import org.apache.hadoop.hbase.util.HasThread;
 import org.apache.hadoop.hbase.util.InfoServer;
 import org.apache.hadoop.hbase.util.Pair;
@@ -203,6 +207,7 @@ Server {
 
   private CatalogJanitor catalogJanitorChore;
   private LogCleaner logCleaner;
+  private HFileCleaner hfileCleaner;
 
   private MasterCoprocessorHost cpHost;
   private final ServerName serverName;
@@ -834,12 +839,19 @@ Server {
 
    // Start log cleaner thread
    String n = Thread.currentThread().getName();
+   int cleanerInterval = conf.getInt("hbase.master.cleaner.interval", 60 * 1000);
    this.logCleaner =
-      new LogCleaner(conf.getInt("hbase.master.cleaner.interval", 60 * 1000),
+      new LogCleaner(cleanerInterval,
          this, conf, getMasterFileSystem().getFileSystem(),
          getMasterFileSystem().getOldLogDir());
          Threads.setDaemonThreadRunning(logCleaner.getThread(), n + ".oldLogCleaner");
 
+   //start the hfile archive cleaner thread
+    Path archiveDir = HFileArchiveUtil.getArchivePath(conf);
+    this.hfileCleaner = new HFileCleaner(cleanerInterval, this, conf, getMasterFileSystem()
+        .getFileSystem(), archiveDir);
+    Threads.setDaemonThreadRunning(hfileCleaner.getThread(), n + ".archivedHFileCleaner");
+
    // Put up info server.
    int port = this.conf.getInt("hbase.master.info.port", 60010);
    if (port >= 0) {
@@ -866,6 +878,8 @@ Server {
     if (this.rpcServer != null) this.rpcServer.stop();
     // Clean up and close up shop
     if (this.logCleaner!= null) this.logCleaner.interrupt();
+    if (this.hfileCleaner != null) this.hfileCleaner.interrupt();
+
     if (this.infoServer != null) {
       LOG.info("Stopping infoServer");
       try {
@@ -1815,4 +1829,12 @@ Server {
     MBeanUtil.registerMBean("Master", "Master", mxBeanInfo);
     LOG.info("Registered HMaster MXBean");
   }
-}
+
+  /**
+   * Exposed for Testing!
+   * @return the current hfile cleaner
+   */
+  public HFileCleaner getHFileCleaner() {
+    return this.hfileCleaner;
+  }
+}
\ No newline at end of file
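
Both cleaner chores now share the "hbase.master.cleaner.interval" period. A
sketch of tuning the interval and registering an extra hfile cleaner delegate
(the plugin key is taken from the BaseHFileCleanerDelegate javadoc below; the
delegate class name is hypothetical):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;

    public class CleanerConfigSketch {
      public static Configuration configure() {
        Configuration conf = HBaseConfiguration.create();
        // run both the log cleaner and the hfile cleaner every 30 seconds
        conf.setInt("hbase.master.cleaner.interval", 30 * 1000);
        // comma-separated list of fully qualified delegate class names
        conf.set("hbase.master.hfilecleaner.plugins",
            "org.example.MyHFileCleanerDelegate"); // hypothetical delegate
        return conf;
      }
    }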

Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java?rev=1404762&r1=1404761&r2=1404762&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java Thu Nov  1 20:54:05 2012
@@ -41,6 +41,7 @@ import org.apache.hadoop.hbase.InvalidFa
 import org.apache.hadoop.hbase.RemoteExceptionHandler;
 import org.apache.hadoop.hbase.Server;
 import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.backup.HFileArchiver;
 import org.apache.hadoop.hbase.master.metrics.MasterMetrics;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.wal.HLog;
@@ -440,7 +441,7 @@ public class MasterFileSystem {
 
 
   public void deleteRegion(HRegionInfo region) throws IOException {
-    fs.delete(HRegion.getRegionDir(rootdir, region), true);
+    HFileArchiver.archiveRegion(fs, region);
   }
 
   public void deleteTable(byte[] tableName) throws IOException {

Added: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseHFileCleanerDelegate.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseHFileCleanerDelegate.java?rev=1404762&view=auto
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseHFileCleanerDelegate.java (added)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseHFileCleanerDelegate.java Thu Nov  1 20:54:05 2012
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.cleaner;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.BaseConfigurable;
+
+/**
+ * Base class for the hfile cleaning function inside the master. By default, only the
+ * {@link TimeToLiveHFileCleaner} is called.
+ * <p>
+ * If other effects are needed, implement your own BaseHFileCleanerDelegate and add it to the
+ * configuration "hbase.master.hfilecleaner.plugins", which is a comma-separated list of fully
+ * qualified class names. The <code>HFileCleaner</code> will build the cleaner chain in
+ * the order specified by the configuration.
+ * <p>
+ * For subclasses, setConf will be called exactly <i>once</i> before using the cleaner.
+ * <p>
+ * Since {@link BaseHFileCleanerDelegate HFileCleanerDelegates} are created in
+ * HFileCleaner by reflection, classes that implement this interface <b>must</b>
+ * provide a default constructor.
+ */
+@InterfaceAudience.Private
+public abstract class BaseHFileCleanerDelegate extends BaseConfigurable implements
+    FileCleanerDelegate {
+
+  private boolean stopped = false;
+
+  @Override
+  public void stop(String why) {
+    this.stopped = true;
+  }
+
+  @Override
+  public boolean isStopped() {
+    return this.stopped;
+  }
+}
\ No newline at end of file
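
A sketch of a delegate in the style the javadoc describes (the class name and
threshold are hypothetical; it refuses to delete archived hfiles younger than
an hour, similar in spirit to the TimeToLiveHFileCleaner added by this commit):

    import java.io.IOException;

    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.master.cleaner.BaseHFileCleanerDelegate;
    import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;

    public class KeepRecentHFileCleaner extends BaseHFileCleanerDelegate {
      private static final long MIN_AGE_MS = 60 * 60 * 1000L;

      @Override
      public boolean isFileDeletable(Path file) {
        try {
          FileStatus stat = file.getFileSystem(getConf()).getFileStatus(file);
          long age = EnvironmentEdgeManager.currentTimeMillis()
              - stat.getModificationTime();
          // only deletable once the archived file is older than the threshold
          return age > MIN_AGE_MS;
        } catch (IOException e) {
          // if the file cannot be stat-ed, keep it; a later pass can decide
          return false;
        }
      }
    }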

Added: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseLogCleanerDelegate.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseLogCleanerDelegate.java?rev=1404762&view=auto
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseLogCleanerDelegate.java (added)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseLogCleanerDelegate.java Thu Nov  1 20:54:05 2012
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.cleaner;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.BaseConfigurable;
+
+/**
+ * Base class for the log cleaning function inside the master. By default, two
+ * cleaners, <code>TimeToLiveLogCleaner</code> and
+ * <code>ReplicationLogCleaner</code>, are called in order. If other effects
+ * are needed, implement your own BaseLogCleanerDelegate and add it to the
+ * configuration "hbase.master.logcleaner.plugins", which is a comma-separated
+ * list of fully qualified class names. The LogCleaner will add it to the chain.
+ * <p>
+ * HBase ships with LogCleaner as the default implementation.
+ * <p>
+ * This class implements Configurable, so setConf needs to be called once
+ * before using the cleaner. Since LogCleanerDelegates are created in
+ * LogCleaner by reflection, classes that implement this interface must
+ * provide a default constructor.
+ */
+@InterfaceAudience.Private
+public abstract class BaseLogCleanerDelegate extends BaseConfigurable implements FileCleanerDelegate {
+
+  @Override
+  public boolean isFileDeletable(Path file) {
+    return isLogDeletable(file);
+  }
+
+  /**
+   * Should the master delete the log or keep it?
+   * <p>
+   * Implementing classes should override {@link #isFileDeletable(Path)} instead.
+   * @param filePath full path to log.
+   * @return true if the log is deletable, false if not
+   */
+  @Deprecated
+  public abstract boolean isLogDeletable(Path filePath);
+}
\ No newline at end of file
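
Note that, unlike BaseHFileCleanerDelegate above, this base class does not stub
out Stoppable, so a concrete log delegate still implements stop()/isStopped()
itself. A hypothetical sketch:

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.master.cleaner.BaseLogCleanerDelegate;

    // Hypothetical delegate: never lets the master delete logs whose name
    // carries a ".pinned" marker (e.g. logs being kept for debugging).
    public class PinnedLogCleaner extends BaseLogCleanerDelegate {
      private boolean stopped = false;

      @Override
      public boolean isLogDeletable(Path filePath) {
        return !filePath.getName().contains(".pinned");
      }

      @Override
      public void stop(String why) {
        this.stopped = true;
      }

      @Override
      public boolean isStopped() {
        return stopped;
      }
    }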

Added: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java?rev=1404762&view=auto
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java (added)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java Thu Nov  1 20:54:05 2012
@@ -0,0 +1,222 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.cleaner;
+
+import java.io.IOException;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Chore;
+import org.apache.hadoop.hbase.RemoteExceptionHandler;
+import org.apache.hadoop.hbase.Stoppable;
+import org.apache.hadoop.hbase.util.FSUtils;
+
+/**
+ * Abstract Cleaner that uses a chain of delegates to clean a directory of files
+ * @param <T> Cleaner delegate class that is dynamically loaded from configuration
+ */
+public abstract class CleanerChore<T extends FileCleanerDelegate> extends Chore {
+
+  private static final Log LOG = LogFactory.getLog(CleanerChore.class.getName());
+
+  private final FileSystem fs;
+  private final Path oldFileDir;
+  private final Configuration conf;
+  List<T> cleanersChain;
+
+  /**
+   * @param name name of the chore being run
+   * @param sleepPeriod the period of time to sleep between each run
+   * @param s the stopper
+   * @param conf configuration to use
+   * @param fs handle to the FS
+   * @param oldFileDir the path to the archived files
+   * @param confKey configuration key for the classes to instantiate
+   */
+  public CleanerChore(String name, final int sleepPeriod, final Stoppable s, Configuration conf,
+      FileSystem fs, Path oldFileDir, String confKey) {
+    super(name, sleepPeriod, s);
+    this.fs = fs;
+    this.oldFileDir = oldFileDir;
+    this.conf = conf;
+
+    initCleanerChain(confKey);
+  }
+
+  /**
+   * Validate the file to see if it even belongs in the directory. If it is valid, then the file
+   * will go through the cleaner delegates, but otherwise the file is just deleted.
+   * @param file full {@link Path} of the file to be checked
+   * @return <tt>true</tt> if the file is valid, <tt>false</tt> otherwise
+   */
+  protected abstract boolean validate(Path file);
+
+  /**
+   * Instantiate and initialize all the file cleaners set in the configuration
+   * @param confKey key to get the file cleaner classes from the configuration
+   */
+  private void initCleanerChain(String confKey) {
+    this.cleanersChain = new LinkedList<T>();
+    String[] logCleaners = conf.getStrings(confKey);
+    if (logCleaners != null) {
+      for (String className : logCleaners) {
+        T logCleaner = newFileCleaner(className, conf);
+        if (logCleaner != null) this.cleanersChain.add(logCleaner);
+      }
+    }
+  }
+
+  /**
+   * A utility method to create a new instance of a file cleaner delegate from
+   * its fully qualified class name.
+   * @param className fully qualified class name of the cleaner delegate
+   * @param conf configuration to set on the newly created instance
+   * @return the new instance
+   */
+  public T newFileCleaner(String className, Configuration conf) {
+    try {
+      Class<? extends FileCleanerDelegate> c = Class.forName(className).asSubclass(
+        FileCleanerDelegate.class);
+      @SuppressWarnings("unchecked")
+      T cleaner = (T) c.newInstance();
+      cleaner.setConf(conf);
+      return cleaner;
+    } catch (Exception e) {
+      LOG.warn("Can NOT create CleanerDelegate: " + className, e);
+      // skipping if can't instantiate
+      return null;
+    }
+  }
+
+  @Override
+  protected void chore() {
+    try {
+      FileStatus[] files = FSUtils.listStatus(this.fs, this.oldFileDir, null);
+      // if the path (file or directory) doesn't exist, then we can just return
+      if (files == null) return;
+      // loop over the found files and see if they should be deleted
+      for (FileStatus file : files) {
+        try {
+          if (file.isDir()) checkAndDeleteDirectory(file.getPath());
+          else checkAndDelete(file.getPath());
+        } catch (IOException e) {
+          e = RemoteExceptionHandler.checkIOException(e);
+          LOG.warn("Error while cleaning the logs", e);
+        }
+      }
+    } catch (IOException e) {
+      LOG.warn("Failed to get status of:" + oldFileDir);
+    }
+
+  }
+
+  /**
+   * Attempt to delete a directory and all files under that directory. Each child file is passed
+   * through the delegates to see if it can be deleted. If the directory has no children when the
+   * cleaners have finished, it is deleted.
+   * <p>
+   * If new children files are added between checks of the directory, the directory will <b>not</b>
+   * be deleted.
+   * @param toCheck directory to check
+   * @return <tt>true</tt> if the directory was deleted, <tt>false</tt> otherwise.
+   * @throws IOException if there is an unexpected filesystem error
+   */
+  private boolean checkAndDeleteDirectory(Path toCheck) throws IOException {
+    LOG.debug("Checking directory: " + toCheck);
+    FileStatus[] children = FSUtils.listStatus(fs, toCheck, null);
+    // if the directory doesn't exist, then we are done
+    if (children == null) return true;
+
+    boolean canDeleteThis = true;
+    for (FileStatus child : children) {
+      Path path = child.getPath();
+      // attempt to delete all the files under the directory
+      if (child.isDir()) {
+        if (!checkAndDeleteDirectory(path)) {
+          canDeleteThis = false;
+        }
+      }
+      // otherwise we can just check the file
+      else if (!checkAndDelete(path)) {
+        canDeleteThis = false;
+      }
+    }
+
+    // if all the children have been deleted, then we should try to delete this directory. However,
+    // don't do so recursively so we don't delete files that have been added since we checked.
+    return canDeleteThis ? fs.delete(toCheck, false) : false;
+  }
+
+  /**
+   * Run the given file through each of the cleaners to see if it should be deleted, deleting it if
+   * necessary.
+   * @param filePath path of the file to check (and possibly delete)
+   * @return <tt>true</tt> if the file was deleted, <tt>false</tt> otherwise
+   * @throws IOException if a file cannot be deleted because of a filesystem issue
+   * @throws IllegalArgumentException if the file is a directory and has children
+   */
+  private boolean checkAndDelete(Path filePath) throws IOException, IllegalArgumentException {
+    // first check to see if the path is valid
+    if (!validate(filePath)) {
+      LOG.warn("Found a wrongly formatted file: " + filePath.getName() + " deleting it.");
+      boolean success = this.fs.delete(filePath, true);
+      if (!success) LOG.warn("Attempted to delete:" + filePath
+          + ", but couldn't. Will retry on the next cleaner pass.");
+
+      return success;
+    }
+    // check each of the cleaners for the file
+    for (T cleaner : cleanersChain) {
+      if (cleaner.isStopped() || this.stopper.isStopped()) {
+        LOG.warn("A file cleaner" + this.getName() + " is stopped, won't delete any file in:"
+            + this.oldFileDir);
+        return false;
+      }
+
+      if (!cleaner.isFileDeletable(filePath)) {
+        // this file is not deletable, then we are done
+        LOG.debug(filePath + " is not deletable according to:" + cleaner);
+        return false;
+      }
+    }
+    // delete this file if it passes all the cleaners
+    LOG.debug("Removing:" + filePath + " from archive");
+    boolean success = this.fs.delete(filePath, false);
+    if (!success) {
+      LOG.warn("Attempted to delete:" + filePath
+          + ", but couldn't. Run cleaner chain and attempt to delete on next pass.");
+    }
+    return success;
+  }
+
+  @Override
+  public void cleanup() {
+    for (T lc : this.cleanersChain) {
+      try {
+        lc.stop("Exiting");
+      } catch (Throwable t) {
+        LOG.warn("Stopping", t);
+      }
+    }
+  }
+}
\ No newline at end of file
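
A concrete chore only has to supply validate(); the HFileCleaner added below
does essentially that for the archive directory. A hedged sketch (the chore
and its configuration key are hypothetical):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.Stoppable;
    import org.apache.hadoop.hbase.master.cleaner.CleanerChore;
    import org.apache.hadoop.hbase.master.cleaner.FileCleanerDelegate;

    // Hypothetical chore: cleans a scratch directory, accepting every file
    // name and leaving the keep/delete decision to the delegates configured
    // under "example.cleaner.plugins".
    public class ScratchCleaner extends CleanerChore<FileCleanerDelegate> {
      public ScratchCleaner(int period, Stoppable stopper, Configuration conf,
          FileSystem fs, Path scratchDir) {
        super("ScratchCleaner", period, stopper, conf, fs, scratchDir,
            "example.cleaner.plugins");
      }

      @Override
      protected boolean validate(Path file) {
        return true; // every file goes through the cleaner chain
      }
    }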

Added: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java?rev=1404762&view=auto
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java (added)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java Thu Nov  1 20:54:05 2012
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.cleaner;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Stoppable;
+
+/**
+ * General interface for cleaning files from a folder (generally an archive or
+ * backup folder). These are chained via the {@link CleanerChore} to determine
+ * if a given file should be deleted.
+ */
+@InterfaceAudience.Private
+public interface FileCleanerDelegate extends Configurable, Stoppable {
+
+  /**
+   * Should the master delete the file or keep it?
+   * @param file full path to the file to check
+   * @return <tt>true</tt> if the file is deletable, <tt>false</tt> if not
+   */
+  public boolean isFileDeletable(Path file);
+}
\ No newline at end of file

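A minimal sketch of a custom delegate built on this interface, via the BaseHFileCleanerDelegate
added in this commit. The class name and the ".keep" rule are made up for illustration; the one
real contract is isFileDeletable(Path), and returning false vetoes deletion for the whole chain.

    package org.example.cleaner;  // hypothetical package

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.master.cleaner.BaseHFileCleanerDelegate;

    /** Illustrative delegate: never delete archived files whose names end in ".keep". */
    public class KeepMarkedFilesCleaner extends BaseHFileCleanerDelegate {
      @Override
      public boolean isFileDeletable(Path file) {
        // a single false from any delegate keeps the file for another pass
        return !file.getName().endsWith(".keep");
      }
    }
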
Added: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileCleaner.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileCleaner.java?rev=1404762&view=auto
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileCleaner.java (added)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileCleaner.java Thu Nov  1 20:54:05 2012
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.cleaner;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Stoppable;
+import org.apache.hadoop.hbase.regionserver.StoreFile;
+
+/**
+ * This Chore, every time it runs, deletes the HFiles in the hfile archive
+ * folder that every HFile cleaner in the chain agrees are deletable.
+ */
+@InterfaceAudience.Private
+public class HFileCleaner extends CleanerChore<BaseHFileCleanerDelegate> {
+
+  public static final String MASTER_HFILE_CLEANER_PLUGINS = "hbase.master.hfilecleaner.plugins";
+
+  /**
+   * @param period the period of time to sleep between each run
+   * @param stopper the stopper
+   * @param conf configuration to use
+   * @param fs handle to the FS
+   * @param directory directory to be cleaned
+   */
+  public HFileCleaner(final int period, final Stoppable stopper, Configuration conf, FileSystem fs,
+      Path directory) {
+    super("HFileCleaner", period, stopper, conf, fs, directory, MASTER_HFILE_CLEANER_PLUGINS);
+  }
+
+  @Override
+  protected boolean validate(Path file) {
+    return StoreFile.validateStoreFileName(file.getName());
+  }
+}

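Wiring delegates into the chain is configuration-driven: the CleanerChore loads the classes
named under the plugin key. A sketch, assuming the comma-separated-class-names convention used
by the existing hbase.master.logcleaner.plugins key; KeepMarkedFilesCleaner is the hypothetical
delegate sketched above.

    Configuration conf = HBaseConfiguration.create();
    conf.set(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS,
      "org.apache.hadoop.hbase.master.cleaner.TimeToLiveHFileCleaner,"
        + "org.example.cleaner.KeepMarkedFilesCleaner");
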
Added: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/LogCleaner.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/LogCleaner.java?rev=1404762&view=auto
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/LogCleaner.java (added)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/LogCleaner.java Thu Nov  1 20:54:05 2012
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.cleaner;
+
+import static org.apache.hadoop.hbase.HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Stoppable;
+import org.apache.hadoop.hbase.regionserver.wal.HLog;
+
+/**
+ * This Chore, every time it runs, will attempt to delete the HLogs in the old logs folder. The HLog
+ * is only deleted if none of the cleaner delegates says otherwise.
+ * @see BaseLogCleanerDelegate
+ */
+@InterfaceAudience.Private
+public class LogCleaner extends CleanerChore<BaseLogCleanerDelegate> {
+  static final Log LOG = LogFactory.getLog(LogCleaner.class.getName());
+
+  /**
+   * @param p the period of time to sleep between each run
+   * @param s the stopper
+   * @param conf configuration to use
+   * @param fs handle to the FS
+   * @param oldLogDir the path to the archived logs
+   */
+  public LogCleaner(final int p, final Stoppable s, Configuration conf, FileSystem fs,
+      Path oldLogDir) {
+    super("LogsCleaner", p, s, conf, fs, oldLogDir, HBASE_MASTER_LOGCLEANER_PLUGINS);
+  }
+
+  @Override
+  protected boolean validate(Path file) {
+    return HLog.validateHLogFilename(file.getName());
+  }
+}

Added: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/TimeToLiveHFileCleaner.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/TimeToLiveHFileCleaner.java?rev=1404762&view=auto
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/TimeToLiveHFileCleaner.java (added)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/TimeToLiveHFileCleaner.java Thu Nov  1 20:54:05 2012
@@ -0,0 +1,91 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.cleaner;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+
+/**
+ * HFile cleaner that uses the timestamp of the hfile to determine if it should be deleted. By
+ * default an hfile is allowed to live for {@value TimeToLiveHFileCleaner#DEFAULT_TTL} ms
+ * (5 minutes) after it was moved to the archive.
+ */
+@InterfaceAudience.Private
+public class TimeToLiveHFileCleaner extends BaseHFileCleanerDelegate {
+
+  public static final Log LOG = LogFactory.getLog(TimeToLiveHFileCleaner.class.getName());
+  public static final String TTL_CONF_KEY = "hbase.master.hfilecleaner.ttl";
+  // default ttl = 5 minutes
+  private static final long DEFAULT_TTL = 60000 * 5;
+  // Configured time a hfile can be kept after it was moved to the archive
+  private long ttl;
+  private FileSystem fs;
+
+  @Override
+  public void setConf(Configuration conf) {
+    this.ttl = conf.getLong(TTL_CONF_KEY, DEFAULT_TTL);
+    super.setConf(conf);
+  }
+
+  @Override
+  public boolean isFileDeletable(Path filePath) {
+    if (!instantiateFS()) {
+      return false;
+    }
+    long time = 0;
+    long currentTime = EnvironmentEdgeManager.currentTimeMillis();
+    try {
+      FileStatus fStat = fs.getFileStatus(filePath);
+      time = fStat.getModificationTime();
+    } catch (IOException e) {
+      LOG.error("Unable to get modification time of file " + filePath.getName()
+          + ", not deleting it.", e);
+      return false;
+    }
+    long life = currentTime - time;
+    LOG.debug("Life:" + life + ", ttl:" + ttl + ", current:" + currentTime + ", from: " + time);
+    if (life < 0) {
+      LOG.warn("Found a log (" + filePath + ") newer than current time (" + currentTime + " < "
+          + time + "), probably a clock skew");
+      return false;
+    }
+    return life > ttl;
+  }
+
+  /**
+   * Set up the filesystem, if it hasn't been already.
+   * @return <tt>true</tt> if the filesystem is available, <tt>false</tt> if it couldn't be
+   *         instantiated
+   */
+  private synchronized boolean instantiateFS() {
+    if (this.fs == null) {
+      try {
+        this.fs = FileSystem.get(this.getConf());
+      } catch (IOException e) {
+        LOG.error("Couldn't instantiate the file system, not deleting file, just incase");
+        return false;
+      }
+    }
+    return true;
+  }
+}
\ No newline at end of file

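The deletability test above is just (currentTime - modificationTime) > ttl, measured against the
file's modification time in the archive. A sketch of raising the default 5-minute TTL to one
hour through the key defined above:

    Configuration conf = HBaseConfiguration.create();
    conf.setLong(TimeToLiveHFileCleaner.TTL_CONF_KEY, 60 * 60 * 1000L);  // 1 hour, in ms
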
Added: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/TimeToLiveLogCleaner.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/TimeToLiveLogCleaner.java?rev=1404762&view=auto
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/TimeToLiveLogCleaner.java (added)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/cleaner/TimeToLiveLogCleaner.java Thu Nov  1 20:54:05 2012
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.cleaner;
+
+import java.io.IOException;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * Log cleaner that uses the timestamp of the hlog to determine if it should
+ * be deleted. By default an hlog is allowed to live for 10 minutes.
+ */
+@InterfaceAudience.Private
+public class TimeToLiveLogCleaner extends BaseLogCleanerDelegate {
+  static final Log LOG = LogFactory.getLog(TimeToLiveLogCleaner.class.getName());
+  // Configured time a log can be kept after it was closed
+  private long ttl;
+  private boolean stopped = false;
+
+  @Override
+  public boolean isLogDeletable(Path filePath) {
+    long time = 0;
+    long currentTime = System.currentTimeMillis();
+    try {
+      FileStatus fStat = filePath.getFileSystem(this.getConf()).getFileStatus(filePath);
+      time = fStat.getModificationTime();
+    } catch (IOException e) {
+      LOG.error("Unable to get modification time of file " + filePath.getName() +
+      ", not deleting it.", e);
+      return false;
+    }
+    long life = currentTime - time;
+    if (life < 0) {
+      LOG.warn("Found a log newer than current time, " +
+          "probably a clock skew");
+      return false;
+    }
+    return life > ttl;
+  }
+
+  @Override
+  public void setConf(Configuration conf) {
+    super.setConf(conf);
+    this.ttl = conf.getLong("hbase.master.logcleaner.ttl", 600000);
+  }
+
+  @Override
+  public void stop(String why) {
+    this.stopped = true;
+  }
+
+  @Override
+  public boolean isStopped() {
+    return this.stopped;
+  }
+}
\ No newline at end of file

Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java?rev=1404762&r1=1404761&r2=1404762&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java Thu Nov  1 20:54:05 2012
@@ -77,6 +77,7 @@ import org.apache.hadoop.hbase.HTableDes
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.NotServingRegionException;
 import org.apache.hadoop.hbase.UnknownScannerException;
+import org.apache.hadoop.hbase.backup.HFileArchiver;
 import org.apache.hadoop.hbase.client.Append;
 import org.apache.hadoop.hbase.client.RowMutations;
 import org.apache.hadoop.hbase.client.Delete;
@@ -869,16 +870,7 @@ public class HRegion implements HeapSize
       writestate.writesEnabled = false;
       wasFlushing = writestate.flushing;
       LOG.debug("Closing " + this + ": disabling compactions & flushes");
-      while (writestate.compacting > 0 || writestate.flushing) {
-        LOG.debug("waiting for " + writestate.compacting + " compactions" +
-            (writestate.flushing ? " & cache flush" : "") +
-            " to complete for region " + this);
-        try {
-          writestate.wait();
-        } catch (InterruptedException iex) {
-          // continue
-        }
-      }
+      waitForFlushesAndCompactions();
     }
     // If we were not just flushing, is it worth doing a preflush...one
     // that will clear out of the bulk of the memstore before we put up
@@ -953,6 +945,26 @@ public class HRegion implements HeapSize
     }
   }
 
+  /**
+   * Wait for all current flushes and compactions of the region to complete.
+   * <p>
+   * Exposed for TESTING.
+   */
+  public void waitForFlushesAndCompactions() {
+    synchronized (writestate) {
+      while (writestate.compacting > 0 || writestate.flushing) {
+        LOG.debug("waiting for " + writestate.compacting + " compactions"
+            + (writestate.flushing ? " & cache flush" : "") + " to complete for region " + this);
+        try {
+          writestate.wait();
+        } catch (InterruptedException iex) {
+          // essentially ignore and propagate the interrupt back up
+          Thread.currentThread().interrupt();
+        }
+      }
+    }
+  }
+
   protected ThreadPoolExecutor getStoreOpenAndCloseThreadPool(
       final String threadNamePrefix) {
     int numStores = Math.max(1, this.htableDescriptor.getFamilies().size());
@@ -4088,8 +4100,13 @@ public class HRegion implements HeapSize
       LOG.debug("Files for new region");
       listPaths(fs, dstRegion.getRegionDir());
     }
-    deleteRegion(fs, a.getRegionDir());
-    deleteRegion(fs, b.getRegionDir());
+
+    // delete out the 'A' region
+    HFileArchiver.archiveRegion(fs, FSUtils.getRootDir(a.getConf()), a.getTableDir(),
+      a.getRegionDir());
+    // delete out the 'B' region
+    HFileArchiver.archiveRegion(fs, FSUtils.getRootDir(b.getConf()), b.getTableDir(),
+      b.getRegionDir());
 
     LOG.info("merge completed. New region is " + dstRegion);
 

Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java?rev=1404762&r1=1404761&r2=1404762&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java Thu Nov  1 20:54:05 2012
@@ -51,6 +51,7 @@ import org.apache.hadoop.hbase.HRegionIn
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.KeyValue.KVComparator;
 import org.apache.hadoop.hbase.RemoteExceptionHandler;
+import org.apache.hadoop.hbase.backup.HFileArchiver;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.fs.HFileSystem;
 import org.apache.hadoop.hbase.io.HeapSize;
@@ -457,7 +458,7 @@ public class Store extends SchemaConfigu
   /**
    * @return All store files.
    */
-  List<StoreFile> getStorefiles() {
+  public List<StoreFile> getStorefiles() {
     return this.storefiles;
   }
 
@@ -1732,10 +1733,12 @@ public class Store extends SchemaConfigu
 
       // Tell observers that list of StoreFiles has changed.
       notifyChangedReadersObservers();
-      // Finally, delete old store files.
-      for (StoreFile hsf: compactedFiles) {
-        hsf.deleteReader();
-      }
+
+      // let the archive util decide if we should archive or delete the files
+      LOG.debug("Removing store files after compaction...");
+      HFileArchiver.archiveStoreFiles(this.fs, this.region, this.conf, this.family.getName(),
+        compactedFiles);
+
     } catch (IOException e) {
       e = RemoteExceptionHandler.checkIOException(e);
       LOG.error("Failed replacing compacted files in " + this.storeNameStr +

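Two pieces of this change are aimed at tests: getStorefiles() is now public, and HRegion (above)
exposes waitForFlushesAndCompactions(). A sketch of using them together, with 'region' and a
FAMILY byte[] assumed in scope:

    region.waitForFlushesAndCompactions();   // let in-flight flushes/compactions settle
    Store store = region.getStore(FAMILY);   // FAMILY: hypothetical column family name
    int storeFileCount = store.getStorefiles().size();  // stable while nothing else runs
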
Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java?rev=1404762&r1=1404761&r2=1404762&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java Thu Nov  1 20:54:05 2012
@@ -861,13 +861,20 @@ public class StoreFile extends SchemaCon
   }
 
   /**
-   * Write out a split reference.
-   *
-   * Package local so it doesnt leak out of regionserver.
-   *
+   * Validate the store file name.
+   * @param fileName name of the file to validate
+   * @return <tt>true</tt> if the file could be a valid store file, <tt>false</tt> otherwise
+   */
+  public static boolean validateStoreFileName(String fileName) {
+    return !fileName.contains("-");
+  }
+
+  /**
+   * Write out a split reference. Package local so it doesn't leak out of
+   * regionserver.
    * @param fs
    * @param splitDir Presumes path format is actually
-   * <code>SOME_DIRECTORY/REGIONNAME/FAMILY</code>.
+   *          <code>SOME_DIRECTORY/REGIONNAME/FAMILY</code>.
    * @param f File to split.
    * @param splitRow
    * @param range

Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java?rev=1404762&r1=1404761&r2=1404762&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java Thu Nov  1 20:54:05 2012
@@ -1768,6 +1768,11 @@ public class HLog implements Syncable {
     return dir;
   }
   
+  /**
+   * @param filename name of the file to validate
+   * @return <tt>true</tt> if the filename matches an HLog, <tt>false</tt>
+   *         otherwise
+   */
   public static boolean validateHLogFilename(String filename) {
     return pattern.matcher(filename).matches();
   }

Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/replication/master/ReplicationLogCleaner.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/replication/master/ReplicationLogCleaner.java?rev=1404762&r1=1404761&r2=1404762&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/replication/master/ReplicationLogCleaner.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/replication/master/ReplicationLogCleaner.java Thu Nov  1 20:54:05 2012
@@ -26,7 +26,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.Abortable;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.client.HConnectionManager;
-import org.apache.hadoop.hbase.master.LogCleanerDelegate;
+import org.apache.hadoop.hbase.master.cleaner.BaseLogCleanerDelegate;
 import org.apache.hadoop.hbase.replication.ReplicationZookeeper;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
 import org.apache.zookeeper.KeeperException;
@@ -40,9 +40,8 @@ import java.util.Set;
  * Implementation of a log cleaner that checks if a log is still scheduled for
  * replication before deleting it when its TTL is over.
  */
-public class ReplicationLogCleaner implements LogCleanerDelegate, Abortable {
+public class ReplicationLogCleaner extends BaseLogCleanerDelegate implements Abortable {
   private static final Log LOG = LogFactory.getLog(ReplicationLogCleaner.class);
-  private Configuration conf;
   private ReplicationZookeeper zkHelper;
   private Set<String> hlogs = new HashSet<String>();
   private boolean stopped = false;
@@ -67,7 +66,7 @@ public class ReplicationLogCleaner imple
 
     // all members of this class are null if replication is disabled, and we
     // return true since false would render the LogsCleaner useless
-    if (this.conf == null) {
+    if (this.getConf() == null) {
       return true;
     }
     String log = filePath.getName();
@@ -122,18 +121,18 @@ public class ReplicationLogCleaner imple
   }
 
   @Override
-  public void setConf(Configuration conf) {
+  public void setConf(Configuration config) {
     // If replication is disabled, keep all members null
-    if (!conf.getBoolean(HConstants.REPLICATION_ENABLE_KEY, false)) {
+    if (!config.getBoolean(HConstants.REPLICATION_ENABLE_KEY, false)) {
       return;
     }
     // Make my own Configuration.  Then I'll have my own connection to zk that
     // I can close myself when comes time.
-    this.conf = new Configuration(conf);
+    Configuration conf = new Configuration(config);
+    super.setConf(conf);
     try {
-      ZooKeeperWatcher zkw =
-          new ZooKeeperWatcher(this.conf, "replicationLogCleaner", null);
-      this.zkHelper = new ReplicationZookeeper(this, this.conf, zkw);
+      ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "replicationLogCleaner", null);
+      this.zkHelper = new ReplicationZookeeper(this, conf, zkw);
     } catch (KeeperException e) {
       LOG.error("Error while configuring " + this.getClass().getName(), e);
     } catch (IOException e) {
@@ -142,10 +141,6 @@ public class ReplicationLogCleaner imple
     refreshHLogsAndSearch(null);
   }
 
-  @Override
-  public Configuration getConf() {
-    return conf;
-  }
 
   @Override
   public void stop(String why) {
@@ -156,7 +151,7 @@ public class ReplicationLogCleaner imple
       this.zkHelper.getZookeeperWatcher().close();
     }
     // Not sure why we're deleting a connection that we never acquired or used
-    HConnectionManager.deleteConnection(this.conf, true);
+    HConnectionManager.deleteConnection(this.getConf(), true);
   }
 
   @Override

Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java?rev=1404762&r1=1404761&r2=1404762&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java Thu Nov  1 20:54:05 2012
@@ -562,6 +562,10 @@ public abstract class FSUtils {
     return p.makeQualified(fs);
   }
 
+  public static void setRootDir(final Configuration c, final Path root) throws IOException {
+    c.set(HConstants.HBASE_DIR, root.toString());
+  }
+
   /**
    * Checks if root region exists
    *
@@ -1187,4 +1191,36 @@ public abstract class FSUtils {
   public static boolean isExists(final FileSystem fs, final Path path) throws IOException {
     return fs.exists(path);
   }
+
+  /**
+   * Log the current state of the filesystem from a certain root directory
+   * @param fs filesystem to investigate
+   * @param root root file/directory to start logging from
+   * @param LOG log to output information
+   * @throws IOException if an unexpected exception occurs
+   */
+  public static void logFileSystemState(final FileSystem fs, final Path root, Log LOG)
+      throws IOException {
+    LOG.debug("Current file system:");
+    logFSTree(LOG, fs, root, "|-");
+  }
+
+  /**
+   * Recursive helper to log the state of the FS
+   * @see #logFileSystemState(FileSystem, Path, Log)
+   */
+  private static void logFSTree(Log LOG, final FileSystem fs, final Path root, String prefix)
+      throws IOException {
+    FileStatus[] files = FSUtils.listStatus(fs, root, null);
+    if (files == null) return;
+
+    for (FileStatus file : files) {
+      if (file.isDir()) {
+        LOG.debug(prefix + file.getPath().getName() + "/");
+        logFSTree(LOG, fs, file.getPath(), prefix + "---");
+      } else {
+        LOG.debug(prefix + file.getPath().getName());
+      }
+    }
+  }
 }

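A usage sketch of the new debugging helper, assuming a FileSystem 'fs', a Configuration 'conf'
and a commons-logging 'LOG' in scope; it walks the tree and prints one debug line per entry,
directories suffixed with "/":

    FSUtils.logFileSystemState(fs, FSUtils.getRootDir(conf), LOG);
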
Added: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/util/HFileArchiveUtil.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/util/HFileArchiveUtil.java?rev=1404762&view=auto
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/util/HFileArchiveUtil.java (added)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/util/HFileArchiveUtil.java Thu Nov  1 20:54:05 2012
@@ -0,0 +1,111 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.Store;
+
+/**
+ * Helper class for all utilities related to archival/retrieval of HFiles
+ */
+public class HFileArchiveUtil {
+
+  private HFileArchiveUtil() {
+    // non-external instantiation - util class
+  }
+
+  /**
+   * Get the directory to archive a store directory
+   * @param conf {@link Configuration} to read for the archive directory name
+   * @param region parent region information under which the store currently
+   *          lives
+   * @param family name of the family in the store
+   * @return {@link Path} to the directory to archive the given store or
+   *         <tt>null</tt> if it should not be archived
+   */
+  public static Path getStoreArchivePath(Configuration conf, HRegion region, byte [] family){
+    return getStoreArchivePath(conf, region.getRegionInfo(), region.getTableDir(), family);
+  }
+
+  /**
+   * Get the directory to archive a store directory
+   * @param conf {@link Configuration} to read for the archive directory name. Can be null.
+   * @param region parent region information under which the store currently lives
+   * @param tabledir directory for the table under which the store currently lives
+   * @param family name of the family in the store
+   * @return {@link Path} to the directory to archive the given store or <tt>null</tt> if it should
+   *         not be archived
+   */
+  public static Path getStoreArchivePath(Configuration conf, HRegionInfo region, Path tabledir,
+      byte[] family) {
+    Path tableArchiveDir = getTableArchivePath(tabledir);
+    return Store.getStoreHomedir(tableArchiveDir,
+      HRegionInfo.encodeRegionName(region.getRegionName()), family);
+  }
+
+  /**
+   * Get the archive directory for a given region under the specified table
+   * @param conf {@link Configuration} to read the archive directory from. Can be null.
+   * @param tabledir the original table directory. Cannot be null.
+   * @param regiondir the path to the region directory. Cannot be null.
+   * @return {@link Path} to the directory to archive the given region, or <tt>null</tt> if it
+   *         should not be archived
+   */
+  public static Path getRegionArchiveDir(Configuration conf, Path tabledir, Path regiondir) {
+    // get the archive directory for a table
+    Path archiveDir = getTableArchivePath(tabledir);
+
+    // then add on the region path under the archive
+    String encodedRegionName = regiondir.getName();
+    return HRegion.getRegionDir(archiveDir, encodedRegionName);
+  }
+
+  /**
+   * Get the path to the table's archive directory, based on the configured archive directory.
+   * <p>
+   * Generally of the form: /hbase/.archive/[tablename]
+   * @param tabledir directory of the table to be archived. Cannot be null.
+   * @return {@link Path} to the archive directory for the table
+   */
+  public static Path getTableArchivePath(Path tabledir) {
+    Path root = tabledir.getParent();
+    return new Path(new Path(root, HConstants.HFILE_ARCHIVE_DIRECTORY), tabledir.getName());
+  }
+
+  /**
+   * Get the full path to the archive directory on the configured {@link FileSystem}
+   * @param conf to look for archive directory name and root directory. Cannot be null. Notes for
+   *          testing: requires a FileSystem root directory to be specified.
+   * @return the full {@link Path} to the archive directory, as defined by the configuration
+   * @throws IOException if an unexpected error occurs
+   */
+  public static Path getArchivePath(Configuration conf) throws IOException {
+    return new Path(FSUtils.getRootDir(conf), HConstants.HFILE_ARCHIVE_DIRECTORY);
+  }
+}
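A usage sketch of the path helpers above; the table and region names are made up, and the
".archive" directory name assumes HConstants.HFILE_ARCHIVE_DIRECTORY keeps its default value:

    Path tableDir = new Path("/hbase", "myTable");
    Path tableArchive = HFileArchiveUtil.getTableArchivePath(tableDir);
    // -> /hbase/.archive/myTable
    Path regionArchive = HFileArchiveUtil.getRegionArchiveDir(null, tableDir,
      new Path(tableDir, "70236052"));  // hypothetical encoded region name
    // -> /hbase/.archive/myTable/70236052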